# Parallel argsort with Cython - up to 2.5x faster than NumPy!
## pip install cythonparallelargsort
### Tested on Windows / Python 3.11 / Anaconda
## Cython (and a C/C++ compiler) must be installed
A parallel argsort function that efficiently computes the indices that would sort a NumPy array.
It uses Cython to generate optimized C++ code and takes advantage of OpenMP for parallelism.
```python
from cythonparallelargsort import parallel_argsort
import pandas as pd
import numpy as np
df = pd.read_csv(
"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv"
)
df=pd.concat([df for _ in range(1000)],ignore_index=True)
df=df.sample(len(df))
indicopy=df.index.to_numpy().copy()
p1=parallel_argsort(indicopy,'parallel_buffered',)
p2=parallel_argsort(indicopy,'parallel',)
p3=parallel_argsort(indicopy,'sort')
p4=np.argsort(indicopy)
# p1
# Out[3]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)
# p2
# Out[4]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)
# p3
# Out[5]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)
# p4
# Out[6]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)
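# Benchmarks: each block below shows the array size, the four %timeit calls, and their
# results in the same order. The sizes were presumably produced by changing the repeat
# count in the pd.concat call above (range(100), range(1000), range(10000), range(100000)),
# since the script as written (range(1000)) yields 891000 rows.
# df.index.shape
# Out[8]: (89100,)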
# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)
# %timeit p2=parallel_argsort(indicopy,'parallel',)
# %timeit p3=parallel_argsort(indicopy,'sort')
# %timeit p4=np.argsort(indicopy)
# 3.18 ms ± 622 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 2.62 ms ± 38.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 6.38 ms ± 54.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 5.13 ms ± 99.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# df.index.shape
# (891000,)
# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)
# %timeit p2=parallel_argsort(indicopy,'parallel',)
# %timeit p3=parallel_argsort(indicopy,'sort')
# %timeit p4=np.argsort(indicopy)
# 28.4 ms ± 2.42 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# 29 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# 90.9 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# 73.3 ms ± 910 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# df.index.shape
# Out[4]: (8910000,)
# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)
# %timeit p2=parallel_argsort(indicopy,'parallel',)
# %timeit p3=parallel_argsort(indicopy,'sort')
# %timeit p4=np.argsort(indicopy)
# 586 ms ± 24.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 604 ms ± 18.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 1.45 s ± 20.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 1.34 s ± 13.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# df.index.shape
# Out[4]: (89100000,)
# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)
# %timeit p2=parallel_argsort(indicopy,'parallel',)
# %timeit p3=parallel_argsort(indicopy,'sort')
# %timeit p4=np.argsort(indicopy)
# 10.1 s ± 97.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 10.5 s ± 45.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 20.9 s ± 82.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# 24.5 s ± 70.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/cythonparallelargsort",
"name": "cythonparallelargsort",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "cython,arrays,argsort,numpy",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/34/84/567eb4d5fbeb4409aa7e029b334e6109f128837aa62737f2f943853194cd/cythonparallelargsort-0.10.tar.gz",
"platform": null,
"description": "\r\n# Parallel argsort with Cython - 2.5x faster than NumPy!\r\n\r\n## pip install cythonparallelargsort \r\n\r\n### Tested against Windows / Python 3.11 / Anaconda\r\n\r\n## Cython (and a C/C++ compiler) must be installed\r\n\r\nA parallel argsort function for efficiently sorting numpy arrays in parallel.\r\nIt utilizes Cython to generate optimized C++ code, taking advantage of OpenMP for parallelism.\r\n\r\n```python\r\nfrom cythonparallelargsort import parallel_argsort\r\nimport pandas as pd\r\nimport numpy as np\r\ndf = pd.read_csv(\r\n \"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv\"\r\n)\r\ndf=pd.concat([df for _ in range(1000)],ignore_index=True)\r\ndf=df.sample(len(df))\r\nindicopy=df.index.to_numpy().copy()\r\np1=parallel_argsort(indicopy,'parallel_buffered',)\r\np2=parallel_argsort(indicopy,'parallel',)\r\np3=parallel_argsort(indicopy,'sort')\r\np4=np.argsort(indicopy)\r\n\r\n# p1\r\n# Out[3]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)\r\n# p2\r\n# Out[4]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)\r\n# p3\r\n# Out[5]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)\r\n# p4\r\n# Out[6]: array([125054, 85353, 788878, ..., 46414, 789033, 786844], dtype=int64)\r\n\r\n\r\n# df.index.shape\r\n# Out[8]: (89100,)\r\n# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)\r\n# %timeit p2=parallel_argsort(indicopy,'parallel',)\r\n# %timeit p3=parallel_argsort(indicopy,'sort')\r\n# %timeit p4=np.argsort(indicopy)\r\n# 3.18 ms \u00b1 622 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 100 loops each)\r\n# 2.62 ms \u00b1 38.8 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 100 loops each)\r\n# 6.38 ms \u00b1 54.3 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 100 loops each)\r\n# 5.13 ms \u00b1 99.1 \u00b5s per loop (mean \u00b1 std. dev. 
of 7 runs, 100 loops each)\r\n\r\n# df.index.shape\r\n# (891000,)\r\n# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)\r\n# %timeit p2=parallel_argsort(indicopy,'parallel',)\r\n# %timeit p3=parallel_argsort(indicopy,'sort')\r\n# %timeit p4=np.argsort(indicopy)\r\n# 28.4 ms \u00b1 2.42 ms per loop (mean \u00b1 std. dev. of 7 runs, 10 loops each)\r\n# 29 ms \u00b1 1.04 ms per loop (mean \u00b1 std. dev. of 7 runs, 10 loops each)\r\n# 90.9 ms \u00b1 1.1 ms per loop (mean \u00b1 std. dev. of 7 runs, 10 loops each)\r\n# 73.3 ms \u00b1 910 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 10 loops each)\r\n\r\n# df.index.shape\r\n# Out[4]: (8910000,)\r\n# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)\r\n# %timeit p2=parallel_argsort(indicopy,'parallel',)\r\n# %timeit p3=parallel_argsort(indicopy,'sort')\r\n# %timeit p4=np.argsort(indicopy)\r\n# 586 ms \u00b1 24.3 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 604 ms \u00b1 18.4 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 1.45 s \u00b1 20.2 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 1.34 s \u00b1 13.6 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n\r\n# df.index.shape\r\n# Out[4]: (89100000,)\r\n# %timeit p1=parallel_argsort(indicopy,'parallel_buffered',)\r\n# %timeit p2=parallel_argsort(indicopy,'parallel',)\r\n# %timeit p3=parallel_argsort(indicopy,'sort')\r\n# %timeit p4=np.argsort(indicopy)\r\n# 10.1 s \u00b1 97.9 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 10.5 s \u00b1 45.9 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 20.9 s \u00b1 82.1 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n# 24.5 s \u00b1 70.9 ms per loop (mean \u00b1 std. dev. of 7 runs, 1 loop each)\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Parallel argsort with Cython - 2.5x faster than NumPy!",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/cythonparallelargsort"
},
"split_keywords": [
"cython",
"arrays",
"argsort",
"numpy"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "bf1d0e587d6a0cd5da64eb06612dcd242a6cee6973e8c06b187c11dde5725c58",
"md5": "fdf36142fe6bfc4ccf012bd34485eee0",
"sha256": "71ec76f596f4e809d0cf45cc7c8c62af947c6d3abf75f905952e69cdafb323a7"
},
"downloads": -1,
"filename": "cythonparallelargsort-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "fdf36142fe6bfc4ccf012bd34485eee0",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 22816,
"upload_time": "2023-12-10T02:14:18",
"upload_time_iso_8601": "2023-12-10T02:14:18.550158Z",
"url": "https://files.pythonhosted.org/packages/bf/1d/0e587d6a0cd5da64eb06612dcd242a6cee6973e8c06b187c11dde5725c58/cythonparallelargsort-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "3484567eb4d5fbeb4409aa7e029b334e6109f128837aa62737f2f943853194cd",
"md5": "4b2bc200acb33ad31a2aa69828948f2d",
"sha256": "7ec263b96e07d2db492c707a845b9ca04030a87463a1efdb4aac45ae56290fd9"
},
"downloads": -1,
"filename": "cythonparallelargsort-0.10.tar.gz",
"has_sig": false,
"md5_digest": "4b2bc200acb33ad31a2aa69828948f2d",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 22599,
"upload_time": "2023-12-10T02:14:20",
"upload_time_iso_8601": "2023-12-10T02:14:20.524587Z",
"url": "https://files.pythonhosted.org/packages/34/84/567eb4d5fbeb4409aa7e029b334e6109f128837aa62737f2f943853194cd/cythonparallelargsort-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-12-10 02:14:20",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "cythonparallelargsort",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "cythonparallelargsort"
}