# Fast implementation of unique elements in an array - up to 30x faster than NumPy
## pip install cythonunique
### Tested against Windows / Python 3.11 / Anaconda
## Cython (and a C/C++ compiler) must be installed to use the optimized Cython implementation.
```python
import timeit
import numpy as np
from cythonunique import fast_unique
def generate_random_arrays(shape, dtype='float64', low=0, high=1):
    """Return a random array of ``shape`` cast to ``dtype``.

    Samples are drawn with ``np.random.uniform(low, high, size=shape)`` and
    then converted with ``astype(dtype)``.

    Args:
        shape: Output shape, forwarded as ``size`` to ``np.random.uniform``.
        dtype: Target dtype handed to ``astype``.
        low: Lower bound forwarded to ``np.random.uniform``.
        high: Upper bound forwarded to ``np.random.uniform``.

    Returns:
        The sampled array after the ``astype(dtype)`` conversion.
    """
    # NOTE(review): no range check is done before the cast, so bounds wider
    # than an integer dtype's range rely on whatever astype does with
    # out-of-range floats -- confirm this is intended for the small int types.
    samples = np.random.uniform(low, high, size=shape)
    return samples.astype(dtype)
def fast_unique_ordered(a):
    """Call ``fast_unique`` on ``a`` with ``accept_not_ordered=False``.

    Args:
        a: Array passed through unchanged to ``fast_unique``.

    Returns:
        Whatever ``fast_unique`` returns for these arguments. The benchmark
        compares it element-wise with ``np.unique(a)``, so it is presumably
        the sorted unique values -- confirm against the cythonunique docs.
    """
    return fast_unique(a, accept_not_ordered=False)
def fast_unique_not_ordered(a):
    """Call ``fast_unique`` on ``a`` with ``accept_not_ordered=True``.

    ``uint64limit`` is fixed at 4294967296 (2**32); its exact meaning is
    defined by ``fast_unique`` and is not visible here.

    Args:
        a: Array passed through unchanged to ``fast_unique``.

    Returns:
        Whatever ``fast_unique`` returns for these arguments. The benchmark
        sorts it before comparing with ``np.unique(a)``, so the order of the
        result is presumably not guaranteed -- confirm against the
        cythonunique docs.
    """
    return fast_unique(a, accept_not_ordered=True, uint64limit=4294967296)
# Benchmark configuration: number of elements per array and the sampling
# bounds handed to generate_random_arrays.
size = 10000000
low = 0
high = 100000000
# One (shape, dtype, low, high) argument tuple per benchmarked dtype.
arras = [
    (size, 'float32', low, high),
    (size, 'float64', low, high),
    (size, np.uint8, low, high),
    (size, np.int8, low, high),
    (size, np.int16, low, high),
    (size, np.int32, low, high),
    (size, np.int64, low, high),
    (size, np.uint16, low, high),
    (size, np.uint32, low, high),
    (size, np.uint64, low, high),
]
# Number of timed repetitions; the printed timings are the mean per call.
reps = 5


def _run_benchmark(title, unique_func, sort_result):
    """Time ``unique_func`` against ``np.unique`` for every entry in ``arras``.

    For each dtype this prints the mean runtime of ``unique_func`` ('c++'),
    the mean runtime of ``np.unique`` ('np'), whether both produce the same
    values, and a separator line.

    Args:
        title: Header line printed before the first measurement.
        unique_func: Callable taking the array and returning its unique values.
        sort_result: If true, sort the result of ``unique_func`` before
            comparing it with the output of ``np.unique``.
    """
    print(title)
    for args in arras:
        arr = generate_random_arrays(*args)
        # Give timeit an explicit namespace so the statement does not depend
        # on module-level globals.
        namespace = {'unique_func': unique_func, 'np': np, 'arr': arr}

        t1 = timeit.timeit(
            'u=unique_func(arr)', globals=namespace, number=reps
        ) / reps
        print('c++ ', t1)

        t2 = timeit.timeit(
            'u=np.unique(arr)', globals=namespace, number=reps
        ) / reps
        print('np ', t2)

        u = unique_func(arr)
        if sort_result:
            u = np.sort(u)
        q = np.unique(arr)
        # array_equal checks the shapes first, so a result with the wrong
        # number of elements prints False instead of broadcasting or raising.
        print(np.array_equal(u, q))
        print('-------------------------')


_run_benchmark(
    'Ordered --------------------------',
    fast_unique_ordered,
    sort_result=False,
)
# Falls back to Ordered if dtype is float or np.min(a)<0
_run_benchmark(
    'Unordered --------------------------',
    fast_unique_not_ordered,
    sort_result=True,
)
# Ordered --------------------------
# c++ 0.10320082000107504
# np 0.13888095999718644
# True
# -------------------------
# c++ 0.10645331999985501
# np 0.14625759999908042
# True
# -------------------------
# c++ 0.03644101999816485
# np 0.0833885799976997
# True
# -------------------------
# c++ 0.03784457999863662
# np 0.08405877999903169
# True
# -------------------------
# c++ 0.03909369999892078
# np 0.09831685999815817
# True
# -------------------------
# c++ 0.045269479998387395
# np 0.0970024200010812
# True
# -------------------------
# c++ 0.06357002000149806
# np 0.12426133999833837
# True
# -------------------------
# c++ 0.04224961999861989
# np 0.09802825999795459
# True
# -------------------------
# c++ 0.046695440000621605
# np 0.10013775999832433
# True
# -------------------------
# c++ 0.06854987999831792
# np 0.1277739599987399
# True
# -------------------------
# Unordered --------------------------
# c++ 0.10427475999749732
# np 0.13533045999938623
# True
# -------------------------
# c++ 0.1188001600006828
# np 0.14714665999927093
# True
# -------------------------
# c++ 0.011010520000127144
# np 0.2836028199992143
# True
# -------------------------
# c++ 0.03693970000022091
# np 0.08278198000043631
# True
# -------------------------
# c++ 0.021734919998561964
# np 0.29412690000026487
# True
# -------------------------
# c++ 0.02548580000002403
# np 0.29879269999801183
# True
# -------------------------
# c++ 0.030021439999109133
# np 0.31899350000021515
# True
# -------------------------
# c++ 0.012441499999840743
# np 0.28925163999956566
# True
# -------------------------
# c++ 0.015460380000877193
# np 0.2964318199985428
# True
# -------------------------
# c++ 0.026127819999237543
# np 0.31972092000069097
# True
# -------------------------
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/cythonunique",
"name": "cythonunique",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "unique,find",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/63/a9/ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f/cythonunique-0.11.tar.gz",
"platform": null,
"description": "\r\n# Fast implementation of unique elements in an array - up to 30x faster than NumPy\r\n\r\n## pip install cythonunique\r\n\r\n### Tested against Windows / Python 3.11 / Anaconda\r\n\r\n## Cython (and a C/C++ compiler) must be installed to use the optimized Cython implementation.\r\n\r\n\r\n```python\r\nimport timeit\r\nimport numpy as np\r\n\r\nfrom cythonunique import fast_unique\r\n\r\n\r\ndef generate_random_arrays(shape, dtype='float64', low=0, high=1):\r\n return np.random.uniform(low, high, size=shape).astype(dtype)\r\n\r\n\r\ndef fast_unique_ordered(a):\r\n return fast_unique(a, accept_not_ordered=False)\r\n\r\n\r\ndef fast_unique_not_ordered(a):\r\n return fast_unique(a, accept_not_ordered=True, uint64limit=4294967296)\r\n\r\n\r\nsize = 10000000\r\nlow = 0\r\nhigh = 100000000\r\narras = [\r\n (size, 'float32', low, high),\r\n (size, 'float64', low, high),\r\n (size, np.uint8, low, high),\r\n (size, np.int8, low, high),\r\n (size, np.int16, low, high),\r\n (size, np.int32, low, high),\r\n (size, np.int64, low, high),\r\n (size, np.uint16, low, high),\r\n (size, np.uint32, low, high),\r\n (size, np.uint64, low, high),\r\n]\r\nreps = 5\r\nprint('Ordered --------------------------')\r\n\r\nfor a in arras:\r\n arr = generate_random_arrays(*a)\r\n s = \"\"\"u=fast_unique_ordered(arr)\"\"\"\r\n t1 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n print('c++ ', t1)\r\n\r\n s = \"\"\"u=np.unique(arr)\"\"\"\r\n t2 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n print('np ', t2)\r\n u = fast_unique_ordered(arr)\r\n q = np.unique(arr)\r\n print(np.all(u == q))\r\n print('-------------------------')\r\n\r\nprint('Unordered --------------------------') # Falls back to Ordered if dtype is float or np.min(a)<0\r\nfor a in arras:\r\n arr = generate_random_arrays(*a)\r\n s = \"\"\"u=fast_unique_not_ordered(arr)\"\"\"\r\n t1 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n print('c++ ', t1)\r\n\r\n s = 
\"\"\"u=np.unique(arr)\"\"\"\r\n t2 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n print('np ', t2)\r\n u = fast_unique_not_ordered(arr)\r\n q = np.unique(arr)\r\n print(np.all(np.sort(u) == q))\r\n print('-------------------------')\r\n\r\n# Ordered --------------------------\r\n# c++ 0.10320082000107504\r\n# np 0.13888095999718644\r\n# True\r\n# -------------------------\r\n# c++ 0.10645331999985501\r\n# np 0.14625759999908042\r\n# True\r\n# -------------------------\r\n# c++ 0.03644101999816485\r\n# np 0.0833885799976997\r\n# True\r\n# -------------------------\r\n# c++ 0.03784457999863662\r\n# np 0.08405877999903169\r\n# True\r\n# -------------------------\r\n# c++ 0.03909369999892078\r\n# np 0.09831685999815817\r\n# True\r\n# -------------------------\r\n# c++ 0.045269479998387395\r\n# np 0.0970024200010812\r\n# True\r\n# -------------------------\r\n# c++ 0.06357002000149806\r\n# np 0.12426133999833837\r\n# True\r\n# -------------------------\r\n# c++ 0.04224961999861989\r\n# np 0.09802825999795459\r\n# True\r\n# -------------------------\r\n# c++ 0.046695440000621605\r\n# np 0.10013775999832433\r\n# True\r\n# -------------------------\r\n# c++ 0.06854987999831792\r\n# np 0.1277739599987399\r\n# True\r\n# -------------------------\r\n# Unordered --------------------------\r\n# c++ 0.10427475999749732\r\n# np 0.13533045999938623\r\n# True\r\n# -------------------------\r\n# c++ 0.1188001600006828\r\n# np 0.14714665999927093\r\n# True\r\n# -------------------------\r\n# c++ 0.011010520000127144\r\n# np 0.2836028199992143\r\n# True\r\n# -------------------------\r\n# c++ 0.03693970000022091\r\n# np 0.08278198000043631\r\n# True\r\n# -------------------------\r\n# c++ 0.021734919998561964\r\n# np 0.29412690000026487\r\n# True\r\n# -------------------------\r\n# c++ 0.02548580000002403\r\n# np 0.29879269999801183\r\n# True\r\n# -------------------------\r\n# c++ 0.030021439999109133\r\n# np 0.31899350000021515\r\n# True\r\n# 
-------------------------\r\n# c++ 0.012441499999840743\r\n# np 0.28925163999956566\r\n# True\r\n# -------------------------\r\n# c++ 0.015460380000877193\r\n# np 0.2964318199985428\r\n# True\r\n# -------------------------\r\n# c++ 0.026127819999237543\r\n# np 0.31972092000069097\r\n# True\r\n# -------------------------\r\n\r\n\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Fast implementation of unique elements in an array - up to 30x faster than NumPy",
"version": "0.11",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/cythonunique"
},
"split_keywords": [
"unique",
"find"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "53847ed2c81c4a9c0d7c69033b5941a43f387c00ed795e3e4265a50859ddb773",
"md5": "4a10cbb1edb730b25ca830aa70e76515",
"sha256": "7c5634ded1225c091c3a80eb49aee88391d22ad9d76c0b52c984210637679308"
},
"downloads": -1,
"filename": "cythonunique-0.11-py3-none-any.whl",
"has_sig": false,
"md5_digest": "4a10cbb1edb730b25ca830aa70e76515",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 23168,
"upload_time": "2023-12-01T09:06:43",
"upload_time_iso_8601": "2023-12-01T09:06:43.139733Z",
"url": "https://files.pythonhosted.org/packages/53/84/7ed2c81c4a9c0d7c69033b5941a43f387c00ed795e3e4265a50859ddb773/cythonunique-0.11-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "63a9ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f",
"md5": "4d2afeb4d861dd5d251eeee163012610",
"sha256": "53a4414ffb0904739b6869c3ea1ef8ed7c8ed0d54647f99ed4af4a1958dba6c3"
},
"downloads": -1,
"filename": "cythonunique-0.11.tar.gz",
"has_sig": false,
"md5_digest": "4d2afeb4d861dd5d251eeee163012610",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 23393,
"upload_time": "2023-12-01T09:06:45",
"upload_time_iso_8601": "2023-12-01T09:06:45.118976Z",
"url": "https://files.pythonhosted.org/packages/63/a9/ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f/cythonunique-0.11.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-12-01 09:06:45",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "cythonunique",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "cythonunique"
}