cythonunique


Name: cythonunique (JSON)
Version: 0.11 (PyPI version, JSON)
download:
home_page: https://github.com/hansalemaos/cythonunique
Summary: Fast implementation of unique elements in an array - up to 30x faster than NumPy
upload_time: 2023-12-01 09:06:45
maintainer:
docs_url: None
author: Johannes Fischer
requires_python:
license: MIT
keywords: unique, find
VCS:
bugtrack_url:
requirements: No requirements were recorded.
Travis-CI: No Travis.
coveralls test coverage: No coveralls.
            
# Fast implementation of unique elements in an array - up to 30x faster than NumPy

## pip install cythonunique

### Tested against Windows / Python 3.11 / Anaconda

## Cython (and a C/C++ compiler) must be installed to use the optimized Cython implementation.


```python
import timeit
import numpy as np

from cythonunique import fast_unique


def generate_random_arrays(shape, dtype='float64', low=0, high=1):
    """Build a random test array for the benchmark.

    Values are drawn with ``np.random.uniform(low, high, size=shape)`` and the
    result is then converted to ``dtype`` with ``astype``.

    Args:
        shape: Passed to ``np.random.uniform`` as ``size``.
        dtype: Target dtype handed to ``astype``.
        low: Lower bound handed to ``np.random.uniform``.
        high: Upper bound handed to ``np.random.uniform``.

    Returns:
        The sampled array after conversion to ``dtype``.
    """
    samples = np.random.uniform(low=low, high=high, size=shape)
    return samples.astype(dtype)


def fast_unique_ordered(a):
    """Call ``fast_unique`` on ``a`` with ``accept_not_ordered=False``.

    The benchmark below compares this result element-wise against
    ``np.unique(a)`` without sorting it first.
    """
    unique_values = fast_unique(a, accept_not_ordered=False)
    return unique_values


def fast_unique_not_ordered(a):
    """Call ``fast_unique`` on ``a`` with ``accept_not_ordered=True``.

    The result is not assumed to be sorted: the benchmark below applies
    ``np.sort`` before comparing it against ``np.unique(a)``.
    """
    # NOTE(review): 4294967296 == 2**32; the exact meaning of ``uint64limit``
    # is defined by cythonunique and is not visible here - confirm in its docs.
    unique_values = fast_unique(
        a,
        accept_not_ordered=True,
        uint64limit=4294967296,
    )
    return unique_values


# Benchmark configuration: one (shape, dtype, low, high) argument tuple for
# generate_random_arrays per dtype under test.
size = 10000000
low = 0
high = 100000000
arras = [
    (size, dtype, low, high)
    for dtype in (
        'float32',
        'float64',
        np.uint8,
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint16,
        np.uint32,
        np.uint64,
    )
]
reps = 5  # timed runs per statement; the reported figure is the mean

# The timed statements are looked up through globals(), so ``arr`` and the
# wrapper functions must stay module-level names.
print('Ordered --------------------------')

for spec in arras:
    arr = generate_random_arrays(*spec)

    stmt = """u=fast_unique_ordered(arr)"""
    mean_fast = timeit.timeit(stmt, globals=globals(), number=reps) / reps
    print('c++ ', mean_fast)

    stmt = """u=np.unique(arr)"""
    mean_numpy = timeit.timeit(stmt, globals=globals(), number=reps) / reps
    print('np ', mean_numpy)

    # Sanity check: both implementations must agree element by element.
    found = fast_unique_ordered(arr)
    expected = np.unique(arr)
    print(np.all(found == expected))
    print('-------------------------')

# The unordered path falls back to the ordered one if the dtype is float or
# the array minimum is negative.
print('Unordered --------------------------')
for spec in arras:
    arr = generate_random_arrays(*spec)

    stmt = """u=fast_unique_not_ordered(arr)"""
    mean_fast = timeit.timeit(stmt, globals=globals(), number=reps) / reps
    print('c++ ', mean_fast)

    stmt = """u=np.unique(arr)"""
    mean_numpy = timeit.timeit(stmt, globals=globals(), number=reps) / reps
    print('np ', mean_numpy)

    # np.unique returns sorted values, so sort the result before comparing.
    found = fast_unique_not_ordered(arr)
    expected = np.unique(arr)
    print(np.all(np.sort(found) == expected))
    print('-------------------------')

# Ordered --------------------------
# c++  0.10320082000107504
# np  0.13888095999718644
# True
# -------------------------
# c++  0.10645331999985501
# np  0.14625759999908042
# True
# -------------------------
# c++  0.03644101999816485
# np  0.0833885799976997
# True
# -------------------------
# c++  0.03784457999863662
# np  0.08405877999903169
# True
# -------------------------
# c++  0.03909369999892078
# np  0.09831685999815817
# True
# -------------------------
# c++  0.045269479998387395
# np  0.0970024200010812
# True
# -------------------------
# c++  0.06357002000149806
# np  0.12426133999833837
# True
# -------------------------
# c++  0.04224961999861989
# np  0.09802825999795459
# True
# -------------------------
# c++  0.046695440000621605
# np  0.10013775999832433
# True
# -------------------------
# c++  0.06854987999831792
# np  0.1277739599987399
# True
# -------------------------
# Unordered --------------------------
# c++  0.10427475999749732
# np  0.13533045999938623
# True
# -------------------------
# c++  0.1188001600006828
# np  0.14714665999927093
# True
# -------------------------
# c++  0.011010520000127144
# np  0.2836028199992143
# True
# -------------------------
# c++  0.03693970000022091
# np  0.08278198000043631
# True
# -------------------------
# c++  0.021734919998561964
# np  0.29412690000026487
# True
# -------------------------
# c++  0.02548580000002403
# np  0.29879269999801183
# True
# -------------------------
# c++  0.030021439999109133
# np  0.31899350000021515
# True
# -------------------------
# c++  0.012441499999840743
# np  0.28925163999956566
# True
# -------------------------
# c++  0.015460380000877193
# np  0.2964318199985428
# True
# -------------------------
# c++  0.026127819999237543
# np  0.31972092000069097
# True
# -------------------------


```

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/cythonunique",
    "name": "cythonunique",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "unique,find",
    "author": "Johannes Fischer",
    "author_email": "aulasparticularesdealemaosp@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/63/a9/ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f/cythonunique-0.11.tar.gz",
    "platform": null,
    "description": "\r\n# Fast implementation of unique elements in an array - up to 30x faster than NumPy\r\n\r\n## pip install cythonunique\r\n\r\n### Tested against Windows / Python 3.11 / Anaconda\r\n\r\n## Cython (and a C/C++ compiler) must be installed to use the optimized Cython implementation.\r\n\r\n\r\n```python\r\nimport timeit\r\nimport numpy as np\r\n\r\nfrom cythonunique import fast_unique\r\n\r\n\r\ndef generate_random_arrays(shape, dtype='float64', low=0, high=1):\r\n    return np.random.uniform(low, high, size=shape).astype(dtype)\r\n\r\n\r\ndef fast_unique_ordered(a):\r\n    return fast_unique(a, accept_not_ordered=False)\r\n\r\n\r\ndef fast_unique_not_ordered(a):\r\n    return fast_unique(a, accept_not_ordered=True, uint64limit=4294967296)\r\n\r\n\r\nsize = 10000000\r\nlow = 0\r\nhigh = 100000000\r\narras = [\r\n    (size, 'float32', low, high),\r\n    (size, 'float64', low, high),\r\n    (size, np.uint8, low, high),\r\n    (size, np.int8, low, high),\r\n    (size, np.int16, low, high),\r\n    (size, np.int32, low, high),\r\n    (size, np.int64, low, high),\r\n    (size, np.uint16, low, high),\r\n    (size, np.uint32, low, high),\r\n    (size, np.uint64, low, high),\r\n]\r\nreps = 5\r\nprint('Ordered --------------------------')\r\n\r\nfor a in arras:\r\n    arr = generate_random_arrays(*a)\r\n    s = \"\"\"u=fast_unique_ordered(arr)\"\"\"\r\n    t1 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n    print('c++ ', t1)\r\n\r\n    s = \"\"\"u=np.unique(arr)\"\"\"\r\n    t2 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n    print('np ', t2)\r\n    u = fast_unique_ordered(arr)\r\n    q = np.unique(arr)\r\n    print(np.all(u == q))\r\n    print('-------------------------')\r\n\r\nprint('Unordered --------------------------') # Falls back to Ordered if dtype is float or np.min(a)<0\r\nfor a in arras:\r\n    arr = generate_random_arrays(*a)\r\n    s = \"\"\"u=fast_unique_not_ordered(arr)\"\"\"\r\n    t1 = timeit.timeit(s, 
globals=globals(), number=reps) / reps\r\n    print('c++ ', t1)\r\n\r\n    s = \"\"\"u=np.unique(arr)\"\"\"\r\n    t2 = timeit.timeit(s, globals=globals(), number=reps) / reps\r\n    print('np ', t2)\r\n    u = fast_unique_not_ordered(arr)\r\n    q = np.unique(arr)\r\n    print(np.all(np.sort(u) == q))\r\n    print('-------------------------')\r\n\r\n# Ordered --------------------------\r\n# c++  0.10320082000107504\r\n# np  0.13888095999718644\r\n# True\r\n# -------------------------\r\n# c++  0.10645331999985501\r\n# np  0.14625759999908042\r\n# True\r\n# -------------------------\r\n# c++  0.03644101999816485\r\n# np  0.0833885799976997\r\n# True\r\n# -------------------------\r\n# c++  0.03784457999863662\r\n# np  0.08405877999903169\r\n# True\r\n# -------------------------\r\n# c++  0.03909369999892078\r\n# np  0.09831685999815817\r\n# True\r\n# -------------------------\r\n# c++  0.045269479998387395\r\n# np  0.0970024200010812\r\n# True\r\n# -------------------------\r\n# c++  0.06357002000149806\r\n# np  0.12426133999833837\r\n# True\r\n# -------------------------\r\n# c++  0.04224961999861989\r\n# np  0.09802825999795459\r\n# True\r\n# -------------------------\r\n# c++  0.046695440000621605\r\n# np  0.10013775999832433\r\n# True\r\n# -------------------------\r\n# c++  0.06854987999831792\r\n# np  0.1277739599987399\r\n# True\r\n# -------------------------\r\n# Unordered --------------------------\r\n# c++  0.10427475999749732\r\n# np  0.13533045999938623\r\n# True\r\n# -------------------------\r\n# c++  0.1188001600006828\r\n# np  0.14714665999927093\r\n# True\r\n# -------------------------\r\n# c++  0.011010520000127144\r\n# np  0.2836028199992143\r\n# True\r\n# -------------------------\r\n# c++  0.03693970000022091\r\n# np  0.08278198000043631\r\n# True\r\n# -------------------------\r\n# c++  0.021734919998561964\r\n# np  0.29412690000026487\r\n# True\r\n# -------------------------\r\n# c++  0.02548580000002403\r\n# np  0.29879269999801183\r\n# 
True\r\n# -------------------------\r\n# c++  0.030021439999109133\r\n# np  0.31899350000021515\r\n# True\r\n# -------------------------\r\n# c++  0.012441499999840743\r\n# np  0.28925163999956566\r\n# True\r\n# -------------------------\r\n# c++  0.015460380000877193\r\n# np  0.2964318199985428\r\n# True\r\n# -------------------------\r\n# c++  0.026127819999237543\r\n# np  0.31972092000069097\r\n# True\r\n# -------------------------\r\n\r\n\r\n```\r\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Fast implementation of unique elements in an array - up to 30x faster than NumPy",
    "version": "0.11",
    "project_urls": {
        "Homepage": "https://github.com/hansalemaos/cythonunique"
    },
    "split_keywords": [
        "unique",
        "find"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "53847ed2c81c4a9c0d7c69033b5941a43f387c00ed795e3e4265a50859ddb773",
                "md5": "4a10cbb1edb730b25ca830aa70e76515",
                "sha256": "7c5634ded1225c091c3a80eb49aee88391d22ad9d76c0b52c984210637679308"
            },
            "downloads": -1,
            "filename": "cythonunique-0.11-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "4a10cbb1edb730b25ca830aa70e76515",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 23168,
            "upload_time": "2023-12-01T09:06:43",
            "upload_time_iso_8601": "2023-12-01T09:06:43.139733Z",
            "url": "https://files.pythonhosted.org/packages/53/84/7ed2c81c4a9c0d7c69033b5941a43f387c00ed795e3e4265a50859ddb773/cythonunique-0.11-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "63a9ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f",
                "md5": "4d2afeb4d861dd5d251eeee163012610",
                "sha256": "53a4414ffb0904739b6869c3ea1ef8ed7c8ed0d54647f99ed4af4a1958dba6c3"
            },
            "downloads": -1,
            "filename": "cythonunique-0.11.tar.gz",
            "has_sig": false,
            "md5_digest": "4d2afeb4d861dd5d251eeee163012610",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 23393,
            "upload_time": "2023-12-01T09:06:45",
            "upload_time_iso_8601": "2023-12-01T09:06:45.118976Z",
            "url": "https://files.pythonhosted.org/packages/63/a9/ed8e6c7eb3111865223240ebdea5532eefe74d99ada96690aed080a3a76f/cythonunique-0.11.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-12-01 09:06:45",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "hansalemaos",
    "github_project": "cythonunique",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [],
    "lcname": "cythonunique"
}
        
Elapsed time: 0.14250s