# performs sorting and grouping operations on multidimensional NumPy arrays using Cython and hash-based algorithms.
## pip install cythonnestednumpy
### Tested against Windows / Python 3.11 / Anaconda
## Cython (and a C/C++ compiler) must be installed
```python
import numpy as np
from cythonnestednumpy import HashSort
img1 = np.full([900,1800,3],255,dtype=np.uint8)
img2 = np.full([900,1800,3],255,dtype=np.uint8)
img2[...,0]=0
img3 = np.full([900,1800,3],255,dtype=np.uint8)
img3[...,2]=1
a=np.concatenate([img1,img2,img3])
cyne=HashSort(a,unordered=True)
# 1st column: absolute index (into a.flatten() or a.ravel())
# 2nd - n column: dimension (the more dimensions your array has, the more columns will show up)
# 3rd column: the index in cyne.iterray
# 4th column: 1 marks the first (unique) item found; 0 means the same value has been found before.
# 5th column: how many matches
# 6th column: hash code
cyne.generate_hash_array(last_dim=None)
resultdata=cyne.sort_by_absolut_index(ascending=True)
# Out[3]:
# array([[ 0, 0, 1, 1, 2497830064280488930],
# [ 30, 1, 0, 2, 2497830064280488930],
# [ 60, 2, 0, 3, 2497830064280488930],
# [ 90, 3, 0, 4, 2497830064280488930],
# ...
# [ 780, 26, 0, 7, -5024405870974420794],
# [ 810, 27, 0, 8, -5024405870974420794],
# [ 840, 28, 0, 9, -5024405870974420794],
# [ 870, 29, 0, 10, -5024405870974420794]], dtype=int64)
cyne.sort_by_absolut_index(ascending=False)
# Out[4]:
# array([[ 870, 29, 0, 10, -5024405870974420794],
# [ 840, 28, 0, 9, -5024405870974420794],
# [ 810, 27, 0, 8, -5024405870974420794],
# [ 780, 26, 0, 7, -5024405870974420794],
# [ 750, 25, 0, 6, -5024405870974420794],
# [ 720, 24, 0, 5, -5024405870974420794],
# ...
# [ 240, 8, 0, 9, 2497830064280488930],
# [ 210, 7, 0, 8, 2497830064280488930],
# [ 180, 6, 0, 7, 2497830064280488930],
# [ 150, 5, 0, 6, 2497830064280488930],
# [ 120, 4, 0, 5, 2497830064280488930],
# [ 90, 3, 0, 4, 2497830064280488930],
# [ 60, 2, 0, 3, 2497830064280488930],
# [ 30, 1, 0, 2, 2497830064280488930],
# [ 0, 0, 1, 1, 2497830064280488930]], dtype=int64)
cyne.get_unique_dims_data()
# Out[3]:
# [array([255, 255, 255], dtype=uint8),
# array([255, 255, 255], dtype=uint8),
# array([255, 255, 255], dtype=uint8)]
cyne.get_unique_dims_data(start_dim=2,end_dim=-1)
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255]],
#
# [[255, 255, 255],
# [255, 255, 255],
# ...
# [ 0, 255, 255]],
#
# [[ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255],
# [ 0, 255, 255]],
cyne.get_all_values(start_dim=0,end_dim=-1)
# ...,
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255]],
# ...,
# [[255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [255, 255, 1]],
# ...,
#allva=cyne.get_unique_dims_values()
cyne.group_equal_values()
# [255, 255, 1]],
# [[255, 255, 1],
# [255, 255, 1],
# [255, 255, 1],
# [ 0, 255, 255],
# [ 0, 255, 255],
# ...
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255],
# [255, 255, 255]],
groupedvalues=cyne.group_equal_values()
byqty=cyne.sort_by_quantity(ascending=False)
cyne.sort_by_hash(ascending=False)
# Out[3]:
# array([[ 270, 9, 0, 10, 2497830064280488930],
# [ 240, 8, 0, 9, 2497830064280488930],
# [ 210, 7, 0, 8, 2497830064280488930],
# [ 180, 6, 0, 7, 2497830064280488930],
# ...
# [ 750, 25, 0, 6, -5024405870974420794],
# [ 720, 24, 0, 5, -5024405870974420794],
# [ 690, 23, 0, 4, -5024405870974420794],
# [ 660, 22, 0, 3, -5024405870974420794],
# [ 630, 21, 0, 2, -5024405870974420794],
# [ 600, 20, 1, 1, -5024405870974420794]], dtype=int64)
class HashSort(builtins.object)
| HashSort(a, unordered=True)
|
| The HashSort class is designed to perform sorting and grouping operations on multi-dimensional NumPy arrays
| using a hash-based algorithm. It uses the xxhash function (https://xxhash.com/) in Cython (not Python) for efficient hash computation.
|
| Parameters:
| - a (numpy.ndarray): The input multi-dimensional NumPy array.
| - unordered (bool): If True, creates the index array using multiprocessing.
|
| Methods:
| - generate_hash_array(last_dim=None): Generates a hash array based on the provided array and optional last_dim.
| - sort_by_absolut_index(ascending=True): Sorts the hash array by absolute index in ascending or descending order.
| - get_unique_dims_data(start_dim=0, end_dim=-1): Returns unique dimensions data based on hash array.
| - get_all_values(start_dim=0, end_dim=-1): Returns all values based on hash array and specified dimensions.
| - group_equal_values(start_dim=0, end_dim=-1): Groups equal values based on hash array and specified dimensions.
| - sort_by_hash(ascending=False): Sorts the hash array by hash values in ascending or descending order.
| - sort_by_quantity(ascending=False): Sorts the hash array by quantity values in ascending or descending order.
|
| Methods defined here:
|
| __init__(self, a, unordered=True)
| Initializes a new instance of the HashSort class.
|
| Parameters:
| - a (numpy.ndarray): The input multi-dimensional NumPy array.
| - unordered (bool): If True, creates the index array using multiprocessing.
|
| generate_hash_array(self, last_dim=None)
| Generates a hash array based on the provided array and optional last_dim.
|
| Parameters:
| - last_dim (int, optional): The last dimension to consider. If None, uses the last dimension of the array.
|
| Returns:
| - HashSort: The current HashSort instance.
|
| get_all_values(self, start_dim=0, end_dim=-1)
| Returns all values based on hash array and specified dimensions.
|
| Parameters:
| - start_dim (int, optional): The starting dimension index to consider.
| - end_dim (int, optional): The ending dimension index to consider.
|
| Returns:
| - numpy.ndarray: All values based on the specified dimensions.
|
| get_shape_array(self, last_dim)
| Returns the shape array based on the provided last dimension.
|
| Parameters:
| - last_dim (int): The last dimension to consider.
|
| Returns:
| - Tuple: A tuple containing the shape array and the product of array shape elements from last_dim onwards.
|
| get_unique_dims_data(self, start_dim=0, end_dim=-1)
| Returns unique dimensions data based on hash array.
|
| Parameters:
| - start_dim (int, optional): The starting dimension index to consider.
| - end_dim (int, optional): The ending dimension index to consider.
|
| Returns:
| - List[numpy.ndarray]: A list containing unique dimensions data.
|
| group_equal_values(self, start_dim=0, end_dim=-1)
| Groups equal values based on hash array and specified dimensions.
|
| Parameters:
| - start_dim (int, optional): The starting dimension index to consider.
| - end_dim (int, optional): The ending dimension index to consider.
|
| Returns:
| - numpy.ndarray: Grouped values based on the specified dimensions.
|
| sort_by_absolut_index(self, ascending=True)
| Sorts the hash array by absolute index (np.flatten()/np.ravel()) in ascending or descending order.
|
| Parameters:
| - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.
|
| Returns:
| - numpy.ndarray: The sorted hash array.
|
| sort_by_hash(self, ascending=False)
| Sorts the hash array by hash values in ascending or descending order.
|
| Parameters:
| - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.
|
| Returns:
| - numpy.ndarray: The sorted hash array.
|
| sort_by_quantity(self, ascending=False)
| Sorts the hash array by quantity values in ascending or descending order.
|
| Parameters:
| - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.
|
| Returns:
| - numpy.ndarray: The sorted hash array.
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/cythonnestednumpy",
"name": "cythonnestednumpy",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "cython,arrays",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/f8/d6/593155f5275a02cb9c62306140202307891de1bd85d6bc0a7a035c48f9d9/cythonnestednumpy-0.10.tar.gz",
"platform": null,
"description": "\r\n# performs sorting and grouping operations on multidimensional NumPy arrays using Cython and hash-based algorithms.\r\n\r\n\r\n## pip install cythonnestednumpy\r\n\r\n### Tested against Windows / Python 3.11 / Anaconda\r\n\r\n\r\n## Cython (and a C/C++ compiler) must be installed\r\n\r\n\r\n\r\n```python\r\nimport numpy as np\r\nfrom cythonnestednumpy import HashSort\r\n\r\nimg1 = np.full([900,1800,3],255,dtype=np.uint8)\r\nimg2 = np.full([900,1800,3],255,dtype=np.uint8)\r\nimg2[...,0]=0\r\nimg3 = np.full([900,1800,3],255,dtype=np.uint8)\r\nimg3[...,2]=1\r\na=np.concatenate([img1,img2,img3])\r\ncyne=HashSort(a,unordered=True)\r\n# 1st column: absolut index (using a.flatten() or a.ravel())\r\n# 2nd - n column: dimension (the more dimensions your array has, the more columns will show up)\r\n# 3rd column: The index in cyne.iterray\r\n# 4th column: 1 is for the first item (unique) found. 0 means that there has been found the same value before.\r\n# 5th column: How many matches\r\n# 6th column: Hashcode\r\n\r\n\r\ncyne.generate_hash_array(last_dim=None)\r\nresultdata=cyne.sort_by_absolut_index(ascending=True)\r\n# Out[3]:\r\n# array([[ 0, 0, 1, 1, 2497830064280488930],\r\n# [ 30, 1, 0, 2, 2497830064280488930],\r\n# [ 60, 2, 0, 3, 2497830064280488930],\r\n# [ 90, 3, 0, 4, 2497830064280488930],\r\n# ...\r\n# [ 780, 26, 0, 7, -5024405870974420794],\r\n# [ 810, 27, 0, 8, -5024405870974420794],\r\n# [ 840, 28, 0, 9, -5024405870974420794],\r\n# [ 870, 29, 0, 10, -5024405870974420794]], dtype=int64)\r\ncyne.sort_by_absolut_index(ascending=False)\r\n\r\n# Out[4]:\r\n# array([[ 870, 29, 0, 10, -5024405870974420794],\r\n# [ 840, 28, 0, 9, -5024405870974420794],\r\n# [ 810, 27, 0, 8, -5024405870974420794],\r\n# [ 780, 26, 0, 7, -5024405870974420794],\r\n# [ 750, 25, 0, 6, -5024405870974420794],\r\n# [ 720, 24, 0, 5, -5024405870974420794],\r\n# ...\r\n# [ 240, 8, 0, 9, 2497830064280488930],\r\n# [ 210, 7, 0, 8, 2497830064280488930],\r\n# [ 180, 6, 0, 7, 
2497830064280488930],\r\n# [ 150, 5, 0, 6, 2497830064280488930],\r\n# [ 120, 4, 0, 5, 2497830064280488930],\r\n# [ 90, 3, 0, 4, 2497830064280488930],\r\n# [ 60, 2, 0, 3, 2497830064280488930],\r\n# [ 30, 1, 0, 2, 2497830064280488930],\r\n# [ 0, 0, 1, 1, 2497830064280488930]], dtype=int64)\r\n\r\n\r\ncyne.get_unique_dims_data()\r\n# Out[3]:\r\n# [array([255, 255, 255], dtype=uint8),\r\n# array([255, 255, 255], dtype=uint8),\r\n# array([255, 255, 255], dtype=uint8)]\r\n\r\ncyne.get_unique_dims_data(start_dim=2,end_dim=-1)\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255]],\r\n#\r\n# [[255, 255, 255],\r\n# [255, 255, 255],\r\n# ...\r\n# [ 0, 255, 255]],\r\n#\r\n# [[ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255]],\r\n\r\ncyne.get_all_values(start_dim=0,end_dim=-1)\r\n# ...,\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255]],\r\n# ...,\r\n# [[255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1]],\r\n# ...,\r\n#allva=cyne.get_unique_dims_values()\r\ncyne.group_equal_values()\r\n# [255, 255, 1]],\r\n# [[255, 255, 1],\r\n# [255, 255, 1],\r\n# [255, 255, 1],\r\n\r\n# [ 0, 255, 255],\r\n# [ 0, 255, 255],\r\n# ...\r\n\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255],\r\n# [255, 255, 255]],\r\n\r\ngroupedvalues=cyne.group_equal_values()\r\nbyqty=cyne.sort_by_quantity(ascending=False)\r\ncyne.sort_by_hash(ascending=False)\r\n# Out[3]:\r\n# array([[ 270, 9, 0, 10, 2497830064280488930],\r\n# [ 240, 8, 0, 9, 
2497830064280488930],\r\n# [ 210, 7, 0, 8, 2497830064280488930],\r\n# [ 180, 6, 0, 7, 2497830064280488930],\r\n# ...\r\n# [ 750, 25, 0, 6, -5024405870974420794],\r\n# [ 720, 24, 0, 5, -5024405870974420794],\r\n# [ 690, 23, 0, 4, -5024405870974420794],\r\n# [ 660, 22, 0, 3, -5024405870974420794],\r\n# [ 630, 21, 0, 2, -5024405870974420794],\r\n# [ 600, 20, 1, 1, -5024405870974420794]], dtype=int64)\r\n\r\nclass HashSort(builtins.object)\r\n | HashSort(a, unordered=True)\r\n | \r\n | The HashSort class is designed to perform sorting and grouping operations on multi-dimensional NumPy arrays\r\n | using a hash-based algorithm. It utilizes the xxhash https://xxhash.com/ function (Cython! Not Python!) for efficient hash computation.\r\n | \r\n | Parameters:\r\n | - a (numpy.ndarray): The input multi-dimensional NumPy array.\r\n | - unordered (bool): If True, will create the index array with multi processing\r\n | \r\n | Methods:\r\n | - generate_hash_array(last_dim=None): Generates a hash array based on the provided array and optional last_dim.\r\n | - sort_by_absolut_index(ascending=True): Sorts the hash array by absolute index in ascending or descending order.\r\n | - get_unique_dims_data(start_dim=0, end_dim=-1): Returns unique dimensions data based on hash array.\r\n | - get_all_values(start_dim=0, end_dim=-1): Returns all values based on hash array and specified dimensions.\r\n | - group_equal_values(start_dim=0, end_dim=-1): Groups equal values based on hash array and specified dimensions.\r\n | - sort_by_hash(ascending=False): Sorts the hash array by hash values in ascending or descending order.\r\n | - sort_by_quantity(ascending=False): Sorts the hash array by quantity values in ascending or descending order.\r\n | \r\n | Methods defined here:\r\n | \r\n | __init__(self, a, unordered=True)\r\n | Initializes a new instance of the HashSort class.\r\n | \r\n | Parameters:\r\n | - a (numpy.ndarray): The input multi-dimensional NumPy array.\r\n | - unordered (bool): 
If True, will create the index array with multi processing\r\n | \r\n | generate_hash_array(self, last_dim=None)\r\n | Generates a hash array based on the provided array and optional last_dim.\r\n | \r\n | Parameters:\r\n | - last_dim (int, optional): The last dimension to consider. If None, uses the last dimension of the array.\r\n | \r\n | Returns:\r\n | - HashSort: The current HashSort instance.\r\n | \r\n | get_all_values(self, start_dim=0, end_dim=-1)\r\n | Returns all values based on hash array and specified dimensions.\r\n | \r\n | Parameters:\r\n | - start_dim (int, optional): The starting dimension index to consider.\r\n | - end_dim (int, optional): The ending dimension index to consider.\r\n | \r\n | Returns:\r\n | - numpy.ndarray: All values based on the specified dimensions.\r\n | \r\n | get_shape_array(self, last_dim)\r\n | Returns the shape array based on the provided last dimension.\r\n | \r\n | Parameters:\r\n | - last_dim (int): The last dimension to consider.\r\n | \r\n | Returns:\r\n | - Tuple: A tuple containing the shape array and the product of array shape elements from last_dim onwards.\r\n | \r\n | get_unique_dims_data(self, start_dim=0, end_dim=-1)\r\n | Returns unique dimensions data based on hash array.\r\n | \r\n | Parameters:\r\n | - start_dim (int, optional): The starting dimension index to consider.\r\n | - end_dim (int, optional): The ending dimension index to consider.\r\n | \r\n | Returns:\r\n | - List[numpy.ndarray]: A list containing unique dimensions data.\r\n | \r\n | group_equal_values(self, start_dim=0, end_dim=-1)\r\n | Groups equal values based on hash array and specified dimensions.\r\n | \r\n | Parameters:\r\n | - start_dim (int, optional): The starting dimension index to consider.\r\n | - end_dim (int, optional): The ending dimension index to consider.\r\n | \r\n | Returns:\r\n | - numpy.ndarray: Grouped values based on the specified dimensions.\r\n | \r\n | sort_by_absolut_index(self, ascending=True)\r\n | Sorts the 
hash array by absolute index (np.flatten()/np.ravel() in ascending or descending order.\r\n | \r\n | Parameters:\r\n | - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.\r\n | \r\n | Returns:\r\n | - numpy.ndarray: The sorted hash array.\r\n | \r\n | sort_by_hash(self, ascending=False)\r\n | Sorts the hash array by hash values in ascending or descending order.\r\n | \r\n | Parameters:\r\n | - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.\r\n | \r\n | Returns:\r\n | - numpy.ndarray: The sorted hash array.\r\n | \r\n | sort_by_quantity(self, ascending=False)\r\n | Sorts the hash array by quantity values in ascending or descending order.\r\n | \r\n | Parameters:\r\n | - ascending (bool, optional): If True, sorts in ascending order; otherwise, sorts in descending order.\r\n | \r\n | Returns:\r\n | - numpy.ndarray: The sorted hash array.\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "performs sorting and grouping operations on multidimensional NumPy arrays using Cython and hash-based algorithms.",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/cythonnestednumpy"
},
"split_keywords": [
"cython",
"arrays"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "a3363eb661637ef75868abb5ef5191e72dd384a9d18b6a42eabda2b38009105e",
"md5": "ce3f31f02e8f2aa222f16e3be9157eb4",
"sha256": "ae8b0fb92f6cf580b21b075bcc9031e1a3d3ea51458502a3058ca2da32bb4a05"
},
"downloads": -1,
"filename": "cythonnestednumpy-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "ce3f31f02e8f2aa222f16e3be9157eb4",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 89176,
"upload_time": "2023-12-09T18:14:58",
"upload_time_iso_8601": "2023-12-09T18:14:58.614366Z",
"url": "https://files.pythonhosted.org/packages/a3/36/3eb661637ef75868abb5ef5191e72dd384a9d18b6a42eabda2b38009105e/cythonnestednumpy-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "f8d6593155f5275a02cb9c62306140202307891de1bd85d6bc0a7a035c48f9d9",
"md5": "4f89a23f23ae59196ea46f9bcb12d9a1",
"sha256": "d2aa86997e8e2abbd445932b756b7e91277ce8b46fc08d15655eec37d9cc9d7d"
},
"downloads": -1,
"filename": "cythonnestednumpy-0.10.tar.gz",
"has_sig": false,
"md5_digest": "4f89a23f23ae59196ea46f9bcb12d9a1",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 86591,
"upload_time": "2023-12-09T18:15:00",
"upload_time_iso_8601": "2023-12-09T18:15:00.939336Z",
"url": "https://files.pythonhosted.org/packages/f8/d6/593155f5275a02cb9c62306140202307891de1bd85d6bc0a7a035c48f9d9/cythonnestednumpy-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-12-09 18:15:00",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "cythonnestednumpy",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "cythonnestednumpy"
}