# Group lists/NumPy arrays by continuous sequence
```python
# Install first (shell command): pip install group-by-continuous-sequence
import numpy as np
import random
from group_by_continuous_sequence import search_sequence_in_list, search_sequence_in_list_with_repeated_numbers, find_sequence_in_np_array
iterable = random.choices(range(2, 6), k=1000)
# Out[4]: [4, 3, 4, 2, 3, 2, 3, 3, 4, 5, 2, 5, 2, 2, 2, 3, 4, 4, 4, 2, 3, 3, 5, 4, 4, 2, 5, 4 ...]
# Find groups of consecutive items (difference of 0 here, i.e. runs of equal numbers). In each tuple, the first number is the index and the second is the value.
#
a1 = search_sequence_in_list(
iterable, difference=0, return_index=True, return_values=True
)
# [[(0, 4)],
# [(1, 3)],
# [(2, 4)],
# [(3, 2)],
# [(4, 3)],
# [(5, 2)],
# [(6, 3), (7, 3)],
# [(8, 4)],
# [(9, 5)],
# [(10, 2)],
# [(11, 5)],
# [(12, 2), (13, 2), (14, 2)],
# [(15, 3)],
# [(16, 4), (17, 4), (18, 4)],
# [(19, 2)],
# [(20, 3), (21, 3)],
# [(22, 5)],
# [(23, 4), (24, 4)],
# [(25, 2)],
# [(26, 5)],
# [(27, 4), (28, 4)],
# [(29, 3)],
# [(30, 2)],
# [(31, 4)],
# [(32, 5), (33, 5)],
# [(34, 2)],
# [(35, 4), (36, 4)]
# ...
# (difference of 1 between consecutive numbers) - only index
a2 = search_sequence_in_list(
iterable, difference=1, return_index=True, return_values=False
)
# [[0],
# [1, 2],
# [3, 4],
# [5, 6],
# [7, 8, 9],
# [10],
# [11],
# [12],
# [13],
# [14, 15, 16],
# [17],
# [18],
# [19, 20],
# [21],
# [22],
# [23],
# [24],
# [25],
# [26],
# [27],
# [28],
# [29],
# [30],
# [31, 32],
# ...
# (difference of 2 between consecutive numbers) - only values
a3 = search_sequence_in_list(
iterable, difference=2, return_values=True, return_index=False
)
# ...
# [4],
# [4],
# [2],
# [3],
# [3, 5],
# [4],
# [4],
# [2],
# [5],
# [4],
# [4],
# [3],
# [2, 4],
# [5],
# [5],
# [2, 4],
# [4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5],
# [5],
# [2, 4],
# ...
# (difference of 3 between consecutive numbers) - only values
a4 = search_sequence_in_list(
iterable, difference=3, return_values=True, return_index=False
)
# [[4],
# [3],
# [4],
# [2],
# [3],
# [2],
# [3],
# [3],
# [4],
# [5],
# [2, 5],
# [2],
# [2],
# [2],
# [3],
# [4],
# [4],
# [4],
# [2],
# [3],
# [3],
# [5],
# [4],
# [4],
# [2, 5],
# [4],
# [4],
# [3],
# [2],
# [4],
# [5],
# [5],
# [2],
# [4],
# [4],
# [2, 5] ...]
# Groups may include repeated numbers, e.g. ((19, 2), (20, 3), (21, 3)).
# If ignore_only_repeated is True, matches like [(1,4), (1,4)] will be ignored, because
# they contain only one distinct value (4).
a21 = search_sequence_in_list_with_repeated_numbers(
iterable,
difference=1,
return_index=True,
return_values=True,
ignore_only_repeated=True,
)
# [(0, 4),
# ((1, 3), (2, 4)),
# ((3, 2), (4, 3)),
# ((5, 2), (6, 3), (7, 3), (8, 4), (9, 5)),
# (10, 2),
# (11, 5),
# ((12, 2), (13, 2), (14, 2), (15, 3), (16, 4), (17, 4), (18, 4)),
# ((19, 2), (20, 3), (21, 3)),
# (22, 5),
# (23, 4),
# (24, 4),
# (25, 2),
# (26, 5),
# (27, 4),
# (28, 4),
# (29, 3),
# (30, 2),
# ((31, 4), (32, 5), (33, 5)),
# (34, 2), ...
#
# Difference of 2 between consecutive numbers, also accepting groups that consist of a single repeated number ([2, 2, 2], [4, 4, 4] ...)
a31 = search_sequence_in_list_with_repeated_numbers(
iterable,
difference=2,
return_index=False,
return_values=True,
ignore_only_repeated=False,
)
# [3, 3],
# [4],
# [5],
# [2],
# [5],
# [2, 2, 2],
# [3],
# [4, 4, 4],
# [2],
# [3, 3, 5],
# [4, 4],
# [2],
# [5],
# [4, 4],
# [3],
# [2, 4],
# [5, 5],
# [2, 4, 4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5, 5],
# [2, 4],
# [2],
# [3],
# [4],
# [3, 3],
# [2, 2, 2],
# Difference of 3 between consecutive numbers, not accepting groups that consist of a single repeated number ([2, 2, 2], [4, 4, 4] ...)
a41 = search_sequence_in_list_with_repeated_numbers(
iterable,
difference=3,
return_index=True,
return_values=True,
ignore_only_repeated=True,
)
# (767, 5),
# ((768, 2), (769, 5), (770, 5)),
# (771, 2),
# (772, 3),
# (773, 2),
# (774, 2),
# (775, 3),
# (776, 3),
# (777, 2),
# (778, 4),
# (779, 5),
# (780, 3),
# (781, 4),
# ((782, 2), (783, 2), (784, 5)),
# (785, 4),
# (786, 5),
# (787, 5),
# (788, 4),
# (789, 4),
# ((790, 2), (791, 5)),
# (792, 3),
# (793, 2),
# (794, 4),
# (795, 4),
# ((796, 2), (797, 5)),
# (798, 2),
# (799, 4),
# (800, 4),
# (801, 2),
# (802, 4),
# ((803, 2), (804, 2), (805, 5)),
# (806, 3),
# (807, 5),
# (808, 4),
# ((809, 2), (810, 2), (811, 5), (812, 5), (813, 5)),
# (814, 4),
# (815, 4),
# (816, 3),
# If 'inoneline' is True: all matched values are in one row [1,2,3,4,5,6,7,8] instead of spread over several rows [[1,2,3,4],[5,6,7,8]]
m = find_sequence_in_np_array(np.asarray(iterable), [3, 4, 5])
# [{'inoneline': True,
# 'location': array([[7],
# [8],
# [9]], dtype=int64),
# 'values': [3, 4, 5]},
# {'inoneline': True,
# 'location': array([[85],
# [86],
# [87]], dtype=int64),
# 'values': [3, 4, 5]},
# {'inoneline': True,
# 'location': array([[176],
# [177],
# [178]], dtype=int64), ...
# Works with nested arrays too
m = find_sequence_in_np_array(np.asarray(iterable).reshape((10, 10, 10)), [3, 4, 5])
# m
# Out[28]:
# [{'inoneline': True,
# 'location': array([[0, 0, 7],
# [0, 0, 8],
# [0, 0, 9]], dtype=int64),
# 'values': [3, 4, 5]},
# {'inoneline': True,
# 'location': array([[0, 8, 5],
# [0, 8, 6],
# [0, 8, 7]], dtype=int64),
# 'values': [3, 4, 5]},
# {'inoneline': True,
# 'location': array([[1, 7, 6],
# [1, 7, 7],
# [1, 7, 8]], dtype=int64),
# and also with strings
m = find_sequence_in_np_array(
np.asarray(iterable).reshape((10, 10, 5, 2)).astype(str), ["3", "4", "5"]
)
# [{'inoneline': False,
# 'location': array([[0, 0, 3, 1],
# [0, 0, 4, 0],
# [0, 0, 4, 1]], dtype=int64),
# 'values': ['3', '4', '5']},
# {'inoneline': False,
# 'location': array([[0, 8, 2, 1],
# [0, 8, 3, 0],
# [0, 8, 3, 1]], dtype=int64),
# 'values': ['3', '4', '5']},
# {'inoneline': False,
# 'location': array([[1, 7, 3, 0],
# [1, 7, 3, 1],
# [1, 7, 4, 0]], dtype=int64),
# 'values': ['3', '4', '5']},
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/group_by_continuous_sequence",
"name": "group-by-continuous-sequence",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "numpy,group,list,sequence",
"author": "Johannes Fischer",
"author_email": "<aulasparticularesdealemaosp@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/b4/92/a52cfd9337847ddce669d82b26eeba101071186a7a32538ec2ee96450a3d/group_by_continuous_sequence-0.10.tar.gz",
"platform": null,
"description": "\n# Group lists/NumPy arrays by continuous sequence\n\n```python\n$pip install group-by-continuous-sequence\nimport numpy as np\nimport random\nfrom group_by_continuous_sequence import search_sequence_in_list, search_sequence_in_list_with_repeated_numbers, find_sequence_in_np_array\n\niterable = random.choices(range(2, 6), k=1000)\n\n\n# Out[4]: [4, 3, 4, 2, 3, 2, 3, 3, 4, 5, 2, 5, 2, 2, 2, 3, 4, 4, 4, 2, 3, 3, 5, 4, 4, 2, 5, 4 ...]\n\n# Find groups of consecutive items. (no difference between the numbers in this case) - first number in the tuple is the index, the second is the value\n#\na1 = search_sequence_in_list(\n iterable, difference=0, return_index=True, return_values=True\n)\n# [[(0, 4)],\n# [(1, 3)],\n# [(2, 4)],\n# [(3, 2)],\n# [(4, 3)],\n# [(5, 2)],\n# [(6, 3), (7, 3)],\n# [(8, 4)],\n# [(9, 5)],\n# [(10, 2)],\n# [(11, 5)],\n# [(12, 2), (13, 2), (14, 2)],\n# [(15, 3)],\n# [(16, 4), (17, 4), (18, 4)],\n# [(19, 2)],\n# [(20, 3), (21, 3)],\n# [(22, 5)],\n# [(23, 4), (24, 4)],\n# [(25, 2)],\n# [(26, 5)],\n# [(27, 4), (28, 4)],\n# [(29, 3)],\n# [(30, 2)],\n# [(31, 4)],\n# [(32, 5), (33, 5)],\n# [(34, 2)],\n# [(35, 4), (36, 4)]\n# ...\n\n\n# (difference of 1 between consecutive numbers) - only index\n\na2 = search_sequence_in_list(\n iterable, difference=1, return_index=True, return_values=False\n)\n# [[0],\n# [1, 2],\n# [3, 4],\n# [5, 6],\n# [7, 8, 9],\n# [10],\n# [11],\n# [12],\n# [13],\n# [14, 15, 16],\n# [17],\n# [18],\n# [19, 20],\n# [21],\n# [22],\n# [23],\n# [24],\n# [25],\n# [26],\n# [27],\n# [28],\n# [29],\n# [30],\n# [31, 32],\n# ...\n\n\n# (difference of 2 between consecutive numbers) - only values\n\na3 = search_sequence_in_list(\n iterable, difference=2, return_values=True, return_index=False\n)\n# ...\n# [4],\n# [4],\n# [2],\n# [3],\n# [3, 5],\n# [4],\n# [4],\n# [2],\n# [5],\n# [4],\n# [4],\n# [3],\n# [2, 4],\n# [5],\n# [5],\n# [2, 4],\n# [4],\n# [2],\n# [5],\n# [3],\n# [2, 4],\n# [3, 5],\n# [2],\n# [5],\n# [5],\n# [2, 4],\n# 
...\n\n\n# (difference of 3 between consecutive numbers) - only values\n\na4 = search_sequence_in_list(\n iterable, difference=3, return_values=True, return_index=False\n)\n# [[4],\n# [3],\n# [4],\n# [2],\n# [3],\n# [2],\n# [3],\n# [3],\n# [4],\n# [5],\n# [2, 5],\n# [2],\n# [2],\n# [2],\n# [3],\n# [4],\n# [4],\n# [4],\n# [2],\n# [3],\n# [3],\n# [5],\n# [4],\n# [4],\n# [2, 5],\n# [4],\n# [4],\n# [3],\n# [2],\n# [4],\n# [5],\n# [5],\n# [2],\n# [4],\n# [4],\n# [2, 5] ...]\n\n\n# ((19, 2), (20, 3), (21, 3)) # Includes repeated numbers\n# if ignore_only_repeated is True: Matches like: [(1,4), (1,4)] will be ignored, because there\n# is only one hit (4)\n\na21 = search_sequence_in_list_with_repeated_numbers(\n iterable,\n difference=1,\n return_index=True,\n return_values=True,\n ignore_only_repeated=True,\n)\n\n# [(0, 4),\n# ((1, 3), (2, 4)),\n# ((3, 2), (4, 3)),\n# ((5, 2), (6, 3), (7, 3), (8, 4), (9, 5)),\n# (10, 2),\n# (11, 5),\n# ((12, 2), (13, 2), (14, 2), (15, 3), (16, 4), (17, 4), (18, 4)),\n# ((19, 2), (20, 3), (21, 3)),\n# (22, 5),\n# (23, 4),\n# (24, 4),\n# (25, 2),\n# (26, 5),\n# (27, 4),\n# (28, 4),\n# (29, 3),\n# (30, 2),\n# ((31, 4), (32, 5), (33, 5)),\n# (34, 2), ...\n#\n\n# difference of 2 between consecutive numbers), accepting a repeated unique number ([2, 2, 2], [4, 4, 4] ...)\na31 = search_sequence_in_list_with_repeated_numbers(\n iterable,\n difference=2,\n return_index=False,\n return_values=True,\n ignore_only_repeated=False,\n)\n\n# [3, 3],\n# [4],\n# [5],\n# [2],\n# [5],\n# [2, 2, 2],\n# [3],\n# [4, 4, 4],\n# [2],\n# [3, 3, 5],\n# [4, 4],\n# [2],\n# [5],\n# [4, 4],\n# [3],\n# [2, 4],\n# [5, 5],\n# [2, 4, 4],\n# [2],\n# [5],\n# [3],\n# [2, 4],\n# [3, 5],\n# [2],\n# [5, 5],\n# [2, 4],\n# [2],\n# [3],\n# [4],\n# [3, 3],\n# [2, 2, 2],\n\n\n# difference of 2 between consecutive numbers), not accepting a repeated unique number ([2, 2, 2], [4, 4, 4] ...)\n\na41 = search_sequence_in_list_with_repeated_numbers(\n iterable,\n difference=3,\n 
return_index=True,\n return_values=True,\n ignore_only_repeated=True,\n)\n\n# (767, 5),\n# ((768, 2), (769, 5), (770, 5)),\n# (771, 2),\n# (772, 3),\n# (773, 2),\n# (774, 2),\n# (775, 3),\n# (776, 3),\n# (777, 2),\n# (778, 4),\n# (779, 5),\n# (780, 3),\n# (781, 4),\n# ((782, 2), (783, 2), (784, 5)),\n# (785, 4),\n# (786, 5),\n# (787, 5),\n# (788, 4),\n# (789, 4),\n# ((790, 2), (791, 5)),\n# (792, 3),\n# (793, 2),\n# (794, 4),\n# (795, 4),\n# ((796, 2), (797, 5)),\n# (798, 2),\n# (799, 4),\n# (800, 4),\n# (801, 2),\n# (802, 4),\n# ((803, 2), (804, 2), (805, 5)),\n# (806, 3),\n# (807, 5),\n# (808, 4),\n# ((809, 2), (810, 2), (811, 5), (812, 5), (813, 5)),\n# (814, 4),\n# (815, 4),\n# (816, 3),\n\n# if inonline is True: all values are in one row [1,2,3,4,5,6,7,8] instead of [[1,2,3,4],[5,6,7,8]]\nm = find_sequence_in_np_array(np.asarray(iterable), [3, 4, 5])\n\n\n# [{'inoneline': True,\n# 'location': array([[7],\n# [8],\n# [9]], dtype=int64),\n# 'values': [3, 4, 5]},\n# {'inoneline': True,\n# 'location': array([[85],\n# [86],\n# [87]], dtype=int64),\n# 'values': [3, 4, 5]},\n# {'inoneline': True,\n# 'location': array([[176],\n# [177],\n# [178]], dtype=int64), ...\n\n# Works with nested arrays too\nm = find_sequence_in_np_array(np.asarray(iterable).reshape((10, 10, 10)), [3, 4, 5])\n# m\n# Out[28]:\n# [{'inoneline': True,\n# 'location': array([[0, 0, 7],\n# [0, 0, 8],\n# [0, 0, 9]], dtype=int64),\n# 'values': [3, 4, 5]},\n# {'inoneline': True,\n# 'location': array([[0, 8, 5],\n# [0, 8, 6],\n# [0, 8, 7]], dtype=int64),\n# 'values': [3, 4, 5]},\n# {'inoneline': True,\n# 'location': array([[1, 7, 6],\n# [1, 7, 7],\n# [1, 7, 8]], dtype=int64),\n\n# and also with strings\nm = find_sequence_in_np_array(\n np.asarray(iterable).reshape((10, 10, 5, 2)).astype(str), [\"3\", \"4\", \"5\"]\n)\n# [{'inoneline': False,\n# 'location': array([[0, 0, 3, 1],\n# [0, 0, 4, 0],\n# [0, 0, 4, 1]], dtype=int64),\n# 'values': ['3', '4', '5']},\n# {'inoneline': False,\n# 'location': array([[0, 
8, 2, 1],\n# [0, 8, 3, 0],\n# [0, 8, 3, 1]], dtype=int64),\n# 'values': ['3', '4', '5']},\n# {'inoneline': False,\n# 'location': array([[1, 7, 3, 0],\n# [1, 7, 3, 1],\n# [1, 7, 4, 0]], dtype=int64),\n# 'values': ['3', '4', '5']},\n\n```\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Groups lists/NumPy arrays by continuous sequence",
"version": "0.10",
"split_keywords": [
"numpy",
"group",
"list",
"sequence"
],
"urls": [
{
"comment_text": "",
"digests": {
"md5": "6e28585a5139d93f2412e90566476c07",
"sha256": "f1ab7084bbb5dc893d21b8480ce8fa1486b6ddf300f24bd8b59504790c00d5e5"
},
"downloads": -1,
"filename": "group_by_continuous_sequence-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "6e28585a5139d93f2412e90566476c07",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 9420,
"upload_time": "2022-12-31T13:40:16",
"upload_time_iso_8601": "2022-12-31T13:40:16.277730Z",
"url": "https://files.pythonhosted.org/packages/9c/ee/aa7c463d0fbac922502cf84b4ed2420f74c41789304e1515e4aeb4e2f200/group_by_continuous_sequence-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"md5": "1f4c3d6324735f11b6f23885fd879ab9",
"sha256": "d718f050caadb766eb1a8c45596902b18e042c9daa8f2ffabfe2d1c9d80a6793"
},
"downloads": -1,
"filename": "group_by_continuous_sequence-0.10.tar.gz",
"has_sig": false,
"md5_digest": "1f4c3d6324735f11b6f23885fd879ab9",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 8600,
"upload_time": "2022-12-31T13:40:17",
"upload_time_iso_8601": "2022-12-31T13:40:17.776552Z",
"url": "https://files.pythonhosted.org/packages/b4/92/a52cfd9337847ddce669d82b26eeba101071186a7a32538ec2ee96450a3d/group_by_continuous_sequence-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2022-12-31 13:40:17",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "hansalemaos",
"github_project": "group_by_continuous_sequence",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "flatten_everything",
"specs": []
},
{
"name": "intersection_grouper",
"specs": []
},
{
"name": "numpy",
"specs": []
},
{
"name": "pandas",
"specs": []
}
],
"lcname": "group-by-continuous-sequence"
}