group-by-continuous-sequence


Name: group-by-continuous-sequence (JSON)
Version: 0.10 (PyPI version, JSON)
download
home_page: https://github.com/hansalemaos/group_by_continuous_sequence
Summary: Groups lists/NumPy arrays by continuous sequence
upload_time: 2022-12-31 13:40:17
maintainer
docs_url: None
author: Johannes Fischer
requires_python
license: MIT
keywords: numpy, group, list, sequence
VCS
bugtrack_url
requirements: flatten_everything, intersection_grouper, numpy, pandas
Travis-CI: No Travis.
coveralls test coverage: No coveralls.
            
# Group lists/NumPy arrays by continuous sequence

```python
# Install first (shell command): pip install group-by-continuous-sequence
import numpy as np
import random
from group_by_continuous_sequence import search_sequence_in_list, search_sequence_in_list_with_repeated_numbers, find_sequence_in_np_array

iterable = random.choices(range(2, 6), k=1000)


# Out[4]: [4, 3, 4, 2, 3, 2, 3, 3, 4, 5, 2, 5, 2, 2, 2, 3, 4, 4, 4, 2, 3, 3, 5, 4, 4, 2, 5, 4 ...]

# Find groups of consecutive items (difference of 0 between neighbouring numbers in this case).
# In each tuple, the first number is the index and the second is the value.
a1 = search_sequence_in_list(
    iterable, difference=0, return_index=True, return_values=True
)
# [[(0, 4)],
#  [(1, 3)],
#  [(2, 4)],
#  [(3, 2)],
#  [(4, 3)],
#  [(5, 2)],
#  [(6, 3), (7, 3)],
#  [(8, 4)],
#  [(9, 5)],
#  [(10, 2)],
#  [(11, 5)],
#  [(12, 2), (13, 2), (14, 2)],
#  [(15, 3)],
#  [(16, 4), (17, 4), (18, 4)],
#  [(19, 2)],
#  [(20, 3), (21, 3)],
#  [(22, 5)],
#  [(23, 4), (24, 4)],
#  [(25, 2)],
#  [(26, 5)],
#  [(27, 4), (28, 4)],
#  [(29, 3)],
#  [(30, 2)],
#  [(31, 4)],
#  [(32, 5), (33, 5)],
#  [(34, 2)],
#  [(35, 4), (36, 4)]
#  ...


# (difference of 1 between consecutive numbers) - only index

a2 = search_sequence_in_list(
    iterable, difference=1, return_index=True, return_values=False
)
# [[0],
#  [1, 2],
#  [3, 4],
#  [5, 6],
#  [7, 8, 9],
#  [10],
#  [11],
#  [12],
#  [13],
#  [14, 15, 16],
#  [17],
#  [18],
#  [19, 20],
#  [21],
#  [22],
#  [23],
#  [24],
#  [25],
#  [26],
#  [27],
#  [28],
#  [29],
#  [30],
#  [31, 32],
#  ...


# (difference of 2 between consecutive numbers) - only values

a3 = search_sequence_in_list(
    iterable, difference=2, return_values=True, return_index=False
)
# ...
# [4],
# [4],
# [2],
# [3],
# [3, 5],
# [4],
# [4],
# [2],
# [5],
# [4],
# [4],
# [3],
# [2, 4],
# [5],
# [5],
# [2, 4],
# [4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5],
# [5],
# [2, 4],
# ...


# (difference of 3 between consecutive numbers) - only values

a4 = search_sequence_in_list(
    iterable, difference=3, return_values=True, return_index=False
)
# [[4],
#  [3],
#  [4],
#  [2],
#  [3],
#  [2],
#  [3],
#  [3],
#  [4],
#  [5],
#  [2, 5],
#  [2],
#  [2],
#  [2],
#  [3],
#  [4],
#  [4],
#  [4],
#  [2],
#  [3],
#  [3],
#  [5],
#  [4],
#  [4],
#  [2, 5],
#  [4],
#  [4],
#  [3],
#  [2],
#  [4],
#  [5],
#  [5],
#  [2],
#  [4],
#  [4],
#  [2, 5] ...]


# Groups may include repeated numbers, e.g. ((19, 2), (20, 3), (21, 3)).
# If ignore_only_repeated is True, runs made up of a single repeated value, e.g. (23, 4), (24, 4),
# are not grouped together, because they contain only one distinct value (4).

a21 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=1,
    return_index=True,
    return_values=True,
    ignore_only_repeated=True,
)

# [(0, 4),
#  ((1, 3), (2, 4)),
#  ((3, 2), (4, 3)),
#  ((5, 2), (6, 3), (7, 3), (8, 4), (9, 5)),
#  (10, 2),
#  (11, 5),
#  ((12, 2), (13, 2), (14, 2), (15, 3), (16, 4), (17, 4), (18, 4)),
#  ((19, 2), (20, 3), (21, 3)),
#  (22, 5),
#  (23, 4),
#  (24, 4),
#  (25, 2),
#  (26, 5),
#  (27, 4),
#  (28, 4),
#  (29, 3),
#  (30, 2),
#  ((31, 4), (32, 5), (33, 5)),
#  (34, 2), ...
#

# (difference of 2 between consecutive numbers), accepting a repeated unique number ([2, 2, 2], [4, 4, 4] ...)
a31 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=2,
    return_index=False,
    return_values=True,
    ignore_only_repeated=False,
)

# [3, 3],
# [4],
# [5],
# [2],
# [5],
# [2, 2, 2],
# [3],
# [4, 4, 4],
# [2],
# [3, 3, 5],
# [4, 4],
# [2],
# [5],
# [4, 4],
# [3],
# [2, 4],
# [5, 5],
# [2, 4, 4],
# [2],
# [5],
# [3],
# [2, 4],
# [3, 5],
# [2],
# [5, 5],
# [2, 4],
# [2],
# [3],
# [4],
# [3, 3],
# [2, 2, 2],


# (difference of 3 between consecutive numbers), not accepting a repeated unique number ([2, 2, 2], [4, 4, 4] ...)

a41 = search_sequence_in_list_with_repeated_numbers(
    iterable,
    difference=3,
    return_index=True,
    return_values=True,
    ignore_only_repeated=True,
)

# (767, 5),
# ((768, 2), (769, 5), (770, 5)),
# (771, 2),
# (772, 3),
# (773, 2),
# (774, 2),
# (775, 3),
# (776, 3),
# (777, 2),
# (778, 4),
# (779, 5),
# (780, 3),
# (781, 4),
# ((782, 2), (783, 2), (784, 5)),
# (785, 4),
# (786, 5),
# (787, 5),
# (788, 4),
# (789, 4),
# ((790, 2), (791, 5)),
# (792, 3),
# (793, 2),
# (794, 4),
# (795, 4),
# ((796, 2), (797, 5)),
# (798, 2),
# (799, 4),
# (800, 4),
# (801, 2),
# (802, 4),
# ((803, 2), (804, 2), (805, 5)),
# (806, 3),
# (807, 5),
# (808, 4),
# ((809, 2), (810, 2), (811, 5), (812, 5), (813, 5)),
# (814, 4),
# (815, 4),
# (816, 3),

# If 'inoneline' is True, all matched values are in one row ([1,2,3,4,5,6,7,8]) instead of being spread across rows ([[1,2,3,4],[5,6,7,8]]).
m = find_sequence_in_np_array(np.asarray(iterable), [3, 4, 5])


# [{'inoneline': True,
#   'location': array([[7],
#          [8],
#          [9]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[85],
#          [86],
#          [87]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[176],
#          [177],
#          [178]], dtype=int64), ...

# Works with nested arrays too
m = find_sequence_in_np_array(np.asarray(iterable).reshape((10, 10, 10)), [3, 4, 5])
# m
# Out[28]:
# [{'inoneline': True,
#   'location': array([[0, 0, 7],
#          [0, 0, 8],
#          [0, 0, 9]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[0, 8, 5],
#          [0, 8, 6],
#          [0, 8, 7]], dtype=int64),
#   'values': [3, 4, 5]},
#  {'inoneline': True,
#   'location': array([[1, 7, 6],
#          [1, 7, 7],
#          [1, 7, 8]], dtype=int64),

# and also with strings
m = find_sequence_in_np_array(
    np.asarray(iterable).reshape((10, 10, 5, 2)).astype(str), ["3", "4", "5"]
)
# [{'inoneline': False,
#   'location': array([[0, 0, 3, 1],
#          [0, 0, 4, 0],
#          [0, 0, 4, 1]], dtype=int64),
#   'values': ['3', '4', '5']},
#  {'inoneline': False,
#   'location': array([[0, 8, 2, 1],
#          [0, 8, 3, 0],
#          [0, 8, 3, 1]], dtype=int64),
#   'values': ['3', '4', '5']},
#  {'inoneline': False,
#   'location': array([[1, 7, 3, 0],
#          [1, 7, 3, 1],
#          [1, 7, 4, 0]], dtype=int64),
#   'values': ['3', '4', '5']},

```

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/group_by_continuous_sequence",
    "name": "group-by-continuous-sequence",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "numpy,group,list,sequence",
    "author": "Johannes Fischer",
    "author_email": "<aulasparticularesdealemaosp@gmail.com>",
    "download_url": "https://files.pythonhosted.org/packages/b4/92/a52cfd9337847ddce669d82b26eeba101071186a7a32538ec2ee96450a3d/group_by_continuous_sequence-0.10.tar.gz",
    "platform": null,
    "description": "\n# Group lists/NumPy arrays by continuous sequence\n\n```python\n$pip install group-by-continuous-sequence\nimport numpy as np\nimport random\nfrom group_by_continuous_sequence import search_sequence_in_list, search_sequence_in_list_with_repeated_numbers, find_sequence_in_np_array\n\niterable = random.choices(range(2, 6), k=1000)\n\n\n# Out[4]: [4, 3, 4, 2, 3, 2, 3, 3, 4, 5, 2, 5, 2, 2, 2, 3, 4, 4, 4, 2, 3, 3, 5, 4, 4, 2, 5, 4 ...]\n\n# Find groups of consecutive items. (no difference between the numbers in this case) - first number in the tuple is the index, the second is the value\n#\na1 = search_sequence_in_list(\n    iterable, difference=0, return_index=True, return_values=True\n)\n# [[(0, 4)],\n#  [(1, 3)],\n#  [(2, 4)],\n#  [(3, 2)],\n#  [(4, 3)],\n#  [(5, 2)],\n#  [(6, 3), (7, 3)],\n#  [(8, 4)],\n#  [(9, 5)],\n#  [(10, 2)],\n#  [(11, 5)],\n#  [(12, 2), (13, 2), (14, 2)],\n#  [(15, 3)],\n#  [(16, 4), (17, 4), (18, 4)],\n#  [(19, 2)],\n#  [(20, 3), (21, 3)],\n#  [(22, 5)],\n#  [(23, 4), (24, 4)],\n#  [(25, 2)],\n#  [(26, 5)],\n#  [(27, 4), (28, 4)],\n#  [(29, 3)],\n#  [(30, 2)],\n#  [(31, 4)],\n#  [(32, 5), (33, 5)],\n#  [(34, 2)],\n#  [(35, 4), (36, 4)]\n#  ...\n\n\n# (difference of 1 between consecutive numbers) - only index\n\na2 = search_sequence_in_list(\n    iterable, difference=1, return_index=True, return_values=False\n)\n# [[0],\n#  [1, 2],\n#  [3, 4],\n#  [5, 6],\n#  [7, 8, 9],\n#  [10],\n#  [11],\n#  [12],\n#  [13],\n#  [14, 15, 16],\n#  [17],\n#  [18],\n#  [19, 20],\n#  [21],\n#  [22],\n#  [23],\n#  [24],\n#  [25],\n#  [26],\n#  [27],\n#  [28],\n#  [29],\n#  [30],\n#  [31, 32],\n#  ...\n\n\n# (difference of 2 between consecutive numbers) - only values\n\na3 = search_sequence_in_list(\n    iterable, difference=2, return_values=True, return_index=False\n)\n# ...\n# [4],\n# [4],\n# [2],\n# [3],\n# [3, 5],\n# [4],\n# [4],\n# [2],\n# [5],\n# [4],\n# [4],\n# [3],\n# [2, 4],\n# [5],\n# [5],\n# [2, 4],\n# [4],\n# [2],\n# [5],\n# 
[3],\n# [2, 4],\n# [3, 5],\n# [2],\n# [5],\n# [5],\n# [2, 4],\n# ...\n\n\n# (difference of 3 between consecutive numbers) - only values\n\na4 = search_sequence_in_list(\n    iterable, difference=3, return_values=True, return_index=False\n)\n# [[4],\n#  [3],\n#  [4],\n#  [2],\n#  [3],\n#  [2],\n#  [3],\n#  [3],\n#  [4],\n#  [5],\n#  [2, 5],\n#  [2],\n#  [2],\n#  [2],\n#  [3],\n#  [4],\n#  [4],\n#  [4],\n#  [2],\n#  [3],\n#  [3],\n#  [5],\n#  [4],\n#  [4],\n#  [2, 5],\n#  [4],\n#  [4],\n#  [3],\n#  [2],\n#  [4],\n#  [5],\n#  [5],\n#  [2],\n#  [4],\n#  [4],\n#  [2, 5] ...]\n\n\n# ((19, 2), (20, 3), (21, 3)) # Includes repeated numbers\n# if ignore_only_repeated is True: Matches like: [(1,4), (1,4)] will be ignored, because there\n# is only one hit (4)\n\na21 = search_sequence_in_list_with_repeated_numbers(\n    iterable,\n    difference=1,\n    return_index=True,\n    return_values=True,\n    ignore_only_repeated=True,\n)\n\n# [(0, 4),\n#  ((1, 3), (2, 4)),\n#  ((3, 2), (4, 3)),\n#  ((5, 2), (6, 3), (7, 3), (8, 4), (9, 5)),\n#  (10, 2),\n#  (11, 5),\n#  ((12, 2), (13, 2), (14, 2), (15, 3), (16, 4), (17, 4), (18, 4)),\n#  ((19, 2), (20, 3), (21, 3)),\n#  (22, 5),\n#  (23, 4),\n#  (24, 4),\n#  (25, 2),\n#  (26, 5),\n#  (27, 4),\n#  (28, 4),\n#  (29, 3),\n#  (30, 2),\n#  ((31, 4), (32, 5), (33, 5)),\n#  (34, 2), ...\n#\n\n# difference of 2 between consecutive numbers), accepting a repeated unique number  ([2, 2, 2], [4, 4, 4] ...)\na31 = search_sequence_in_list_with_repeated_numbers(\n    iterable,\n    difference=2,\n    return_index=False,\n    return_values=True,\n    ignore_only_repeated=False,\n)\n\n# [3, 3],\n# [4],\n# [5],\n# [2],\n# [5],\n# [2, 2, 2],\n# [3],\n# [4, 4, 4],\n# [2],\n# [3, 3, 5],\n# [4, 4],\n# [2],\n# [5],\n# [4, 4],\n# [3],\n# [2, 4],\n# [5, 5],\n# [2, 4, 4],\n# [2],\n# [5],\n# [3],\n# [2, 4],\n# [3, 5],\n# [2],\n# [5, 5],\n# [2, 4],\n# [2],\n# [3],\n# [4],\n# [3, 3],\n# [2, 2, 2],\n\n\n# difference of 2 between consecutive numbers), not accepting 
a repeated unique number  ([2, 2, 2], [4, 4, 4] ...)\n\na41 = search_sequence_in_list_with_repeated_numbers(\n    iterable,\n    difference=3,\n    return_index=True,\n    return_values=True,\n    ignore_only_repeated=True,\n)\n\n# (767, 5),\n# ((768, 2), (769, 5), (770, 5)),\n# (771, 2),\n# (772, 3),\n# (773, 2),\n# (774, 2),\n# (775, 3),\n# (776, 3),\n# (777, 2),\n# (778, 4),\n# (779, 5),\n# (780, 3),\n# (781, 4),\n# ((782, 2), (783, 2), (784, 5)),\n# (785, 4),\n# (786, 5),\n# (787, 5),\n# (788, 4),\n# (789, 4),\n# ((790, 2), (791, 5)),\n# (792, 3),\n# (793, 2),\n# (794, 4),\n# (795, 4),\n# ((796, 2), (797, 5)),\n# (798, 2),\n# (799, 4),\n# (800, 4),\n# (801, 2),\n# (802, 4),\n# ((803, 2), (804, 2), (805, 5)),\n# (806, 3),\n# (807, 5),\n# (808, 4),\n# ((809, 2), (810, 2), (811, 5), (812, 5), (813, 5)),\n# (814, 4),\n# (815, 4),\n# (816, 3),\n\n# if inonline is True: all values are in one row [1,2,3,4,5,6,7,8] instead of [[1,2,3,4],[5,6,7,8]]\nm = find_sequence_in_np_array(np.asarray(iterable), [3, 4, 5])\n\n\n# [{'inoneline': True,\n#   'location': array([[7],\n#          [8],\n#          [9]], dtype=int64),\n#   'values': [3, 4, 5]},\n#  {'inoneline': True,\n#   'location': array([[85],\n#          [86],\n#          [87]], dtype=int64),\n#   'values': [3, 4, 5]},\n#  {'inoneline': True,\n#   'location': array([[176],\n#          [177],\n#          [178]], dtype=int64), ...\n\n# Works with nested arrays too\nm = find_sequence_in_np_array(np.asarray(iterable).reshape((10, 10, 10)), [3, 4, 5])\n# m\n# Out[28]:\n# [{'inoneline': True,\n#   'location': array([[0, 0, 7],\n#          [0, 0, 8],\n#          [0, 0, 9]], dtype=int64),\n#   'values': [3, 4, 5]},\n#  {'inoneline': True,\n#   'location': array([[0, 8, 5],\n#          [0, 8, 6],\n#          [0, 8, 7]], dtype=int64),\n#   'values': [3, 4, 5]},\n#  {'inoneline': True,\n#   'location': array([[1, 7, 6],\n#          [1, 7, 7],\n#          [1, 7, 8]], dtype=int64),\n\n# and also with strings\nm = 
find_sequence_in_np_array(\n    np.asarray(iterable).reshape((10, 10, 5, 2)).astype(str), [\"3\", \"4\", \"5\"]\n)\n# [{'inoneline': False,\n#   'location': array([[0, 0, 3, 1],\n#          [0, 0, 4, 0],\n#          [0, 0, 4, 1]], dtype=int64),\n#   'values': ['3', '4', '5']},\n#  {'inoneline': False,\n#   'location': array([[0, 8, 2, 1],\n#          [0, 8, 3, 0],\n#          [0, 8, 3, 1]], dtype=int64),\n#   'values': ['3', '4', '5']},\n#  {'inoneline': False,\n#   'location': array([[1, 7, 3, 0],\n#          [1, 7, 3, 1],\n#          [1, 7, 4, 0]], dtype=int64),\n#   'values': ['3', '4', '5']},\n\n```\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Groups lists/NumPy arrays by continuous sequence",
    "version": "0.10",
    "split_keywords": [
        "numpy",
        "group",
        "list",
        "sequence"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "6e28585a5139d93f2412e90566476c07",
                "sha256": "f1ab7084bbb5dc893d21b8480ce8fa1486b6ddf300f24bd8b59504790c00d5e5"
            },
            "downloads": -1,
            "filename": "group_by_continuous_sequence-0.10-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "6e28585a5139d93f2412e90566476c07",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 9420,
            "upload_time": "2022-12-31T13:40:16",
            "upload_time_iso_8601": "2022-12-31T13:40:16.277730Z",
            "url": "https://files.pythonhosted.org/packages/9c/ee/aa7c463d0fbac922502cf84b4ed2420f74c41789304e1515e4aeb4e2f200/group_by_continuous_sequence-0.10-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "md5": "1f4c3d6324735f11b6f23885fd879ab9",
                "sha256": "d718f050caadb766eb1a8c45596902b18e042c9daa8f2ffabfe2d1c9d80a6793"
            },
            "downloads": -1,
            "filename": "group_by_continuous_sequence-0.10.tar.gz",
            "has_sig": false,
            "md5_digest": "1f4c3d6324735f11b6f23885fd879ab9",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 8600,
            "upload_time": "2022-12-31T13:40:17",
            "upload_time_iso_8601": "2022-12-31T13:40:17.776552Z",
            "url": "https://files.pythonhosted.org/packages/b4/92/a52cfd9337847ddce669d82b26eeba101071186a7a32538ec2ee96450a3d/group_by_continuous_sequence-0.10.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2022-12-31 13:40:17",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "github_user": "hansalemaos",
    "github_project": "group_by_continuous_sequence",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "flatten_everything",
            "specs": []
        },
        {
            "name": "intersection_grouper",
            "specs": []
        },
        {
            "name": "numpy",
            "specs": []
        },
        {
            "name": "pandas",
            "specs": []
        }
    ],
    "lcname": "group-by-continuous-sequence"
}
        
Elapsed time: 0.05180s