parifinder


Name: parifinder (JSON)
Version: 0.10 (PyPI version, JSON)
download:
home_page: https://github.com/hansalemaos/parifinder
Summary: extracts structured data from text using user-defined delimiters (strings or regex)
upload_time: 2023-10-14 22:38:14
maintainer:
docs_url: None
author: Johannes Fischer
requires_python:
license: MIT
keywords: webscraping html parsing
VCS:
bugtrack_url:
requirements: No requirements were recorded.
Travis-CI: No Travis.
coveralls test coverage: No coveralls.
            
# Extracts structured data from text using user-defined delimiters (strings or regex)

## Tested against Windows / Python 3.11 / Anaconda

## pip install parifinder

parifinder extracts structured data from text using user-defined delimiters (strings or regex), making it versatile for data processing.

## Advantages

### Flexibility: 

`parse_pairs` handles a wide range of scenarios: it works with a single pair of delimiters or with several pairs at once, and each delimiter can be either a plain string or a regular expression (selected with `str_regex`). This flexibility makes it suitable for parsing text with many different kinds of delimiters.

### Scalability: 

It can parse multiple, nested pairs of delimiters within a given text and records the parents and children of each match, which is especially useful when dealing with documents or data containing nested elements.

### Pure Python: 

It uses only Python's standard library.


```python
from parifinder import parse_pairs
from pprint import pprint

text_0 = """[[1, 2, 2], [5], [2, 3]], 12: [[4, 4, 4], [12, 0], [6, 6]], 3: [[1, 2]][[1, 2, 2], [5], [2, 3]], 12: [[4, 4, 4], [12, 0], [6, 6]], 3: [[1, 2]]"""
s1_0 = "["
s2_0 = "]"
r0 = parse_pairs(string=text_0, s1=s1_0, s2=s2_0, str_regex=False)
print("r0-----------------------------------------------------------------")
pprint(r0, indent=1, width=1)

text_1 = "<body><p>a</p><p>a</p><p>The HTML <code>button</code> tag defines a clickable button.</p><p>x</p><p>The CSS <code>background-color</code> property defines the background color of an element.</p></body></html>"
s1_1 = "<p>"
s2_1 = "</p>"
r1 = parse_pairs(string=text_1, s1=s1_1, s2=s2_1, str_regex=False)
print("r1-----------------------------------------------------------------")
pprint(r1, indent=1, width=1)

text_2 = "[1bla[2bla/2]/1]"
s1_2 = r"\[\d"
s2_2 = r"/\d]"
r2 = parse_pairs(string=text_2, s1=s1_2, s2=s2_2, str_regex=True)
print("r2-----------------------------------------------------------------")
pprint(r2, indent=1, width=1)

text_3 = "[1bla[2bla/2]/1]"
s1_3 = [("[1", "/1]"), ("[2", "/2]")]
s2_3 = None
r3 = parse_pairs(string=text_3, s1=s1_3, s2=s2_3, str_regex=False)
print("r3-----------------------------------------------------------------")
pprint(r3, indent=1, width=1)

text_4 = "[1bla[2bla/2]/1]"
s1_4 = ["[1", "[2"]
s2_4 = ["/1]", "/2]"]
r4 = parse_pairs(string=text_4, s1=s1_4, s2=s2_4, str_regex=False)
print("r4-----------------------------------------------------------------")
pprint(r4, indent=1, width=1)


# r0-----------------------------------------------------------------
# {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23): {'children': [(1,
#                                                                                                         2,
#                                                                                                         3,
#                                                                                                         4,
#                                                                                                         5,
#                                                                                                         6,
#                                                                                                         7,
#                                                                                                         8,
#                                                                                                         9),
#                                                                                                        (17,
#                                                                                                         18,
#                                                                                                         19,
#                                                                                                         20,
#                                                                                                         21,
#                                                                                                         22),
#                                                                                                        (12,
#                                                                                                         13,
#                                                                                                         14)],
#                                                                                           'end': 23,
#                                                                                           'parents': [],
#                                                                                           'size': 23,
#                                                                                           'start': 0,
#                                                                                           'text': '[[1, '
#                                                                                                   '2, '
#                                                                                                   '2], '
#                                                                                                   '[5], '
#                                                                                                   '[2, '
#                                                                                                   '3]]'},
#  (1, 2, 3, 4, 5, 6, 7, 8, 9): {'children': [],
#                                'end': 9,
#                                'parents': [(0,
#                                             1,
#                                             2,
#                                             3,
#                                             4,
#                                             5,
#                                             6,
#                                             7,
#                                             8,
#                                             9,
#                                             10,
#                                             11,
#                                             12,
#                                             13,
#                                             14,
#                                             15,
#                                             16,
#                                             17,
#                                             18,
#                                             19,
#                                             20,
#                                             21,
#                                             22,
#                                             23)],
#                                'size': 8,
#                                'start': 1,
#                                'text': '[1, '
#                                        '2, '
#                                        '2]'},
#  (12, 13, 14): {'children': [],
#                 'end': 14,
#                 'parents': [(0,
#                              1,
#                              2,
#                              3,
#                              4,
#                              5,
#                              6,
#                              7,
#                              8,
#                              9,
#                              10,
#                              11,
#                              12,
#                              13,
#                              14,
#                              15,
#                              16,
#                              17,
#                              18,
#                              19,
#                              20,
#                              21,
#                              22,
#                              23)],
#                 'size': 2,
#                 'start': 12,
#                 'text': '[5]'},
#  (17, 18, 19, 20, 21, 22): {'children': [],
#                             'end': 22,
#                             'parents': [(0,
#                                          1,
#                                          2,
#                                          3,
#                                          4,
#                                          5,
#                                          6,
#                                          7,
#                                          8,
#                                          9,
#                                          10,
#                                          11,
#                                          12,
#                                          13,
#                                          14,
#                                          15,
#                                          16,
#                                          17,
#                                          18,
#                                          19,
#                                          20,
#                                          21,
#                                          22,
#                                          23)],
#                             'size': 5,
#                             'start': 17,
#                             'text': '[2, '
#                                     '3]'},
#  (30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57): {'children': [(31,
#                                                                                                                                   32,
#                                                                                                                                   33,
#                                                                                                                                   34,
#                                                                                                                                   35,
#                                                                                                                                   36,
#                                                                                                                                   37,
#                                                                                                                                   38,
#                                                                                                                                   39),
#                                                                                                                                  (42,
#                                                                                                                                   43,
#                                                                                                                                   44,
#                                                                                                                                   45,
#                                                                                                                                   46,
#                                                                                                                                   47,
#                                                                                                                                   48),
#                                                                                                                                  (51,
#                                                                                                                                   52,
#                                                                                                                                   53,
#                                                                                                                                   54,
#                                                                                                                                   55,
#                                                                                                                                   56)],
#                                                                                                                     'end': 57,
#                                                                                                                     'parents': [],
#                                                                                                                     'size': 27,
#                                                                                                                     'start': 30,
#                                                                                                                     'text': '[[4, '
#                                                                                                                             '4, '
#                                                                                                                             '4], '
#                                                                                                                             '[12, '
#                                                                                                                             '0], '
#                                                                                                                             '[6, '
#                                                                                                                             '6]]'},
#  (31, 32, 33, 34, 35, 36, 37, 38, 39): {'children': [],
#                                         'end': 39,
#                                         'parents': [(30,
#                                                      31,
#                                                      32,
#                                                      33,
#                                                      34,
#                                                      35,
#                                                      36,
#                                                      37,
#                                                      38,
#                                                      39,
#                                                      40,
#                                                      41,
#                                                      42,
#                                                      43,
#                                                      44,
#                                                      45,
#                                                      46,
#                                                      47,
#                                                      48,
#                                                      49,
#                                                      50,
#                                                      51,
#                                                      52,
#                                                      53,
#                                                      54,
#                                                      55,
#                                                      56,
#                                                      57)],
#                                         'size': 8,
#                                         'start': 31,
#                                         'text': '[4, '
#                                                 '4, '
#                                                 '4]'},
#  (42, 43, 44, 45, 46, 47, 48): {'children': [],
#                                 'end': 48,
#                                 'parents': [(30,
#                                              31,
#                                              32,
#                                              33,
#                                              34,
#                                              35,
#                                              36,
#                                              37,
#                                              38,
#                                              39,
#                                              40,
#                                              41,
#                                              42,
#                                              43,
#                                              44,
#                                              45,
#                                              46,
#                                              47,
#                                              48,
#                                              49,
#                                              50,
#                                              51,
#                                              52,
#                                              53,
#                                              54,
#                                              55,
#                                              56,
#                                              57)],
#                                 'size': 6,
#                                 'start': 42,
#                                 'text': '[12, '
#                                         '0]'},
#  (51, 52, 53, 54, 55, 56): {'children': [],
#                             'end': 56,
#                             'parents': [(30,
#                                          31,
#                                          32,
#                                          33,
#                                          34,
#                                          35,
#                                          36,
#                                          37,
#                                          38,
#                                          39,
#                                          40,
#                                          41,
#                                          42,
#                                          43,
#                                          44,
#                                          45,
#                                          46,
#                                          47,
#                                          48,
#                                          49,
#                                          50,
#                                          51,
#                                          52,
#                                          53,
#                                          54,
#                                          55,
#                                          56,
#                                          57)],
#                             'size': 5,
#                             'start': 51,
#                             'text': '[6, '
#                                     '6]'},
#  (63, 64, 65, 66, 67, 68, 69, 70): {'children': [(64,
#                                                   65,
#                                                   66,
#                                                   67,
#                                                   68,
#                                                   69)],
#                                     'end': 70,
#                                     'parents': [],
#                                     'size': 7,
#                                     'start': 63,
#                                     'text': '[[1, '
#                                             '2]]'},
#  (64, 65, 66, 67, 68, 69): {'children': [],
#                             'end': 69,
#                             'parents': [(63,
#                                          64,
#                                          65,
#                                          66,
#                                          67,
#                                          68,
#                                          69,
#                                          70)],
#                             'size': 5,
#                             'start': 64,
#                             'text': '[1, '
#                                     '2]'},
#  (71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94): {'children': [(72,
#                                                                                                                   73,
#                                                                                                                   74,
#                                                                                                                   75,
#                                                                                                                   76,
#                                                                                                                   77,
#                                                                                                                   78,
#                                                                                                                   79,
#                                                                                                                   80),
#                                                                                                                  (88,
#                                                                                                                   89,
#                                                                                                                   90,
#                                                                                                                   91,
#                                                                                                                   92,
#                                                                                                                   93),
#                                                                                                                  (83,
#                                                                                                                   84,
#                                                                                                                   85)],
#                                                                                                     'end': 94,
#                                                                                                     'parents': [],
#                                                                                                     'size': 23,
#                                                                                                     'start': 71,
#                                                                                                     'text': '[[1, '
#                                                                                                             '2, '
#                                                                                                             '2], '
#                                                                                                             '[5], '
#                                                                                                             '[2, '
#                                                                                                             '3]]'},
#  (72, 73, 74, 75, 76, 77, 78, 79, 80): {'children': [],
#                                         'end': 80,
#                                         'parents': [(71,
#                                                      72,
#                                                      73,
#                                                      74,
#                                                      75,
#                                                      76,
#                                                      77,
#                                                      78,
#                                                      79,
#                                                      80,
#                                                      81,
#                                                      82,
#                                                      83,
#                                                      84,
#                                                      85,
#                                                      86,
#                                                      87,
#                                                      88,
#                                                      89,
#                                                      90,
#                                                      91,
#                                                      92,
#                                                      93,
#                                                      94)],
#                                         'size': 8,
#                                         'start': 72,
#                                         'text': '[1, '
#                                                 '2, '
#                                                 '2]'},
#  (83, 84, 85): {'children': [],
#                 'end': 85,
#                 'parents': [(71,
#                              72,
#                              73,
#                              74,
#                              75,
#                              76,
#                              77,
#                              78,
#                              79,
#                              80,
#                              81,
#                              82,
#                              83,
#                              84,
#                              85,
#                              86,
#                              87,
#                              88,
#                              89,
#                              90,
#                              91,
#                              92,
#                              93,
#                              94)],
#                 'size': 2,
#                 'start': 83,
#                 'text': '[5]'},
#  (88, 89, 90, 91, 92, 93): {'children': [],
#                             'end': 93,
#                             'parents': [(71,
#                                          72,
#                                          73,
#                                          74,
#                                          75,
#                                          76,
#                                          77,
#                                          78,
#                                          79,
#                                          80,
#                                          81,
#                                          82,
#                                          83,
#                                          84,
#                                          85,
#                                          86,
#                                          87,
#                                          88,
#                                          89,
#                                          90,
#                                          91,
#                                          92,
#                                          93,
#                                          94)],
#                             'size': 5,
#                             'start': 88,
#                             'text': '[2, '
#                                     '3]'},
#  (101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128): {'children': [(102,
#                                                                                                                                                               103,
#                                                                                                                                                               104,
#                                                                                                                                                               105,
#                                                                                                                                                               106,
#                                                                                                                                                               107,
#                                                                                                                                                               108,
#                                                                                                                                                               109,
#                                                                                                                                                               110),
#                                                                                                                                                              (113,
#                                                                                                                                                               114,
#                                                                                                                                                               115,
#                                                                                                                                                               116,
#                                                                                                                                                               117,
#                                                                                                                                                               118,
#                                                                                                                                                               119),
#                                                                                                                                                              (122,
#                                                                                                                                                               123,
#                                                                                                                                                               124,
#                                                                                                                                                               125,
#                                                                                                                                                               126,
#                                                                                                                                                               127)],
#                                                                                                                                                 'end': 128,
#                                                                                                                                                 'parents': [],
#                                                                                                                                                 'size': 27,
#                                                                                                                                                 'start': 101,
#                                                                                                                                                 'text': '[[4, '
#                                                                                                                                                         '4, '
#                                                                                                                                                         '4], '
#                                                                                                                                                         '[12, '
#                                                                                                                                                         '0], '
#                                                                                                                                                         '[6, '
#                                                                                                                                                         '6]]'},
#  (102, 103, 104, 105, 106, 107, 108, 109, 110): {'children': [],
#                                                  'end': 110,
#                                                  'parents': [(101,
#                                                               102,
#                                                               103,
#                                                               104,
#                                                               105,
#                                                               106,
#                                                               107,
#                                                               108,
#                                                               109,
#                                                               110,
#                                                               111,
#                                                               112,
#                                                               113,
#                                                               114,
#                                                               115,
#                                                               116,
#                                                               117,
#                                                               118,
#                                                               119,
#                                                               120,
#                                                               121,
#                                                               122,
#                                                               123,
#                                                               124,
#                                                               125,
#                                                               126,
#                                                               127,
#                                                               128)],
#                                                  'size': 8,
#                                                  'start': 102,
#                                                  'text': '[4, '
#                                                          '4, '
#                                                          '4]'},
#  (113, 114, 115, 116, 117, 118, 119): {'children': [],
#                                        'end': 119,
#                                        'parents': [(101,
#                                                     102,
#                                                     103,
#                                                     104,
#                                                     105,
#                                                     106,
#                                                     107,
#                                                     108,
#                                                     109,
#                                                     110,
#                                                     111,
#                                                     112,
#                                                     113,
#                                                     114,
#                                                     115,
#                                                     116,
#                                                     117,
#                                                     118,
#                                                     119,
#                                                     120,
#                                                     121,
#                                                     122,
#                                                     123,
#                                                     124,
#                                                     125,
#                                                     126,
#                                                     127,
#                                                     128)],
#                                        'size': 6,
#                                        'start': 113,
#                                        'text': '[12, '
#                                                '0]'},
#  (122, 123, 124, 125, 126, 127): {'children': [],
#                                   'end': 127,
#                                   'parents': [(101,
#                                                102,
#                                                103,
#                                                104,
#                                                105,
#                                                106,
#                                                107,
#                                                108,
#                                                109,
#                                                110,
#                                                111,
#                                                112,
#                                                113,
#                                                114,
#                                                115,
#                                                116,
#                                                117,
#                                                118,
#                                                119,
#                                                120,
#                                                121,
#                                                122,
#                                                123,
#                                                124,
#                                                125,
#                                                126,
#                                                127,
#                                                128)],
#                                   'size': 5,
#                                   'start': 122,
#                                   'text': '[6, '
#                                           '6]'},
#  (134, 135, 136, 137, 138, 139, 140, 141): {'children': [(135,
#                                                           136,
#                                                           137,
#                                                           138,
#                                                           139,
#                                                           140)],
#                                             'end': 141,
#                                             'parents': [],
#                                             'size': 7,
#                                             'start': 134,
#                                             'text': '[[1, '
#                                                     '2]]'},
#  (135, 136, 137, 138, 139, 140): {'children': [],
#                                   'end': 140,
#                                   'parents': [(134,
#                                                135,
#                                                136,
#                                                137,
#                                                138,
#                                                139,
#                                                140,
#                                                141)],
#                                   'size': 5,
#                                   'start': 135,
#                                   'text': '[1, '
#                                           '2]'}}
# r1-----------------------------------------------------------------
# {(6, 7, 8, 9, 10, 11, 12, 13, 14): {'children': [],
#                                     'end': 14,
#                                     'parents': [],
#                                     'size': 9,
#                                     'start': 6,
#                                     'text': '<p>a</p>'},
#  (14, 15, 16, 17, 18, 19, 20, 21, 22): {'children': [],
#                                         'end': 22,
#                                         'parents': [],
#                                         'size': 9,
#                                         'start': 14,
#                                         'text': '<p>a</p>'},
#  (22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89): {'children': [],
#                                                                                                                                                                                                                                                                                     'end': 89,
#                                                                                                                                                                                                                                                                                     'parents': [],
#                                                                                                                                                                                                                                                                                     'size': 68,
#                                                                                                                                                                                                                                                                                     'start': 22,
#                                                                                                                                                                                                                                                                                     'text': '<p>The '
#                                                                                                                                                                                                                                                                                             'HTML '
#                                                                                                                                                                                                                                                                                             '<code>button</code> '
#                                                                                                                                                                                                                                                                                             'tag '
#                                                                                                                                                                                                                                                                                             'defines '
#                                                                                                                                                                                                                                                                                             'a '
#                                                                                                                                                                                                                                                                                             'clickable '
#                                                                                                                                                                                                                                                                                             'button.</p>'},
#  (89, 90, 91, 92, 93, 94, 95, 96, 97): {'children': [],
#                                         'end': 97,
#                                         'parents': [],
#                                         'size': 9,
#                                         'start': 89,
#                                         'text': '<p>x</p>'},
#  (97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194): {'children': [],
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'end': 194,
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'parents': [],
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'size': 98,
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'start': 97,
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'text': '<p>The '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'CSS '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    '<code>background-color</code> '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'property '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'defines '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'the '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'background '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'color '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'of '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'an '
#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'element.</p>'}}
# r2-----------------------------------------------------------------
# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],
#                                                                               'end': 16,
#                                                                               'parents': [],
#                                                                               'size': 17,
#                                                                               'start': 0,
#                                                                               'text': '[1bla[2bla/2]/1]'}},
#  ('[1', '/2]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],
#                                                                   'end': 13,
#                                                                   'parents': [],
#                                                                   'size': 14,
#                                                                   'start': 0,
#                                                                   'text': '[1bla[2bla/2]'}},
#  ('[2', '/1]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],
#                                                                'end': 16,
#                                                                'parents': [],
#                                                                'size': 12,
#                                                                'start': 5,
#                                                                'text': '[2bla/2]/1]'}},
#  ('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],
#                                                    'end': 13,
#                                                    'parents': [],
#                                                    'size': 9,
#                                                    'start': 5,
#                                                    'text': '[2bla/2]'}}}
# r3-----------------------------------------------------------------
# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],
#                                                                               'end': 16,
#                                                                               'parents': [],
#                                                                               'size': 17,
#                                                                               'start': 0,
#                                                                               'text': '[1bla[2bla/2]/1]'}},
#  ('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],
#                                                    'end': 13,
#                                                    'parents': [],
#                                                    'size': 9,
#                                                    'start': 5,
#                                                    'text': '[2bla/2]'}}}
# r4-----------------------------------------------------------------
# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],
#                                                                               'end': 16,
#                                                                               'parents': [],
#                                                                               'size': 17,
#                                                                               'start': 0,
#                                                                               'text': '[1bla[2bla/2]/1]'}},
#  ('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],
#                                                    'end': 13,
#                                                    'parents': [],
#                                                    'size': 9,
#                                                    'start': 5,
#                                                    'text': '[2bla/2]'}}}
```


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/parifinder",
    "name": "parifinder",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "webscraping,html,parsing",
    "author": "Johannes Fischer",
    "author_email": "aulasparticularesdealemaosp@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/51/b1/29731142c02192070af86a7c792d3eb7d2f5b90f6188a28df91d84b75a98/parifinder-0.10.tar.gz",
    "platform": null,
    "description": "\r\n# extracts structured data from text using user-defined delimiters (strings or regex)\r\n\r\n## Tested against Windows / Python 3.11 / Anaconda\r\n\r\n## pip install parifinder\r\n\r\nparifinder extracts structured data from text using user-defined delimiters (strings or regex), making it versatile for data processing.\r\n\r\n## Advantages\r\n\r\n### Flexibility: \r\n\r\nThe function can handle a wide range of scenarios, making it versatile for parsing text with various delimiters. It can handle both single and multiple pairs of delimiters, whether they are simple strings or complex regular expressions. This flexibility makes it suitable for different use cases.\r\n\r\n### Scalability: \r\n\r\nIt can parse multiple pairs of delimiters within a given text, which is especially useful when dealing with documents or data containing nested elements.\r\n\r\n### Pure Python: \r\n\r\nIt uses only Python's standard library\r\n\r\n\r\n```python\r\nfrom parifinder import parse_pairs\r\nfrom pprint import pprint\r\n\r\ntext_0 = \"\"\"[[1, 2, 2], [5], [2, 3]], 12: [[4, 4, 4], [12, 0], [6, 6]], 3: [[1, 2]][[1, 2, 2], [5], [2, 3]], 12: [[4, 4, 4], [12, 0], [6, 6]], 3: [[1, 2]]\"\"\"\r\ns1_0 = \"[\"\r\ns2_0 = \"]\"\r\nr0 = parse_pairs(string=text_0, s1=s1_0, s2=s2_0, str_regex=False)\r\nprint(\"r0-----------------------------------------------------------------\")\r\npprint(r0, indent=1, width=1)\r\n\r\ntext_1 = \"<body><p>a</p><p>a</p><p>The HTML <code>button</code> tag defines a clickable button.</p><p>x</p><p>The CSS <code>background-color</code> property defines the background color of an element.</p></body></html>\"\r\ns1_1 = \"<p>\"\r\ns2_1 = \"</p>\"\r\nr1 = parse_pairs(string=text_1, s1=s1_1, s2=s2_1, str_regex=False)\r\nprint(\"r1-----------------------------------------------------------------\")\r\npprint(r1, indent=1, width=1)\r\n\r\ntext_2 = \"[1bla[2bla/2]/1]\"\r\ns1_2 = r\"\\[\\d\"\r\ns2_2 = r\"/\\d]\"\r\nr2 = parse_pairs(string=text_2, 
s1=s1_2, s2=s2_2, str_regex=True)\r\nprint(\"r2-----------------------------------------------------------------\")\r\npprint(r2, indent=1, width=1)\r\n\r\ntext_3 = \"[1bla[2bla/2]/1]\"\r\ns1_3 = [(\"[1\", \"/1]\"), (\"[2\", \"/2]\")]\r\ns2_3 = None\r\nr3 = parse_pairs(string=text_3, s1=s1_3, s2=s2_3, str_regex=False)\r\nprint(\"r3-----------------------------------------------------------------\")\r\npprint(r3, indent=1, width=1)\r\n\r\ntext_4 = \"[1bla[2bla/2]/1]\"\r\ns1_4 = [\"[1\", \"[2\"]\r\ns2_4 = [\"/1]\", \"/2]\"]\r\nr4 = parse_pairs(string=text_4, s1=s1_4, s2=s2_4, str_regex=False)\r\nprint(\"r4-----------------------------------------------------------------\")\r\npprint(r4, indent=1, width=1)\r\n\r\n\r\n# r0-----------------------------------------------------------------\r\n# {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23): {'children': [(1,\r\n#                                                                                                         2,\r\n#                                                                                                         3,\r\n#                                                                                                         4,\r\n#                                                                                                         5,\r\n#                                                                                                         6,\r\n#                                                                                                         7,\r\n#                                                                                                         8,\r\n#                                                                                                         9),\r\n#                                                                                                        (17,\r\n#                                                                                
                         18,\r\n#                                                                                                         19,\r\n#                                                                                                         20,\r\n#                                                                                                         21,\r\n#                                                                                                         22),\r\n#                                                                                                        (12,\r\n#                                                                                                         13,\r\n#                                                                                                         14)],\r\n#                                                                                           'end': 23,\r\n#                                                                                           'parents': [],\r\n#                                                                                           'size': 23,\r\n#                                                                                           'start': 0,\r\n#                                                                                           'text': '[[1, '\r\n#                                                                                                   '2, '\r\n#                                                                                                   '2], '\r\n#                                                                                                   '[5], '\r\n#                                                                                                   '[2, '\r\n#                                                                                                   '3]]'},\r\n#  (1, 2, 3, 4, 5, 6, 7, 8, 9): {'children': [],\r\n#                             
   'end': 9,\r\n#                                'parents': [(0,\r\n#                                             1,\r\n#                                             2,\r\n#                                             3,\r\n#                                             4,\r\n#                                             5,\r\n#                                             6,\r\n#                                             7,\r\n#                                             8,\r\n#                                             9,\r\n#                                             10,\r\n#                                             11,\r\n#                                             12,\r\n#                                             13,\r\n#                                             14,\r\n#                                             15,\r\n#                                             16,\r\n#                                             17,\r\n#                                             18,\r\n#                                             19,\r\n#                                             20,\r\n#                                             21,\r\n#                                             22,\r\n#                                             23)],\r\n#                                'size': 8,\r\n#                                'start': 1,\r\n#                                'text': '[1, '\r\n#                                        '2, '\r\n#                                        '2]'},\r\n#  (12, 13, 14): {'children': [],\r\n#                 'end': 14,\r\n#                 'parents': [(0,\r\n#                              1,\r\n#                              2,\r\n#                              3,\r\n#                              4,\r\n#                              5,\r\n#                              6,\r\n#                              7,\r\n#                              8,\r\n#                              9,\r\n#                              
10,\r\n#                              11,\r\n#                              12,\r\n#                              13,\r\n#                              14,\r\n#                              15,\r\n#                              16,\r\n#                              17,\r\n#                              18,\r\n#                              19,\r\n#                              20,\r\n#                              21,\r\n#                              22,\r\n#                              23)],\r\n#                 'size': 2,\r\n#                 'start': 12,\r\n#                 'text': '[5]'},\r\n#  (17, 18, 19, 20, 21, 22): {'children': [],\r\n#                             'end': 22,\r\n#                             'parents': [(0,\r\n#                                          1,\r\n#                                          2,\r\n#                                          3,\r\n#                                          4,\r\n#                                          5,\r\n#                                          6,\r\n#                                          7,\r\n#                                          8,\r\n#                                          9,\r\n#                                          10,\r\n#                                          11,\r\n#                                          12,\r\n#                                          13,\r\n#                                          14,\r\n#                                          15,\r\n#                                          16,\r\n#                                          17,\r\n#                                          18,\r\n#                                          19,\r\n#                                          20,\r\n#                                          21,\r\n#                                          22,\r\n#                                          23)],\r\n#                             'size': 5,\r\n#                             'start': 17,\r\n#                  
           'text': '[2, '\r\n#                                     '3]'},\r\n#  (30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57): {'children': [(31,\r\n#                                                                                                                                   32,\r\n#                                                                                                                                   33,\r\n#                                                                                                                                   34,\r\n#                                                                                                                                   35,\r\n#                                                                                                                                   36,\r\n#                                                                                                                                   37,\r\n#                                                                                                                                   38,\r\n#                                                                                                                                   39),\r\n#                                                                                                                                  (42,\r\n#                                                                                                                                   43,\r\n#                                                                                                                                   44,\r\n#                                                                                                                                   45,\r\n#                                                                                                                  
                 46,\r\n#                                                                                                                                   47,\r\n#                                                                                                                                   48),\r\n#                                                                                                                                  (51,\r\n#                                                                                                                                   52,\r\n#                                                                                                                                   53,\r\n#                                                                                                                                   54,\r\n#                                                                                                                                   55,\r\n#                                                                                                                                   56)],\r\n#                                                                                                                     'end': 57,\r\n#                                                                                                                     'parents': [],\r\n#                                                                                                                     'size': 27,\r\n#                                                                                                                     'start': 30,\r\n#                                                                                                                     'text': '[[4, '\r\n#                                                                                                                             '4, '\r\n#                                                     
                                                                        '4], '\r\n#                                                                                                                             '[12, '\r\n#                                                                                                                             '0], '\r\n#                                                                                                                             '[6, '\r\n#                                                                                                                             '6]]'},\r\n#  (31, 32, 33, 34, 35, 36, 37, 38, 39): {'children': [],\r\n#                                         'end': 39,\r\n#                                         'parents': [(30,\r\n#                                                      31,\r\n#                                                      32,\r\n#                                                      33,\r\n#                                                      34,\r\n#                                                      35,\r\n#                                                      36,\r\n#                                                      37,\r\n#                                                      38,\r\n#                                                      39,\r\n#                                                      40,\r\n#                                                      41,\r\n#                                                      42,\r\n#                                                      43,\r\n#                                                      44,\r\n#                                                      45,\r\n#                                                      46,\r\n#                                                      47,\r\n#                                                      48,\r\n#                                                      49,\r\n#              
                                        50,\r\n#                                                      51,\r\n#                                                      52,\r\n#                                                      53,\r\n#                                                      54,\r\n#                                                      55,\r\n#                                                      56,\r\n#                                                      57)],\r\n#                                         'size': 8,\r\n#                                         'start': 31,\r\n#                                         'text': '[4, '\r\n#                                                 '4, '\r\n#                                                 '4]'},\r\n#  (42, 43, 44, 45, 46, 47, 48): {'children': [],\r\n#                                 'end': 48,\r\n#                                 'parents': [(30,\r\n#                                              31,\r\n#                                              32,\r\n#                                              33,\r\n#                                              34,\r\n#                                              35,\r\n#                                              36,\r\n#                                              37,\r\n#                                              38,\r\n#                                              39,\r\n#                                              40,\r\n#                                              41,\r\n#                                              42,\r\n#                                              43,\r\n#                                              44,\r\n#                                              45,\r\n#                                              46,\r\n#                                              47,\r\n#                                              48,\r\n#                                              49,\r\n#                                          
    50,\r\n#                                              51,\r\n#                                              52,\r\n#                                              53,\r\n#                                              54,\r\n#                                              55,\r\n#                                              56,\r\n#                                              57)],\r\n#                                 'size': 6,\r\n#                                 'start': 42,\r\n#                                 'text': '[12, '\r\n#                                         '0]'},\r\n#  (51, 52, 53, 54, 55, 56): {'children': [],\r\n#                             'end': 56,\r\n#                             'parents': [(30,\r\n#                                          31,\r\n#                                          32,\r\n#                                          33,\r\n#                                          34,\r\n#                                          35,\r\n#                                          36,\r\n#                                          37,\r\n#                                          38,\r\n#                                          39,\r\n#                                          40,\r\n#                                          41,\r\n#                                          42,\r\n#                                          43,\r\n#                                          44,\r\n#                                          45,\r\n#                                          46,\r\n#                                          47,\r\n#                                          48,\r\n#                                          49,\r\n#                                          50,\r\n#                                          51,\r\n#                                          52,\r\n#                                          53,\r\n#                                          54,\r\n#                                          55,\r\n#            
                              56,\r\n#                                          57)],\r\n#                             'size': 5,\r\n#                             'start': 51,\r\n#                             'text': '[6, '\r\n#                                     '6]'},\r\n#  (63, 64, 65, 66, 67, 68, 69, 70): {'children': [(64,\r\n#                                                   65,\r\n#                                                   66,\r\n#                                                   67,\r\n#                                                   68,\r\n#                                                   69)],\r\n#                                     'end': 70,\r\n#                                     'parents': [],\r\n#                                     'size': 7,\r\n#                                     'start': 63,\r\n#                                     'text': '[[1, '\r\n#                                             '2]]'},\r\n#  (64, 65, 66, 67, 68, 69): {'children': [],\r\n#                             'end': 69,\r\n#                             'parents': [(63,\r\n#                                          64,\r\n#                                          65,\r\n#                                          66,\r\n#                                          67,\r\n#                                          68,\r\n#                                          69,\r\n#                                          70)],\r\n#                             'size': 5,\r\n#                             'start': 64,\r\n#                             'text': '[1, '\r\n#                                     '2]'},\r\n#  (71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94): {'children': [(72,\r\n#                                                                                                                   73,\r\n#                                                                                                                 
  74,\r\n#                                                                                                                   75,\r\n#                                                                                                                   76,\r\n#                                                                                                                   77,\r\n#                                                                                                                   78,\r\n#                                                                                                                   79,\r\n#                                                                                                                   80),\r\n#                                                                                                                  (88,\r\n#                                                                                                                   89,\r\n#                                                                                                                   90,\r\n#                                                                                                                   91,\r\n#                                                                                                                   92,\r\n#                                                                                                                   93),\r\n#                                                                                                                  (83,\r\n#                                                                                                                   84,\r\n#                                                                                                                   85)],\r\n#                                                                                                     'end': 94,\r\n#                         
                                                                            'parents': [],\r\n#                                                                                                     'size': 23,\r\n#                                                                                                     'start': 71,\r\n#                                                                                                     'text': '[[1, '\r\n#                                                                                                             '2, '\r\n#                                                                                                             '2], '\r\n#                                                                                                             '[5], '\r\n#                                                                                                             '[2, '\r\n#                                                                                                             '3]]'},\r\n#  (72, 73, 74, 75, 76, 77, 78, 79, 80): {'children': [],\r\n#                                         'end': 80,\r\n#                                         'parents': [(71,\r\n#                                                      72,\r\n#                                                      73,\r\n#                                                      74,\r\n#                                                      75,\r\n#                                                      76,\r\n#                                                      77,\r\n#                                                      78,\r\n#                                                      79,\r\n#                                                      80,\r\n#                                                      81,\r\n#                                                      82,\r\n#                                                      83,\r\n#                         
                             84,\r\n#                                                      85,\r\n#                                                      86,\r\n#                                                      87,\r\n#                                                      88,\r\n#                                                      89,\r\n#                                                      90,\r\n#                                                      91,\r\n#                                                      92,\r\n#                                                      93,\r\n#                                                      94)],\r\n#                                         'size': 8,\r\n#                                         'start': 72,\r\n#                                         'text': '[1, '\r\n#                                                 '2, '\r\n#                                                 '2]'},\r\n#  (83, 84, 85): {'children': [],\r\n#                 'end': 85,\r\n#                 'parents': [(71,\r\n#                              72,\r\n#                              73,\r\n#                              74,\r\n#                              75,\r\n#                              76,\r\n#                              77,\r\n#                              78,\r\n#                              79,\r\n#                              80,\r\n#                              81,\r\n#                              82,\r\n#                              83,\r\n#                              84,\r\n#                              85,\r\n#                              86,\r\n#                              87,\r\n#                              88,\r\n#                              89,\r\n#                              90,\r\n#                              91,\r\n#                              92,\r\n#                              93,\r\n#                              94)],\r\n#                 'size': 2,\r\n#                 'start': 
83,\r\n#                 'text': '[5]'},\r\n#  (88, 89, 90, 91, 92, 93): {'children': [],\r\n#                             'end': 93,\r\n#                             'parents': [(71,\r\n#                                          72,\r\n#                                          73,\r\n#                                          74,\r\n#                                          75,\r\n#                                          76,\r\n#                                          77,\r\n#                                          78,\r\n#                                          79,\r\n#                                          80,\r\n#                                          81,\r\n#                                          82,\r\n#                                          83,\r\n#                                          84,\r\n#                                          85,\r\n#                                          86,\r\n#                                          87,\r\n#                                          88,\r\n#                                          89,\r\n#                                          90,\r\n#                                          91,\r\n#                                          92,\r\n#                                          93,\r\n#                                          94)],\r\n#                             'size': 5,\r\n#                             'start': 88,\r\n#                             'text': '[2, '\r\n#                                     '3]'},\r\n#  (101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128): {'children': [(102,\r\n#                                                                                                                                                               103,\r\n#                                                                                                                                          
                     104,\r\n#                                                                                                                                                               105,\r\n#                                                                                                                                                               106,\r\n#                                                                                                                                                               107,\r\n#                                                                                                                                                               108,\r\n#                                                                                                                                                               109,\r\n#                                                                                                                                                               110),\r\n#                                                                                                                                                              (113,\r\n#                                                                                                                                                               114,\r\n#                                                                                                                                                               115,\r\n#                                                                                                                                                               116,\r\n#                                                                                                                                                               117,\r\n#                                                                                                                         
                                      118,\r\n#                                                                                                                                                               119),\r\n#                                                                                                                                                              (122,\r\n#                                                                                                                                                               123,\r\n#                                                                                                                                                               124,\r\n#                                                                                                                                                               125,\r\n#                                                                                                                                                               126,\r\n#                                                                                                                                                               127)],\r\n#                                                                                                                                                 'end': 128,\r\n#                                                                                                                                                 'parents': [],\r\n#                                                                                                                                                 'size': 27,\r\n#                                                                                                                                                 'start': 101,\r\n#                                                                                                                             
                    'text': '[[4, '\r\n#                                                                                                                                                         '4, '\r\n#                                                                                                                                                         '4], '\r\n#                                                                                                                                                         '[12, '\r\n#                                                                                                                                                         '0], '\r\n#                                                                                                                                                         '[6, '\r\n#                                                                                                                                                         '6]]'},\r\n#  (102, 103, 104, 105, 106, 107, 108, 109, 110): {'children': [],\r\n#                                                  'end': 110,\r\n#                                                  'parents': [(101,\r\n#                                                               102,\r\n#                                                               103,\r\n#                                                               104,\r\n#                                                               105,\r\n#                                                               106,\r\n#                                                               107,\r\n#                                                               108,\r\n#                                                               109,\r\n#                                                               110,\r\n#                                                               111,\r\n#                                               
                112,\r\n#                                                               113,\r\n#                                                               114,\r\n#                                                               115,\r\n#                                                               116,\r\n#                                                               117,\r\n#                                                               118,\r\n#                                                               119,\r\n#                                                               120,\r\n#                                                               121,\r\n#                                                               122,\r\n#                                                               123,\r\n#                                                               124,\r\n#                                                               125,\r\n#                                                               126,\r\n#                                                               127,\r\n#                                                               128)],\r\n#                                                  'size': 8,\r\n#                                                  'start': 102,\r\n#                                                  'text': '[4, '\r\n#                                                          '4, '\r\n#                                                          '4]'},\r\n#  (113, 114, 115, 116, 117, 118, 119): {'children': [],\r\n#                                        'end': 119,\r\n#                                        'parents': [(101,\r\n#                                                     102,\r\n#                                                     103,\r\n#                                                     104,\r\n#                                                     105,\r\n#                                                     
106,\r\n#                                                     107,\r\n#                                                     108,\r\n#                                                     109,\r\n#                                                     110,\r\n#                                                     111,\r\n#                                                     112,\r\n#                                                     113,\r\n#                                                     114,\r\n#                                                     115,\r\n#                                                     116,\r\n#                                                     117,\r\n#                                                     118,\r\n#                                                     119,\r\n#                                                     120,\r\n#                                                     121,\r\n#                                                     122,\r\n#                                                     123,\r\n#                                                     124,\r\n#                                                     125,\r\n#                                                     126,\r\n#                                                     127,\r\n#                                                     128)],\r\n#                                        'size': 6,\r\n#                                        'start': 113,\r\n#                                        'text': '[12, '\r\n#                                                '0]'},\r\n#  (122, 123, 124, 125, 126, 127): {'children': [],\r\n#                                   'end': 127,\r\n#                                   'parents': [(101,\r\n#                                                102,\r\n#                                                103,\r\n#                                                104,\r\n#                                                105,\r\n#  
                                              106,\r\n#                                                107,\r\n#                                                108,\r\n#                                                109,\r\n#                                                110,\r\n#                                                111,\r\n#                                                112,\r\n#                                                113,\r\n#                                                114,\r\n#                                                115,\r\n#                                                116,\r\n#                                                117,\r\n#                                                118,\r\n#                                                119,\r\n#                                                120,\r\n#                                                121,\r\n#                                                122,\r\n#                                                123,\r\n#                                                124,\r\n#                                                125,\r\n#                                                126,\r\n#                                                127,\r\n#                                                128)],\r\n#                                   'size': 5,\r\n#                                   'start': 122,\r\n#                                   'text': '[6, '\r\n#                                           '6]'},\r\n#  (134, 135, 136, 137, 138, 139, 140, 141): {'children': [(135,\r\n#                                                           136,\r\n#                                                           137,\r\n#                                                           138,\r\n#                                                           139,\r\n#                                                           140)],\r\n#                                             'end': 141,\r\n#       
                                      'parents': [],\r\n#                                             'size': 7,\r\n#                                             'start': 134,\r\n#                                             'text': '[[1, '\r\n#                                                     '2]]'},\r\n#  (135, 136, 137, 138, 139, 140): {'children': [],\r\n#                                   'end': 140,\r\n#                                   'parents': [(134,\r\n#                                                135,\r\n#                                                136,\r\n#                                                137,\r\n#                                                138,\r\n#                                                139,\r\n#                                                140,\r\n#                                                141)],\r\n#                                   'size': 5,\r\n#                                   'start': 135,\r\n#                                   'text': '[1, '\r\n#                                           '2]'}}\r\n# r1-----------------------------------------------------------------\r\n# {(6, 7, 8, 9, 10, 11, 12, 13, 14): {'children': [],\r\n#                                     'end': 14,\r\n#                                     'parents': [],\r\n#                                     'size': 9,\r\n#                                     'start': 6,\r\n#                                     'text': '<p>a</p>'},\r\n#  (14, 15, 16, 17, 18, 19, 20, 21, 22): {'children': [],\r\n#                                         'end': 22,\r\n#                                         'parents': [],\r\n#                                         'size': 9,\r\n#                                         'start': 14,\r\n#                                         'text': '<p>a</p>'},\r\n#  (22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89): {'children': [],\r\n#                                                                                                                                                                                                                                                                                     'end': 89,\r\n#                                                                                                                                                                                                                                                                                     'parents': [],\r\n#                                                                                                                                                                                                                                                                                     'size': 68,\r\n#                                                                                                                                                                                                                                                                                     'start': 22,\r\n#                                                                                                                                                                                                                                                                                     'text': '<p>The '\r\n#                                                                                                                                                                                                                                                                                             'HTML '\r\n#                                                                                   
                                                                                                                                                                                                          '<code>button</code> '\r\n#                                                                                                                                                                                                                                                                                             'tag '\r\n#                                                                                                                                                                                                                                                                                             'defines '\r\n#                                                                                                                                                                                                                                                                                             'a '\r\n#                                                                                                                                                                                                                                                                                             'clickable '\r\n#                                                                                                                                                                                                                                                                                             'button.</p>'},\r\n#  (89, 90, 91, 92, 93, 94, 95, 96, 97): {'children': [],\r\n#                                         'end': 97,\r\n#                                         'parents': [],\r\n#                                         'size': 9,\r\n#                                         
'start': 89,\r\n#                                         'text': '<p>x</p>'},\r\n#  (97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194): {'children': [],\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'end': 194,\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'parents': [],\r\n#                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                          'size': 98,\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'start': 97,\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            'text': '<p>The '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'CSS '\r\n#                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                            '<code>background-color</code> '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'property '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'defines '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'the '\r\n#                                                                                                                                                                                                                                                                         
                                                                                                                                                                                                                                           'background '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'color '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'of '\r\n#                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    'an '\r\n#                                                                                                                                                                                                                      
                                                                                                                                                                                                                                                                                              'element.</p>'}}\r\n# r2-----------------------------------------------------------------\r\n# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],\r\n#                                                                               'end': 16,\r\n#                                                                               'parents': [],\r\n#                                                                               'size': 17,\r\n#                                                                               'start': 0,\r\n#                                                                               'text': '[1bla[2bla/2]/1]'}},\r\n#  ('[1', '/2]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],\r\n#                                                                   'end': 13,\r\n#                                                                   'parents': [],\r\n#                                                                   'size': 14,\r\n#                                                                   'start': 0,\r\n#                                                                   'text': '[1bla[2bla/2]'}},\r\n#  ('[2', '/1]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],\r\n#                                                                'end': 16,\r\n#                                                                'parents': [],\r\n#                                                                'size': 12,\r\n#                                                                'start': 5,\r\n#                                                                'text': '[2bla/2]/1]'}},\r\n#  
('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],\r\n#                                                    'end': 13,\r\n#                                                    'parents': [],\r\n#                                                    'size': 9,\r\n#                                                    'start': 5,\r\n#                                                    'text': '[2bla/2]'}}}\r\n# r3-----------------------------------------------------------------\r\n# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],\r\n#                                                                               'end': 16,\r\n#                                                                               'parents': [],\r\n#                                                                               'size': 17,\r\n#                                                                               'start': 0,\r\n#                                                                               'text': '[1bla[2bla/2]/1]'}},\r\n#  ('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],\r\n#                                                    'end': 13,\r\n#                                                    'parents': [],\r\n#                                                    'size': 9,\r\n#                                                    'start': 5,\r\n#                                                    'text': '[2bla/2]'}}}\r\n# r4-----------------------------------------------------------------\r\n# {('[1', '/1]'): {(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16): {'children': [],\r\n#                                                                               'end': 16,\r\n#                                                                               'parents': [],\r\n#                                                                               'size': 17,\r\n#                              
                                                 'start': 0,\r\n#                                                                               'text': '[1bla[2bla/2]/1]'}},\r\n#  ('[2', '/2]'): {(5, 6, 7, 8, 9, 10, 11, 12, 13): {'children': [],\r\n#                                                    'end': 13,\r\n#                                                    'parents': [],\r\n#                                                    'size': 9,\r\n#                                                    'start': 5,\r\n#                                                    'text': '[2bla/2]'}}}\r\n```\r\n\r\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "extracts structured data from text using user-defined delimiters (strings or regex)",
    "version": "0.10",
    "project_urls": {
        "Homepage": "https://github.com/hansalemaos/parifinder"
    },
    "split_keywords": [
        "webscraping",
        "html",
        "parsing"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "9137989178c4f74ba41d23a6d99af5eabbaf1ddab92b6e8f9c29ad2485f950a4",
                "md5": "a70d380d498974b173caf0ff6506db38",
                "sha256": "8303552a9b79fa03f37b765dd3ba1f75e94faf5c18d1509e48b25a53904bfc9f"
            },
            "downloads": -1,
            "filename": "parifinder-0.10-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "a70d380d498974b173caf0ff6506db38",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 22270,
            "upload_time": "2023-10-14T22:38:12",
            "upload_time_iso_8601": "2023-10-14T22:38:12.549680Z",
            "url": "https://files.pythonhosted.org/packages/91/37/989178c4f74ba41d23a6d99af5eabbaf1ddab92b6e8f9c29ad2485f950a4/parifinder-0.10-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "51b129731142c02192070af86a7c792d3eb7d2f5b90f6188a28df91d84b75a98",
                "md5": "1e67f1daa3046ced3830d334fa44d3f0",
                "sha256": "1443ce07cef731f57e74c55744614fa1a52b6e1f8873f24be2ee1c82543f3906"
            },
            "downloads": -1,
            "filename": "parifinder-0.10.tar.gz",
            "has_sig": false,
            "md5_digest": "1e67f1daa3046ced3830d334fa44d3f0",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 29865,
            "upload_time": "2023-10-14T22:38:14",
            "upload_time_iso_8601": "2023-10-14T22:38:14.828496Z",
            "url": "https://files.pythonhosted.org/packages/51/b1/29731142c02192070af86a7c792d3eb7d2f5b90f6188a28df91d84b75a98/parifinder-0.10.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-10-14 22:38:14",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "hansalemaos",
    "github_project": "parifinder",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [],
    "lcname": "parifinder"
}
        
Elapsed time: 0.27255s