# PYRipGREP
Use the insanely fast regex engine ripgrep (https://github.com/BurntSushi/ripgrep) as a Python module! Search results are converted directly to dict/numpy/pandas/generator.
You can use the class `PyRipGREP` as you would use ripgrep, but you will get a string as the result. Because of that, I created the class `RePatterns`, where your results are directly converted to dict/numpy/pandas/generator.
```bash
# Install
pip install PyRipGrep
```
Check it out:
You'll find the files xaa.txt / xab.txt here: https://github.com/hansalemaos/PYRipGREP/tree/main/textfilesfortests
Make sure rg.exe is on your PATH, or pass its location when you create the instance:
```python
RePatterns(executeable=r"c:\path\rg.exe")
```
```python
outputtype = "np"
suchennach = ["weniger", "mehr"]
filetosearch = [
r"F:\woerterbuecher\wtxt\xaa.txt", # download here: https://github.com/hansalemaos/PYRipGREP/blob/main/textfilesfortests/xaa.txt
r"F:\woerterbuecher\wtxt\xab.txt", #download here: https://github.com/hansalemaos/PYRipGREP/blob/main/textfilesfortests/xab.txt
]
np_or_df = "np"
binary = True
dfa_size = "1G" # Yes, I have a lot of RAM hahaha
ignore_case = True
df = RePatterns(executeable=r"rg.exe").find_all_in_files(
re_expression=suchennach,
path_to_search=filetosearch,
outputtype=outputtype,
binary=binary,
dfa_size=dfa_size,
ignore_case=ignore_case,
)
print(f"{df=}")
suchennach = ["sein"]
df2 = RePatterns().find_all_in_files(
re_expression=suchennach,
path_to_search=filetosearch,
outputtype=outputtype,
binary=binary,
dfa_size=dfa_size,
ignore_case=ignore_case,
)
print(f"{df2=}")
df3 = RePatterns().find_all_in_files(
re_expression=[r"Buch"],
path_to_search=filetosearch,
outputtype=outputtype,
binary=False,
dfa_size=dfa_size,
ignore_case=ignore_case,
)
print(f"{df3=}")
dateistrings = [
"Das ist ein neues\nHaus Maus Buch",
"Was kostet das neue Buch?\nBuch Haus Maus",
]
df4 = RePatterns().find_all_in_var_json(
re_expression=[r"Buch", "Haus"],
variable=dateistrings[0],
outputtype=outputtype,
binary=True,
ignore_case=True,
)
print(f"{df4=}")
df5 = RePatterns().find_all_in_var(
re_expression=["mein", r"Buch"],
variable="Das ist mein Buch. Wo hast du das Buch gekauft?",
outputtype=outputtype,
binary=False,
dfa_size=dfa_size,
ignore_case=ignore_case,
)
print(f"{df5=}")
df6 = RePatterns().sub_in_files(
re_expression=[r"Buch", "Haus"],
repl="Auto",
path_to_search=filetosearch,
outputtype=outputtype,
binary=False,
dfa_size=dfa_size,
ignore_case=ignore_case,
)
print(f"{df6=}")
df7 = RePatterns().find_all_in_files_json(
re_expression=[r"Buch", "Haus"],
search_in=filetosearch,
outputtype=outputtype,
binary=True,
ignore_case=True,
)
print(f"{df7=}")
df8 = RePatterns().find_all_in_files_json(
re_expression=[r"Buch", "Haus"],
search_in=r"F:\nur_df",
outputtype=outputtype,
binary=True,
ignore_case=True,
)
print(f"{df8=}")
text = r"""Guy Reffitt, der am 6. Januar am Sturm aufs US-Kapitol teilnahm, muss für sieben Jahre ins Gefängnis. Der stern hat seine Familie anderthalb Jahre lang begleitet – bis zum Urteil gestern in Washington. Über einen Tag vor Gericht, der Amerikas ganze Verlorenheit offenbart.
Am Ende ist es eine 18 Jahre junge Frau aus Texas, gerade mit der High School fertig, die den Satz des Tages sagt: "Wenn mein Vater so lange ins Gefängnis muss", sagt sie, "dann verdient Trump lebenslang."
Es ist Peyton Reffitt, die Tochter eines Mannes, der am 6. Januar 2021 am Sturm aufs Kapitol teilnahm. Der stern hat die ganze Familie, die nicht mehr ganz ist, seitdem begleitet. Gestern wurde Peytons Vater, Guy Reffitt, in Washington zu über sieben Jahren Haft verurteilt. Bei niemandem sonst, der am 6. Januar dabei war, fiel das Urteil bisher so hoch aus."""
df9 = RePatterns().find_all_in_files(
re_expression=r"\d+\s+\w{5}",
path_to_search=filetosearch[0],
outputtype=outputtype,
)
print(f"{df9=}")
df10 = RePatterns().find_all_in_files(
re_expression=r"\d+\s+\w{5}",
path_to_search=r"F:\nur_df",
outputtype=outputtype,
)
print(f"{df10=}")
df11 = RePatterns().sub_in_files(
re_expression=r"\d+\s+(\w{5})",
repl="$1",
path_to_search=r"F:\nur_df",
outputtype=outputtype,
)
print(f"{df11=}")
df12 = RePatterns().find_all_in_var(
re_expression=r"\d+\.?\s+\w{5}", variable=text, outputtype=outputtype
)
print(f"{df12=}")
df13 = RePatterns().sub_all_in_var(
re_expression=r"\d+\.?\s+(\w{5})",
repl="dudu $1",
variable=text,
outputtype=outputtype,
)
print(f"{df13=}")
df14 = RePatterns().find_all_in_var_json(
re_expression=r"\d+\.?\s+(\w{5})[.?!]", variable=text, outputtype=outputtype
)
print(f"{df14=}")
suchennach = ["Sein"]
dfxx = RePatterns().find_all_in_files(
re_expression=r"\w\w[ener]\b",
path_to_search=filetosearch[1],
outputtype="df",
binary=True,
dfa_size="1G",
ignore_case=True,
)
print(f"{dfxx=}")
```
Output:
```python
df=array([['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '15243', '15242',
'Mehr'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '22162', '22161',
'mehr'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '26981', '26980',
'mehr'],
...,
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52397917', '52397916',
'mehr'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52403287', '52403286',
'mehr'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52404523', '52404522',
'mehr']], dtype='<U30')
df2=array([['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '4966', '4965', 'sein'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '5021', '5020', 'sein'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '7164', '7163', 'Sein'],
...,
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52415836', '52415835',
'sein'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52420887', '52420886',
'sein'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52422346', '52422345',
'Sein']], dtype='<U30')
df3=array([['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '1051', '1050', 'buch'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '28055', '28054',
'buch'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '31815', '31814',
'Buch'],
...,
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52302767', '52302766',
'buch'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52322927', '52322926',
'Buch'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52323198', '52323197',
'Buch']], dtype='<U30')
df4=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129C8820>
df5=array([['<stdin>', '1', '9', '8', 'mein'],
['<stdin>', '1', '14', '13', 'Buch'],
['<stdin>', '1', '35', '34', 'Buch']], dtype='<U7')
df6=array([['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '515', '514', 'Auto'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '543', '542', 'Auto'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '3358', '3357', 'Auto'],
...,
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '52423818', '52423817',
'Auto'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '52426297', '52426296',
'Auto'],
['F:\\woerterbuecher\\wtxt\\xab.txt', '1', '52426444', '52426443',
'Auto']], dtype='<U30')
df7=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129B4DD0>
df8=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129E8890>
df9=array([['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '493', '492',
'1904 verfa'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '840', '839',
'1925 übern'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '890', '889',
'1935 schuf'],
...,
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52428295', '52428294',
'2001 Bürge'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52428359', '52428358',
'1991 Bürge'],
['F:\\woerterbuecher\\wtxt\\xaa.txt', '1', '52428418', '52428417',
'1979 Bürge']], dtype='<U30')
df10=array([['F:\\nur_df', '1', '205', '204', '30 Kilom'],
['F:\\nur_df', '1', '245', '244', '30 Kilom'],
['F:\\nur_df', '1', '292', '291', '60 Kilom'],
...,
['F:\\nur_df', '1', '2016132', '2016131', '75 Jahre'],
['F:\\nur_df', '1', '2016203', '2016202', '2005 emeri'],
['F:\\nur_df', '1', '2017110', '2017109', '85 Jahre']],
dtype='<U14')
df11=array([['F:\\nur_df', '1', '205', '204', 'Kilom'],
['F:\\nur_df', '1', '242', '241', 'Kilom'],
['F:\\nur_df', '1', '286', '285', 'Kilom'],
...,
['F:\\nur_df', '1', '2111612', '2111611', 'Carlo'],
['F:\\nur_df', '1', '2111911', '2111910', 'gelan'],
['F:\\nur_df', '1', '2113124', '2113123', 'verfü']], dtype='<U9')
df12=array([['<stdin>', '1', '21', '20', '6. Janua'],
['<stdin>', '1', '303', '302', '18 Jahre'],
['<stdin>', '1', '551', '550', '6. Janua'],
['<stdin>', '1', '799', '798', '6. Janua']], dtype='<U8')
df13=array([['<stdin>', '1', '21', '20', 'dudu Janua'],
['<stdin>', '1', '305', '304', 'dudu Jahre'],
['<stdin>', '1', '555', '554', 'dudu Janua'],
['<stdin>', '1', '805', '804', 'dudu Janua']], dtype='<U10')
df14=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129E8E40>
dfxx= aa_filename aa_line ... aa_byte_offset_o aa_string
0 F:\woerterbuecher\wtxt\xab.txt 1 ... 10 von
1 F:\woerterbuecher\wtxt\xab.txt 1 ... 33 tin
2 F:\woerterbuecher\wtxt\xab.txt 1 ... 46 ber
3 F:\woerterbuecher\wtxt\xab.txt 1 ... 78 ber
4 F:\woerterbuecher\wtxt\xab.txt 1 ... 85 ton
... ... ... ... ...
3035300 F:\woerterbuecher\wtxt\xab.txt 1 ... 52428744 che
3035301 F:\woerterbuecher\wtxt\xab.txt 1 ... 52428756 che
3035302 F:\woerterbuecher\wtxt\xab.txt 1 ... 52428775 rde
3035303 F:\woerterbuecher\wtxt\xab.txt 1 ... 52428782 der
3035304 F:\woerterbuecher\wtxt\xab.txt 1 ... 52428790 ten
[3035305 rows x 5 columns]
```
This is how you can use the class `PyRipGREP` directly (the output is returned as a string!):
```python
dfa_size: str = "1G",
regexstart = PyRipGREP()
search_for = _to_list(re_expression)
for suche in search_for:
regexstart.regexp(option=suche, activated=True, multi_allowed=True)
(
regexstart
.binary(activated=True)
.byte_offset(activated=True)
.context_separator(option=" ")
.dfa_size_limit(option=dfa_size)
.field_match_separator(option= "ÇÇ")
.ignore_case(activated=True)
.null_data(activated=True)
.line_number(activated=True)
.no_ignore(activated=True)
.multiline(activated=True)
.multiline_dotall(activated=True)
.block_buffered(activated=True)
.crlf(activated=True)
.no_config(activated=True)
.only_matching(activated=True)
.trim(activated=True)
.vimgrep(activated=True)
.with_filename(activated=True)
.add_target_file_or_folder('c:\\whatever.txt')
)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/PYRipGREP",
"name": "PYRipGREP",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "regex,ripgrep,grep,re,regular,expressions",
"author": "Johannes Fischer",
"author_email": "<aulasparticularesdealemaosp@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/d1/1c/22f9a58b1fac2f5a00f2175348ae0a9b8d7b9536b17f35ccefa54c1ec924/PYRipGREP-0.21.tar.gz",
"platform": null,
"description": "\n# PYRipGREP\n\n\n\nUse the insanely fast regex engine RIPGREP https://github.com/BurntSushi/ripgrep as a python module! Search results are converted directly to dict/numpy/pandas/generator\n\n\n\nYou can use the class ```PyRipGREP``` as you would use ripgrep, but you will get a string as result. Because of that, I created the class ```RePatterns ``` where your results are directly converted to dict/numpy/pandas/generator\n\n\n\n```python\n\n#Install\n\npip install PyRipGrep\n\n```\n\n\n\nCheck it out:\n\nYou'll find the files xaa.txt / xab.txt here: https://github.com/hansalemaos/PYRipGREP/tree/main/textfilesfortests\n\nMake sure to have rg.exe in your path or pass the path when you create the instance: \n\n\n\n```python\n\nRePatterns(executeable=r\"c:\\path\\rg.exe\")\n\n```\n\n\n\n```python\n\n outputtype = \"np\"\n\n\n\n suchennach = [\"weniger\", \"mehr\"]\n\n\n\n filetosearch = [\n\n r\"F:\\woerterbuecher\\wtxt\\xaa.txt\", # download here: https://github.com/hansalemaos/PYRipGREP/blob/main/textfilesfortests/xaa.txt\n\n r\"F:\\woerterbuecher\\wtxt\\xab.txt\", #download here: https://github.com/hansalemaos/PYRipGREP/blob/main/textfilesfortests/xab.txt\n\n ]\n\n np_or_df = \"np\"\n\n binary = True\n\n dfa_size = \"1G\" # Yes, I have a lot of RAM hahaha\n\n ignore_case = True\n\n\n\n df = RePatterns(executeable=r\"rg.exe\").find_all_in_files(\n\n re_expression=suchennach,\n\n path_to_search=filetosearch,\n\n outputtype=outputtype,\n\n binary=binary,\n\n dfa_size=dfa_size,\n\n ignore_case=ignore_case,\n\n )\n\n print(f\"{df=}\")\n\n\n\n suchennach = [\"sein\"]\n\n\n\n df2 = RePatterns().find_all_in_files(\n\n re_expression=suchennach,\n\n path_to_search=filetosearch,\n\n outputtype=outputtype,\n\n binary=binary,\n\n dfa_size=dfa_size,\n\n ignore_case=ignore_case,\n\n )\n\n print(f\"{df2=}\")\n\n\n\n df3 = RePatterns().find_all_in_files(\n\n re_expression=[r\"Buch\"],\n\n path_to_search=filetosearch,\n\n outputtype=outputtype,\n\n binary=False,\n\n 
dfa_size=dfa_size,\n\n ignore_case=ignore_case,\n\n )\n\n print(f\"{df3=}\")\n\n\n\n dateistrings = [\n\n \"Das ist ein neues\\nHaus Maus Buch\",\n\n \"Was kostet das neue Buch?\\nBuch Haus Maus\",\n\n ]\n\n df4 = RePatterns().find_all_in_var_json(\n\n re_expression=[r\"Buch\", \"Haus\"],\n\n variable=dateistrings[0],\n\n outputtype=outputtype,\n\n binary=True,\n\n ignore_case=True,\n\n )\n\n print(f\"{df4=}\")\n\n\n\n df5 = RePatterns().find_all_in_var(\n\n re_expression=[\"mein\", r\"Buch\"],\n\n variable=\"Das ist mein Buch. Wo hast du das Buch gekauft?\",\n\n outputtype=outputtype,\n\n binary=False,\n\n dfa_size=dfa_size,\n\n ignore_case=ignore_case,\n\n )\n\n print(f\"{df5=}\")\n\n\n\n df6 = RePatterns().sub_in_files(\n\n re_expression=[r\"Buch\", \"Haus\"],\n\n repl=\"Auto\",\n\n path_to_search=filetosearch,\n\n outputtype=outputtype,\n\n binary=False,\n\n dfa_size=dfa_size,\n\n ignore_case=ignore_case,\n\n )\n\n print(f\"{df6=}\")\n\n\n\n df7 = RePatterns().find_all_in_files_json(\n\n re_expression=[r\"Buch\", \"Haus\"],\n\n search_in=filetosearch,\n\n outputtype=outputtype,\n\n binary=True,\n\n ignore_case=True,\n\n )\n\n print(f\"{df7=}\")\n\n\n\n df8 = RePatterns().find_all_in_files_json(\n\n re_expression=[r\"Buch\", \"Haus\"],\n\n search_in=r\"F:\\nur_df\",\n\n outputtype=outputtype,\n\n binary=True,\n\n ignore_case=True,\n\n )\n\n print(f\"{df8=}\")\n\n\n\n text = r\"\"\"Guy Reffitt, der am 6. Januar am Sturm aufs US-Kapitol teilnahm, muss f\u00fcr sieben Jahre ins Gef\u00e4ngnis. Der stern hat seine Familie anderthalb Jahre lang begleitet \u2013 bis zum Urteil gestern in Washington. \u00dcber einen Tag vor Gericht, der Amerikas ganze Verlorenheit offenbart.\n\n Am Ende ist es eine 18 Jahre junge Frau aus Texas, gerade mit der High School fertig, die den Satz des Tages sagt: \"Wenn mein Vater so lange ins Gef\u00e4ngnis muss\", sagt sie, \"dann verdient Trump lebenslang.\"\n\n\n\n Es ist Peyton Reffitt, die Tochter eines Mannes, der am 6. 
Januar 2021 am Sturm aufs Kapitol teilnahm. Der stern hat die ganze Familie, die nicht mehr ganz ist, seitdem begleitet. Gestern wurde Peytons Vater, Guy Reffitt, in Washington zu \u00fcber sieben Jahren Haft verurteilt. Bei niemandem sonst, der am 6. Januar dabei war, fiel das Urteil bisher so hoch aus.\"\"\"\n\n\n\n df9 = RePatterns().find_all_in_files(\n\n re_expression=r\"\\d+\\s+\\w{5}\",\n\n path_to_search=filetosearch[0],\n\n outputtype=outputtype,\n\n )\n\n print(f\"{df9=}\")\n\n df10 = RePatterns().find_all_in_files(\n\n re_expression=r\"\\d+\\s+\\w{5}\",\n\n path_to_search=r\"F:\\nur_df\",\n\n outputtype=outputtype,\n\n )\n\n print(f\"{df10=}\")\n\n df11 = RePatterns().sub_in_files(\n\n re_expression=r\"\\d+\\s+(\\w{5})\",\n\n repl=\"$1\",\n\n path_to_search=r\"F:\\nur_df\",\n\n outputtype=outputtype,\n\n )\n\n print(f\"{df11=}\")\n\n df12 = RePatterns().find_all_in_var(\n\n re_expression=r\"\\d+\\.?\\s+\\w{5}\", variable=text, outputtype=outputtype\n\n )\n\n print(f\"{df12=}\")\n\n df13 = RePatterns().sub_all_in_var(\n\n re_expression=r\"\\d+\\.?\\s+(\\w{5})\",\n\n repl=\"dudu $1\",\n\n variable=text,\n\n outputtype=outputtype,\n\n )\n\n print(f\"{df13=}\")\n\n df14 = RePatterns().find_all_in_var_json(\n\n re_expression=r\"\\d+\\.?\\s+(\\w{5})[.?!]\", variable=text, outputtype=outputtype\n\n )\n\n print(f\"{df14=}\")\n\n\n\n suchennach = [\"Sein\"]\n\n\n\n dfxx = RePatterns().find_all_in_files(\n\n re_expression=r\"\\w\\w[ener]\\b\",\n\n path_to_search=filetosearch[1],\n\n outputtype=\"df\",\n\n binary=True,\n\n dfa_size=\"1G\",\n\n ignore_case=True,\n\n )\n\n print(f\"{dfxx=}\")\n\n```\n\n\n\nOutput: \n\n\n\n```python\n\n df=array([['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '15243', '15242',\n\n 'Mehr'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '22162', '22161',\n\n 'mehr'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '26981', '26980',\n\n 'mehr'],\n\n ...,\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52397917', 
'52397916',\n\n 'mehr'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52403287', '52403286',\n\n 'mehr'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52404523', '52404522',\n\n 'mehr']], dtype='<U30')\n\ndf2=array([['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '4966', '4965', 'sein'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '5021', '5020', 'sein'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '7164', '7163', 'Sein'],\n\n ...,\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52415836', '52415835',\n\n 'sein'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52420887', '52420886',\n\n 'sein'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52422346', '52422345',\n\n 'Sein']], dtype='<U30')\n\ndf3=array([['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '1051', '1050', 'buch'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '28055', '28054',\n\n 'buch'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '31815', '31814',\n\n 'Buch'],\n\n ...,\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52302767', '52302766',\n\n 'buch'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52322927', '52322926',\n\n 'Buch'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52323198', '52323197',\n\n 'Buch']], dtype='<U30')\n\ndf4=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129C8820>\n\ndf5=array([['<stdin>', '1', '9', '8', 'mein'],\n\n ['<stdin>', '1', '14', '13', 'Buch'],\n\n ['<stdin>', '1', '35', '34', 'Buch']], dtype='<U7')\n\ndf6=array([['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '515', '514', 'Auto'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '543', '542', 'Auto'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '3358', '3357', 'Auto'],\n\n ...,\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '52423818', '52423817',\n\n 'Auto'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '52426297', '52426296',\n\n 'Auto'],\n\n 
['F:\\\\woerterbuecher\\\\wtxt\\\\xab.txt', '1', '52426444', '52426443',\n\n 'Auto']], dtype='<U30')\n\ndf7=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129B4DD0>\n\ndf8=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129E8890>\n\ndf9=array([['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '493', '492',\n\n '1904 verfa'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '840', '839',\n\n '1925 \u00fcbern'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '890', '889',\n\n '1935 schuf'],\n\n ...,\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52428295', '52428294',\n\n '2001 B\u00fcrge'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52428359', '52428358',\n\n '1991 B\u00fcrge'],\n\n ['F:\\\\woerterbuecher\\\\wtxt\\\\xaa.txt', '1', '52428418', '52428417',\n\n '1979 B\u00fcrge']], dtype='<U30')\n\ndf10=array([['F:\\\\nur_df', '1', '205', '204', '30 Kilom'],\n\n ['F:\\\\nur_df', '1', '245', '244', '30 Kilom'],\n\n ['F:\\\\nur_df', '1', '292', '291', '60 Kilom'],\n\n ...,\n\n ['F:\\\\nur_df', '1', '2016132', '2016131', '75 Jahre'],\n\n ['F:\\\\nur_df', '1', '2016203', '2016202', '2005 emeri'],\n\n ['F:\\\\nur_df', '1', '2017110', '2017109', '85 Jahre']],\n\n dtype='<U14')\n\ndf11=array([['F:\\\\nur_df', '1', '205', '204', 'Kilom'],\n\n ['F:\\\\nur_df', '1', '242', '241', 'Kilom'],\n\n ['F:\\\\nur_df', '1', '286', '285', 'Kilom'],\n\n ...,\n\n ['F:\\\\nur_df', '1', '2111612', '2111611', 'Carlo'],\n\n ['F:\\\\nur_df', '1', '2111911', '2111910', 'gelan'],\n\n ['F:\\\\nur_df', '1', '2113124', '2113123', 'verf\u00fc']], dtype='<U9')\n\ndf12=array([['<stdin>', '1', '21', '20', '6. Janua'],\n\n ['<stdin>', '1', '303', '302', '18 Jahre'],\n\n ['<stdin>', '1', '551', '550', '6. Janua'],\n\n ['<stdin>', '1', '799', '798', '6. 
Janua']], dtype='<U8')\n\ndf13=array([['<stdin>', '1', '21', '20', 'dudu Janua'],\n\n ['<stdin>', '1', '305', '304', 'dudu Jahre'],\n\n ['<stdin>', '1', '555', '554', 'dudu Janua'],\n\n ['<stdin>', '1', '805', '804', 'dudu Janua']], dtype='<U10')\n\ndf14=<generator object RePatterns._generator_json.<locals>.<genexpr> at 0x00000000129E8E40>\n\ndfxx= aa_filename aa_line ... aa_byte_offset_o aa_string\n\n0 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 10 von\n\n1 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 33 tin\n\n2 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 46 ber\n\n3 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 78 ber\n\n4 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 85 ton\n\n ... ... ... ... ...\n\n3035300 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 52428744 che\n\n3035301 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 52428756 che\n\n3035302 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 52428775 rde\n\n3035303 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 52428782 der\n\n3035304 F:\\woerterbuecher\\wtxt\\xab.txt 1 ... 52428790 ten\n\n[3035305 rows x 5 columns]\n\n```\n\n\n\nThis is how you can use the class PyRipGREP directly (output as string!):\n\n\n\n```python\n\n dfa_size: str = \"1G\",\n\n regexstart = PyRipGREP()\n\n search_for = _to_list(re_expression)\n\n for suche in search_for:\n\n regexstart.regexp(option=suche, activated=True, multi_allowed=True)\n\n\n\n (\n\n regexstart\n\n .binary(activated=True)\n\n .byte_offset(activated=True) \n\n .context_separator(option=\" \")\n\n .dfa_size_limit(option=dfa_size)\n\n .field_match_separator(option= \"\u00c7\u00c7\") \n\n .ignore_case(activated=True)\n\n .null_data(activated=True)\n\n .line_number(activated=True)\n\n .no_ignore(activated=True)\n\n .multiline(activated=True)\n\n .multiline_dotall(activated=True)\n\n .block_buffered(activated=True)\n\n .crlf(activated=True)\n\n .no_config(activated=True)\n\n .only_matching(activated=True)\n\n .trim(activated=True)\n\n .vimgrep(activated=True)\n\n .with_filename(activated=True)\n\n 
.add_target_file_or_folder('c:\\\\whatever.txt')\n\n )\n\n```\n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Use insanely fast regex engine RIPGREP as a python module! Search results are captured and converted to dict/numpy/pandas/generator",
"version": "0.21",
"split_keywords": [
"regex",
"ripgrep",
"grep",
"re",
"regular",
"expressions"
],
"urls": [
{
"comment_text": "",
"digests": {
"md5": "7118a72e3c3327e5c4f801d6595e18bd",
"sha256": "b24f066f6dd57a9a0caf80b29708ad29d6b1ec765b7096ebf74eaef03fe207f8"
},
"downloads": -1,
"filename": "PYRipGREP-0.21-py3-none-any.whl",
"has_sig": false,
"md5_digest": "7118a72e3c3327e5c4f801d6595e18bd",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 74942,
"upload_time": "2022-10-02T05:27:34",
"upload_time_iso_8601": "2022-10-02T05:27:34.338477Z",
"url": "https://files.pythonhosted.org/packages/e9/02/c61c919bcf292dff251ba296b63e9d5a36fa7e81d36e4735bf29d8d712d5/PYRipGREP-0.21-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"md5": "970bfc3608e6580eae80d028717714b9",
"sha256": "0830cc1c5a22330c8f24070c8e561d5450e3211059fbed9c9504de0729cac606"
},
"downloads": -1,
"filename": "PYRipGREP-0.21.tar.gz",
"has_sig": false,
"md5_digest": "970bfc3608e6580eae80d028717714b9",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 75465,
"upload_time": "2022-10-02T05:27:36",
"upload_time_iso_8601": "2022-10-02T05:27:36.127227Z",
"url": "https://files.pythonhosted.org/packages/d1/1c/22f9a58b1fac2f5a00f2175348ae0a9b8d7b9536b17f35ccefa54c1ec924/PYRipGREP-0.21.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2022-10-02 05:27:36",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "hansalemaos",
"github_project": "PYRipGREP",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "pyripgrep"
}