# A function to get the difference between two files and return a pandas DataFrame
## pip install textcompari
### Tested against Windows 10 / Python 3.11 / Anaconda
### Important!
The module is compiled the first time you import it, so Cython and a C/C++ compiler must already be installed!
```python
from rapidfuzz import fuzz
from textcompari import get_file_diff
"""
A function to get the difference between two files and return a pandas DataFrame.
:param afile: A file to compare (str, bytes, tuple, list, np.ndarray)
:param bfile: Another file to compare (str, bytes, tuple, list, np.ndarray)
:param window_shifts: The number of shifts for the window (default 5)
:param min_fuzz_match: The minimum fuzzy match score (default 80)
:param fuzz_scorer: The fuzzy scorer function (default fuzz.WRatio)
:param cpus: The number of CPUs to use (default 5)
:return: A pandas DataFrame containing the difference between the files
"""
afile = r"C:\Users\hansc\Downloads\difffindertest\test1_1.txt"
bfile = r"C:\Users\hansc\Downloads\difffindertest\test1_2.txt"
df = get_file_diff(
afile, bfile, window_shifts=300, min_fuzz_match=99, fuzz_scorer=fuzz.WRatio, cpus=5
)
print(df)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/textcompari",
"name": "textcompari",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "diff,files,pandas,DataFrame",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/39/05/ebe2fa07f6e4d7c39330c7c9a91c6a91f7e3b981343860fd17ce89acbaa1/textcompari-0.10.tar.gz",
"platform": null,
"description": "\r\n# A function to get the difference between two files and return a pandas DataFrame\r\n\r\n## pip install textcompari\r\n\r\n### Tested against Windows 10 / Python 3.11 / Anaconda \r\n\r\n### Important!\r\nThe module will be compiled when you import it for the first time. Cython and a C/C++ compiler must be installed!\r\n\r\n```python\r\nfrom rapidfuzz import fuzz\r\nfrom textcompari import get_file_diff\r\n\r\n\"\"\"\r\nA function to get the difference between two files and return a pandas DataFrame.\r\n\r\n:param afile: A file to compare (str, bytes, tuple, list, np.ndarray)\r\n:param bfile: Another file to compare (str, bytes, tuple, list, np.ndarray)\r\n:param window_shifts: The number of shifts for the window (default 5)\r\n:param min_fuzz_match: The minimum fuzzy match score (default 80)\r\n:param fuzz_scorer: The fuzzy scorer function (default fuzz.WRatio)\r\n:param cpus: The number of CPUs to use (default 5)\r\n:return: A pandas DataFrame containing the difference between the files\r\n\"\"\"\r\n\r\n\r\nafile = r\"C:\\Users\\hansc\\Downloads\\difffindertest\\test1_1.txt\"\r\nbfile = r\"C:\\Users\\hansc\\Downloads\\difffindertest\\test1_2.txt\"\r\n\r\ndf = get_file_diff(\r\n afile, bfile, window_shifts=300, min_fuzz_match=99, fuzz_scorer=fuzz.WRatio, cpus=5\r\n)\r\nprint(df)\r\n\r\n\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "A function to get the difference between two files and return a pandas DataFrame",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/textcompari"
},
"split_keywords": [
"diff",
"files",
"pandas",
"dataframe"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "ba391318f7265e2d63a51da33dd4b19abd04522bd22ce503ab8daeafdc8763f2",
"md5": "d5b7b5c1bad44ba1fc7be94543fc8bbd",
"sha256": "316d254e9fc8e4e7ce2a869089f76fc3f7ab7a90712d7b9cc787e61caf8b282c"
},
"downloads": -1,
"filename": "textcompari-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "d5b7b5c1bad44ba1fc7be94543fc8bbd",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 29208,
"upload_time": "2024-02-07T07:36:43",
"upload_time_iso_8601": "2024-02-07T07:36:43.258183Z",
"url": "https://files.pythonhosted.org/packages/ba/39/1318f7265e2d63a51da33dd4b19abd04522bd22ce503ab8daeafdc8763f2/textcompari-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "3905ebe2fa07f6e4d7c39330c7c9a91c6a91f7e3b981343860fd17ce89acbaa1",
"md5": "618c500c1daa0dbcad67c70f1643d967",
"sha256": "a641c7563f8ab0d7755cbdf9ae66d2034a9cc2b5a14ad2cb4b52e815c0a30fc5"
},
"downloads": -1,
"filename": "textcompari-0.10.tar.gz",
"has_sig": false,
"md5_digest": "618c500c1daa0dbcad67c70f1643d967",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 28930,
"upload_time": "2024-02-07T07:36:45",
"upload_time_iso_8601": "2024-02-07T07:36:45.136828Z",
"url": "https://files.pythonhosted.org/packages/39/05/ebe2fa07f6e4d7c39330c7c9a91c6a91f7e3b981343860fd17ce89acbaa1/textcompari-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-02-07 07:36:45",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "textcompari",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "textcompari"
}