# Tesseract hOCR to pandas DataFrame
## pip install tesserhocr2df
### Tested against Windows 10 / Python 3.11 / Anaconda
```PY
from tesserhocr2df import text2df
from PrettyColorPrinter import add_printer
add_printer(1)
df = text2df(
img=r"C:\Users\hansc\Desktop\2024-03-29 02_18_09-C__ProgramData_BlueStacks_nxt.png",
add_after_tesseract_path="",
add_at_the_end="-l eng+por --psm 3",
tesseractpath=r"C:\Program Files\Tesseract-OCR\tesseract.exe",
add_imgs=True,
)
print(df)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/tesserhocr2df",
"name": "tesserhocr2df",
"maintainer": null,
"docs_url": null,
"requires_python": null,
"maintainer_email": null,
"keywords": "tesseract, hocr",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/cd/d2/515331b0ad602bb0fdad53e663fb5d71d4c3f9f33e903d2524eafc428a24/tesserhocr2df-0.10.tar.gz",
"platform": null,
"description": "\r\n# tesseract hocr to pandas DataFrame\r\n\r\n## pip install tesserhocr2df\r\n\r\n### Tested against Windows 10 / Python 3.11 / Anaconda \r\n\r\n\r\n```PY\r\nfrom tesserhocr2df import text2df\r\nfrom PrettyColorPrinter import add_printer\r\n\r\nadd_printer(1)\r\ndf = text2df(\r\n img=r\"C:\\Users\\hansc\\Desktop\\2024-03-29 02_18_09-C__ProgramData_BlueStacks_nxt.png\",\r\n add_after_tesseract_path=\"\",\r\n add_at_the_end=\"-l eng+por --psm 3\",\r\n tesseractpath=r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\",\r\n add_imgs=True,\r\n)\r\nprint(df)\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "tesseract hocr to pandas DataFrame",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/tesserhocr2df"
},
"split_keywords": [
"tesseract",
" hocr"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "b7cf5ba4417f46e66b8d25e3a94e3d29ad3b662b4cc56bccc8e385531a9771bb",
"md5": "28c03e6586e34ed23979be82ce19b652",
"sha256": "f081300b383c3961b873652c4ef27d19323b762a9d970146422bd2082e282112"
},
"downloads": -1,
"filename": "tesserhocr2df-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "28c03e6586e34ed23979be82ce19b652",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 25014,
"upload_time": "2024-03-29T05:47:28",
"upload_time_iso_8601": "2024-03-29T05:47:28.611731Z",
"url": "https://files.pythonhosted.org/packages/b7/cf/5ba4417f46e66b8d25e3a94e3d29ad3b662b4cc56bccc8e385531a9771bb/tesserhocr2df-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "cdd2515331b0ad602bb0fdad53e663fb5d71d4c3f9f33e903d2524eafc428a24",
"md5": "b1d1c533c3ccc148bf75c8fe69b28cfa",
"sha256": "bb25a2125cd24a63b64f5c65362a82298ceca115004475ab85d25685d7f78b81"
},
"downloads": -1,
"filename": "tesserhocr2df-0.10.tar.gz",
"has_sig": false,
"md5_digest": "b1d1c533c3ccc148bf75c8fe69b28cfa",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 24987,
"upload_time": "2024-03-29T05:47:30",
"upload_time_iso_8601": "2024-03-29T05:47:30.022766Z",
"url": "https://files.pythonhosted.org/packages/cd/d2/515331b0ad602bb0fdad53e663fb5d71d4c3f9f33e903d2524eafc428a24/tesserhocr2df-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-03-29 05:47:30",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "tesserhocr2df",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "tesserhocr2df"
}