# A basic document parsing and loading utility.
This release is currently a **placeholder**; a working version of the project will be published in the near future.
The ``docp`` project is a CPython library for extracting text from binary documents (e.g. PDF, DOCX) into Python objects, which can be used across various applications, ranging from simple plain-text extraction to loading the text into a Chroma database for LLM use.
## Installation
Coming soon ...
## Toolset
Coming soon ...
## Using the Library
Coming soon ...
## Additional Information
Coming soon ...
Raw data
{
"_id": null,
"home_page": null,
"name": "docp",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.7",
"maintainer_email": null,
"keywords": "document, library, parsing, utility, utilities",
"author": null,
"author_email": "The Developers <development@s3dev.uk>",
"download_url": null,
"platform": null,
"description": "# A basic document parsing and loading utility.\n\nCurrently a **placeholder** for when this project is ready in the near future.\n\nThe ``docp`` project is a CPython library for extracting text from binary documents (e.g. PDF, DOCX, etc.) into Python objects, which can be used across various applications, ranging from simple plain-text extraction to loading the text into a Chroma database for LLM use.\n\n\n## Installation\nComing soon ...\n\n\n## Toolset\nComing soon ...\n\n\n## Using the Library\nComing soon ...\n\n\n## Additional Information\nComing soon ...\n\n",
"bugtrack_url": null,
"license": "GNU GPL-3",
"summary": "A basic document parsing and loading utility.",
"version": "0.0.0.dev1",
"project_urls": {
"Documentation": "https://github.com/s3dev/docp",
"Homepage": "https://github.com/s3dev/docp",
"Repository": "https://github.com/s3dev/docp"
},
"split_keywords": [
"document",
" library",
" parsing",
" utility",
" utilities"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "1e72cea70dbd6e730520ac63a90a9ba32f3469e084f76d44fd3e3c745fa366c0",
"md5": "a84444263a0f3ed725fc7504c8c2ef9d",
"sha256": "eba7a123a32f4684471d3ab9f2475e29f2d82408d5624b4ccb04f23aba72ff2d"
},
"downloads": -1,
"filename": "docp-0.0.0.dev1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a84444263a0f3ed725fc7504c8c2ef9d",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.7",
"size": 13757,
"upload_time": "2025-01-06T15:37:36",
"upload_time_iso_8601": "2025-01-06T15:37:36.905838Z",
"url": "https://files.pythonhosted.org/packages/1e/72/cea70dbd6e730520ac63a90a9ba32f3469e084f76d44fd3e3c745fa366c0/docp-0.0.0.dev1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-01-06 15:37:36",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "s3dev",
"github_project": "docp",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "pandas",
"specs": [
[
"==",
"2.2.3"
]
]
},
{
"name": "pdfplumber",
"specs": [
[
"==",
"0.11.5"
]
]
},
{
"name": "unidecode",
"specs": [
[
"==",
"1.3.8"
]
]
},
{
"name": "utils4",
"specs": [
[
"==",
"1.7.0"
]
]
}
],
"lcname": "docp"
}