# Arxiv Export
**Arxiv Export** is a Python library that allows you to search, download, and manage scientific articles from [arXiv.org](https://arxiv.org/). It is useful for automating paper downloads and obtaining structured information about articles.
## Installation
```bash
pip install arxiv-export-documents
```
## Usage Example
```python
import asyncio
from arxiv_export_documents import export_papers


async def main():
    search_query = "quantum computing"
    download_path = "./arxiv_papers"
    max_results = 5

    async for paper in export_papers(
        search=search_query,
        path_download=download_path,
        max_results=max_results
    ):
        print(f"Downloaded paper: {paper.title}")
        print(f"Authors: {', '.join(paper.authors)}")
        print(f"Summary: {paper.summary}")
        print(f"Link: {paper.link}")
        print(f"Path: {paper.path}")
        print(f"Documents: {len(paper.documents)}")
        print("-" * 80)


if __name__ == "__main__":
    asyncio.run(main())
```
## Features
- Search for articles on arXiv using keywords.
- Automatically download article PDFs.
- Access metadata such as title, authors, abstract, link, and local path.
- Manage multiple results with a single command.
## Main Parameters
- `search`: search string (e.g., `"quantum computing"`).
- `path_download`: path to save the PDFs.
- `max_results`: maximum number of articles to download.
### Vector Database for LLMs
The `documents` property of each returned paper provides a list of `Document` objects intended for ingestion into a vector database. These objects are commonly used to supply structured data to large language models (LLMs), supporting semantic search and advanced analysis.
## License
This library is distributed under the MIT license.
Raw data
{
"_id": null,
"home_page": null,
"name": "arxiv-export-documents",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.12",
"maintainer_email": null,
"keywords": "arxiv, export, papers",
"author": null,
"author_email": "Giuseppe Zileni <giuseppe.zileni@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/59/f5/7834508350d69abff1fb920672f8a2f71c56329d66daa1a8bef157913604/arxiv_export_documents-0.1.5.tar.gz",
"platform": null,
"description": "# Arxix Export\n\n**Arxiv Export** is a Python library that allows you to search, download, and manage scientific articles from [arXiv.org](https://arxiv.org/). It is useful for automating paper downloads and obtaining structured information about articles.\n\n## Installation\n\n```bash\npip install arxiv-export\n```\n\n## Usage Example\n\n```python\nimport asyncio\nfrom arxiv_export_documents import export_papers\n\n\nasync def main():\n search_query = \"quantum computing\"\n download_path = \"./arxiv_papers\"\n max_results = 5\n\n async for paper in export_papers(\n search=search_query,\n path_download=download_path,\n max_results=max_results\n ):\n print(f\"Downloaded paper: {paper.title}\")\n print(f\"Authors: {', '.join(paper.authors)}\")\n print(f\"Summary: {paper.summary}\")\n print(f\"Link: {paper.link}\")\n print(f\"Path: {paper.path}\")\n print(f\"Documents: {len(paper.documents)}\")\n print(\"-\" * 80)\n\n\nif __name__ == \"__main__\":\n asyncio.run(main())\n```\n\n## Features\n\n- Search for articles on arXiv using keywords.\n- Automatically download article PDFs.\n- Access metadata such as title, authors, abstract, link, and local path.\n- Manage multiple results with a single command.\n\n## Main Parameters\n\n- `search`: search string (e.g., `\"quantum computing\"`).\n- `path_download`: path to save the PDFs.\n- `max_results`: maximum number of articles to download.\n\n### Vector Database for LLMs\n\nThe `documents` property provides a list of `Document` files intended for ingestion into a vector database. These files are commonly used to supply structured data to language models (LLMs), supporting semantic search and advanced analysis.\n\n## License\n\nThis library is distributed under the MIT license.\n",
"bugtrack_url": null,
"license": null,
"summary": "Export arxiv papers to pdf formats",
"version": "0.1.5",
"project_urls": {
"Homepage": "https://gzileni.github.io/arxiv-export-documents",
"Repository": "https://github.com/gzileni/arxiv-export-documents.git"
},
"split_keywords": [
"arxiv",
" export",
" papers"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "8ce0eebf264f842d8fb2d31bc868a99b0b55ca4a5487c6994a9d51afce19db28",
"md5": "ab95cb077a4987bc92edfbbbc23c326f",
"sha256": "2ef6c21a6964f70c43926c0bd6af0369f231d902e348cc1633403b10e37031b5"
},
"downloads": -1,
"filename": "arxiv_export_documents-0.1.5-py3-none-any.whl",
"has_sig": false,
"md5_digest": "ab95cb077a4987bc92edfbbbc23c326f",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.12",
"size": 6467,
"upload_time": "2025-07-31T15:25:36",
"upload_time_iso_8601": "2025-07-31T15:25:36.355218Z",
"url": "https://files.pythonhosted.org/packages/8c/e0/eebf264f842d8fb2d31bc868a99b0b55ca4a5487c6994a9d51afce19db28/arxiv_export_documents-0.1.5-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "59f57834508350d69abff1fb920672f8a2f71c56329d66daa1a8bef157913604",
"md5": "59892822b5907712c532585aee6e7dcf",
"sha256": "1e39eddb83c0279996b6642361dc2bc96d976e55f8e737aae862f3f6124796b5"
},
"downloads": -1,
"filename": "arxiv_export_documents-0.1.5.tar.gz",
"has_sig": false,
"md5_digest": "59892822b5907712c532585aee6e7dcf",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.12",
"size": 6007,
"upload_time": "2025-07-31T15:25:37",
"upload_time_iso_8601": "2025-07-31T15:25:37.409576Z",
"url": "https://files.pythonhosted.org/packages/59/f5/7834508350d69abff1fb920672f8a2f71c56329d66daa1a8bef157913604/arxiv_export_documents-0.1.5.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-07-31 15:25:37",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "gzileni",
"github_project": "arxiv-export-documents",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "aiohappyeyeballs",
"specs": [
[
"==",
"2.6.1"
]
]
},
{
"name": "aiohttp",
"specs": [
[
"==",
"3.12.15"
]
]
},
{
"name": "aiosignal",
"specs": [
[
"==",
"1.4.0"
]
]
},
{
"name": "annotated-types",
"specs": [
[
"==",
"0.7.0"
]
]
},
{
"name": "anyio",
"specs": [
[
"==",
"4.9.0"
]
]
},
{
"name": "attrs",
"specs": [
[
"==",
"25.3.0"
]
]
},
{
"name": "certifi",
"specs": [
[
"==",
"2025.7.14"
]
]
},
{
"name": "charset-normalizer",
"specs": [
[
"==",
"3.4.2"
]
]
},
{
"name": "dataclasses-json",
"specs": [
[
"==",
"0.6.7"
]
]
},
{
"name": "frozenlist",
"specs": [
[
"==",
"1.7.0"
]
]
},
{
"name": "greenlet",
"specs": [
[
"==",
"3.2.3"
]
]
},
{
"name": "h11",
"specs": [
[
"==",
"0.16.0"
]
]
},
{
"name": "httpcore",
"specs": [
[
"==",
"1.0.9"
]
]
},
{
"name": "httpx",
"specs": [
[
"==",
"0.28.1"
]
]
},
{
"name": "httpx-sse",
"specs": [
[
"==",
"0.4.1"
]
]
},
{
"name": "idna",
"specs": [
[
"==",
"3.10"
]
]
},
{
"name": "jsonpatch",
"specs": [
[
"==",
"1.33"
]
]
},
{
"name": "jsonpointer",
"specs": [
[
"==",
"3.0.0"
]
]
},
{
"name": "langchain",
"specs": [
[
"==",
"0.3.27"
]
]
},
{
"name": "langchain-community",
"specs": [
[
"==",
"0.3.27"
]
]
},
{
"name": "langchain-core",
"specs": [
[
"==",
"0.3.72"
]
]
},
{
"name": "langchain-text-splitters",
"specs": [
[
"==",
"0.3.9"
]
]
},
{
"name": "langsmith",
"specs": [
[
"==",
"0.4.8"
]
]
},
{
"name": "marshmallow",
"specs": [
[
"==",
"3.26.1"
]
]
},
{
"name": "multidict",
"specs": [
[
"==",
"6.6.3"
]
]
},
{
"name": "mypy_extensions",
"specs": [
[
"==",
"1.1.0"
]
]
},
{
"name": "numpy",
"specs": [
[
"==",
"1.26.0"
]
]
},
{
"name": "orjson",
"specs": [
[
"==",
"3.11.1"
]
]
},
{
"name": "packaging",
"specs": [
[
"==",
"25.0"
]
]
},
{
"name": "propcache",
"specs": [
[
"==",
"0.3.2"
]
]
},
{
"name": "pydantic",
"specs": [
[
"==",
"2.11.7"
]
]
},
{
"name": "pydantic-settings",
"specs": [
[
"==",
"2.10.1"
]
]
},
{
"name": "pydantic_core",
"specs": [
[
"==",
"2.33.2"
]
]
},
{
"name": "python-dotenv",
"specs": [
[
"==",
"1.1.1"
]
]
},
{
"name": "PyYAML",
"specs": [
[
"==",
"6.0.2"
]
]
},
{
"name": "requests",
"specs": [
[
"==",
"2.32.4"
]
]
},
{
"name": "requests-toolbelt",
"specs": [
[
"==",
"1.0.0"
]
]
},
{
"name": "sniffio",
"specs": [
[
"==",
"1.3.1"
]
]
},
{
"name": "SQLAlchemy",
"specs": [
[
"==",
"2.0.42"
]
]
},
{
"name": "tenacity",
"specs": [
[
"==",
"9.1.2"
]
]
},
{
"name": "typing-inspect",
"specs": [
[
"==",
"0.9.0"
]
]
},
{
"name": "typing-inspection",
"specs": [
[
"==",
"0.4.1"
]
]
},
{
"name": "typing_extensions",
"specs": [
[
"==",
"4.14.1"
]
]
},
{
"name": "urllib3",
"specs": [
[
"==",
"2.5.0"
]
]
},
{
"name": "yarl",
"specs": [
[
"==",
"1.20.1"
]
]
},
{
"name": "zstandard",
"specs": [
[
"==",
"0.23.0"
]
]
}
],
"lcname": "arxiv-export-documents"
}