| Field | Value |
| --- | --- |
| Name | scrapy-webarchive |
| Version | 0.3.0 |
| Summary | A webarchive extension for Scrapy |
| home_page | None |
| upload_time | 2025-01-13 09:32:07 |
| maintainer | None |
| docs_url | None |
| author | None |
| requires_python | <3.13,>=3.7 |
| license | None |
| keywords | scrapy, webarchive, warc, wacz |
| VCS | |
| bugtrack_url | |
| requirements | No requirements were recorded. |
| Travis-CI | No Travis. |
| coveralls test coverage | No coveralls. |
# Scrapy Webarchive
[Docs workflow](https://github.com/q-m/scrapy-webarchive/actions/workflows/docs.yml)
Scrapy Webarchive is a plugin for Scrapy that allows users to capture and export web archives in the WARC and WACZ formats during crawling.
## Features
* Save web crawls in WACZ format (multiple storage backends supported: local and cloud).
* Crawl against WACZ format archives.
* Integrate seamlessly with Scrapy’s spider request and response cycle (a configuration sketch follows below).
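
The plugin is installed from PyPI (e.g. `pip install scrapy-webarchive`) and, like other Scrapy plugins, enabled through the project's `settings.py`. The sketch below is illustrative only: `EXTENSIONS` and `DOWNLOADER_MIDDLEWARES` are standard Scrapy settings, but the `scrapy_webarchive` class paths and the `SW_*` option names are assumptions, not confirmed API of this package; see the documentation linked below for the actual component paths and setting names.

```python
# settings.py -- a hedged sketch of wiring a WACZ-exporting plugin into Scrapy.
# The scrapy_webarchive class paths and SW_* settings below are illustrative
# assumptions, NOT confirmed API; check the project documentation for the
# real component paths and setting names.

BOT_NAME = "example_crawler"

# Hypothetical extension that writes WARC records and packages them as a
# WACZ archive while the spider runs.
EXTENSIONS = {
    "scrapy_webarchive.extensions.WaczExporter": 543,  # assumed class path
}

# Hypothetical downloader middleware that serves responses from an existing
# WACZ archive instead of hitting the live site.
DOWNLOADER_MIDDLEWARES = {
    "scrapy_webarchive.middleware.WaczMiddleware": 543,  # assumed class path
}

# Assumed settings: where to write the archive (local path or cloud URI),
# and which archive to read from when crawling against a WACZ file.
SW_EXPORT_URI = "s3://example-bucket/archives/"      # assumption
SW_WACZ_SOURCE_URI = "archives/crawl-2025-01.wacz"   # assumption
```

With an exporter extension enabled, the crawl runs as usual and the archive is written as a side effect; with a replay middleware enabled, responses are served from an existing archive, which is what "crawl against WACZ format archives" refers to.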
## Compatibility
* Python 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12
## Documentation
Documentation is available online at [developers.thequestionmark.org/scrapy-webarchive/](https://developers.thequestionmark.org/scrapy-webarchive/).
## Raw data
{
"_id": null,
"home_page": null,
"name": "scrapy-webarchive",
"maintainer": null,
"docs_url": null,
"requires_python": "<3.13,>=3.7",
"maintainer_email": null,
"keywords": "Scrapy, Webarchive, WARC, WACZ",
"author": null,
"author_email": null,
"download_url": "https://files.pythonhosted.org/packages/63/b3/de9e88a8d34e360343b16d68642f314fd2d306f9a8630bb50924a88da248/scrapy_webarchive-0.3.0.tar.gz",
"platform": null,
"description": "# Scrapy Webarchive\n\n[](https://github.com/q-m/scrapy-webarchive/actions/workflows/docs.yml)\n\nScrapy Webarchive is a plugin for Scrapy that allows users to capture and export web archives in the WARC and WACZ formats during crawling.\n\n## Features\n\n* Save web crawls in WACZ format (multiple storages supported; local and cloud).\n* Crawl against WACZ format archives.\n* Integrate seamlessly with Scrapy\u2019s spider request and response cycle.\n\n## Compatibility\n\n* Python 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12\n\n## Documentation\n\nDocumentation is available online at [developers.thequestionmark.org/scrapy-webarchive/](https://developers.thequestionmark.org/scrapy-webarchive/)\n",
"bugtrack_url": null,
"license": null,
"summary": "A webarchive extension for Scrapy",
"version": "0.3.0",
"project_urls": {
"Documentation": "https://developers.thequestionmark.org/scrapy-webarchive/",
"Repository": "https://github.com/q-m/scrapy-webarchive"
},
"split_keywords": [
"scrapy",
" webarchive",
" warc",
" wacz"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "dbdcd21e06a88dbce1ffe6c2b233953645d1b2c2a44ab501551e36bedd90d879",
"md5": "8d085903f6027fbfb457665f8843eac4",
"sha256": "507420556c411e5ee7b56c6bc6aa62cb939d867a73fc4e5a4e9e1ddf4adb50fb"
},
"downloads": -1,
"filename": "scrapy_webarchive-0.3.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "8d085903f6027fbfb457665f8843eac4",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<3.13,>=3.7",
"size": 20519,
"upload_time": "2025-01-13T09:32:04",
"upload_time_iso_8601": "2025-01-13T09:32:04.965900Z",
"url": "https://files.pythonhosted.org/packages/db/dc/d21e06a88dbce1ffe6c2b233953645d1b2c2a44ab501551e36bedd90d879/scrapy_webarchive-0.3.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "63b3de9e88a8d34e360343b16d68642f314fd2d306f9a8630bb50924a88da248",
"md5": "65d40aec357f6ce0ee96064992e7a924",
"sha256": "82f61e2c6786ea9830c56b6cfaa22e9d1387c3d989b206ba86de3af6f3b66727"
},
"downloads": -1,
"filename": "scrapy_webarchive-0.3.0.tar.gz",
"has_sig": false,
"md5_digest": "65d40aec357f6ce0ee96064992e7a924",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<3.13,>=3.7",
"size": 21036,
"upload_time": "2025-01-13T09:32:07",
"upload_time_iso_8601": "2025-01-13T09:32:07.097394Z",
"url": "https://files.pythonhosted.org/packages/63/b3/de9e88a8d34e360343b16d68642f314fd2d306f9a8630bb50924a88da248/scrapy_webarchive-0.3.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-01-13 09:32:07",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "q-m",
"github_project": "scrapy-webarchive",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"tox": true,
"lcname": "scrapy-webarchive"
}
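
The record above mirrors what PyPI's public JSON API exposes for this release. As a minimal sketch using only the standard library and the well-known `https://pypi.org/pypi/<project>/<version>/json` endpoint, the snippet below fetches the release metadata and checks a locally downloaded file against the published SHA-256 digest (the local filename in the commented example is hypothetical):

```python
import hashlib
import json
import urllib.request

# Fetch release metadata for scrapy-webarchive 0.3.0 from the public PyPI JSON API.
with urllib.request.urlopen("https://pypi.org/pypi/scrapy-webarchive/0.3.0/json") as resp:
    release = json.load(resp)

# List the distribution files and their published SHA-256 digests.
for file_info in release["urls"]:
    print(file_info["filename"], file_info["digests"]["sha256"])

def sha256_matches(path: str, expected_hex: str) -> bool:
    """Return True if the file at `path` hashes to the published SHA-256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(8192), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_hex

# Example (hypothetical local path): check the sdist against the digest listed above.
# sha256_matches("scrapy_webarchive-0.3.0.tar.gz",
#                "82f61e2c6786ea9830c56b6cfaa22e9d1387c3d989b206ba86de3af6f3b66727")
```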