pysitemap
=========
Sitemap generator
installing
----------
::
pip install sitemap-generator
requirements
------------
::
asyncio
aiofile
aiohttp
example
-------
::
import sys
import logging

from pysitemap import crawler
from pysitemap.parsers.lxml_parser import Parser

if __name__ == '__main__':
    # Optional '--iocp' flag: install a Windows ProactorEventLoop as the
    # current asyncio event loop before crawling. The flag is removed from
    # sys.argv so it is not mistaken for a positional argument later.
    if '--iocp' in sys.argv:
        # Imported lazily: asyncio.windows_events only exists on Windows.
        from asyncio import events, windows_events
        sys.argv.remove('--iocp')
        logging.info('using iocp')
        el = windows_events.ProactorEventLoop()
        events.set_event_loop(el)

    # To take the start URL from the command line instead, use:
    # root_url = sys.argv[1]
    root_url = 'https://www.haikson.com'

    # Crawl starting at root_url and write the result to out_file.
    # NOTE(review): exclude_urls presumably lists URL fragments to skip, and
    # http_request_options is presumably forwarded to the HTTP client
    # ({"ssl": False} would then turn off certificate checks) -- confirm
    # against the pysitemap API.
    crawler(
        root_url, out_file='debug/sitemap.xml', exclude_urls=[".pdf", ".jpg", ".zip"],
        http_request_options={"ssl": False}, parser=Parser
    )
TODO
-----
- Big sites with more than 100K pages will use more than 100 MB of
memory. Move the queue and done lists into a database. Write Queue and Done
backend classes based on
- Lists
- SQLite database
- Redis
- Write an API so users can extend it with their own backends
changelog
---------
Raw data
{
"_id": null,
"home_page": "https://github.com/Haikson/sitemap-generator",
"name": "sitemap-generator",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "",
"author": "Kamo Petrosyan",
"author_email": "kamo@haikson.com",
"download_url": "https://files.pythonhosted.org/packages/7c/d9/a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631/sitemap-generator-0.9.13.tar.gz",
"platform": null,
"description": "pysitemap\n=========\n\nSitemap generator\n\ninstalling\n----------\n\n::\n\n pip install sitemap-generator\n\nrequirements\n------------\n\n::\n\n asyncio\n aiofile\n aiohttp\n\nexample\n-------\n\n::\n\n import sys\n import logging\n from pysitemap import crawler\n from pysitemap.parsers.lxml_parser import Parser\n\n if __name__ == '__main__':\n if '--iocp' in sys.argv:\n from asyncio import events, windows_events\n sys.argv.remove('--iocp')\n logging.info('using iocp')\n el = windows_events.ProactorEventLoop()\n events.set_event_loop(el)\n\n # root_url = sys.argv[1]\n root_url = 'https://www.haikson.com'\n crawler(\n root_url, out_file='debug/sitemap.xml', exclude_urls=[\".pdf\", \".jpg\", \".zip\"],\n http_request_options={\"ssl\": False}, parser=Parser\n )\n\nTODO\n-----\n\n- big sites with count of pages more then 100K will use more then 100MB\n memory. Move queue and done lists into database. Write Queue and Done\n backend classes based on\n- Lists\n- SQLite database\n- Redis\n- Write api for extending by user backends\n\nchangelog\n---------\n\n",
"bugtrack_url": null,
"license": "GPL3",
"summary": "web crawler and sitemap generator.",
"version": "0.9.13",
"project_urls": {
"Homepage": "https://github.com/Haikson/sitemap-generator"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "4448855480617478c341732174421c69e9350fdec7efd7d6df8d203ee89fb14c",
"md5": "07cab60bcb0733c510eec8bf3c2282a9",
"sha256": "1eb690631895f5940269747f08e966c3fcc0efebd8a3d934ce2549aaceef0885"
},
"downloads": -1,
"filename": "sitemap_generator-0.9.13-py3-none-any.whl",
"has_sig": false,
"md5_digest": "07cab60bcb0733c510eec8bf3c2282a9",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 15950,
"upload_time": "2023-07-08T09:33:58",
"upload_time_iso_8601": "2023-07-08T09:33:58.342364Z",
"url": "https://files.pythonhosted.org/packages/44/48/855480617478c341732174421c69e9350fdec7efd7d6df8d203ee89fb14c/sitemap_generator-0.9.13-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "7cd9a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631",
"md5": "c1e13d2fc27e433f344217e84de5f148",
"sha256": "62ed54b45e7d3c3380a10bc877f3f213a4b13ac188d168da7d7aae10902c9327"
},
"downloads": -1,
"filename": "sitemap-generator-0.9.13.tar.gz",
"has_sig": false,
"md5_digest": "c1e13d2fc27e433f344217e84de5f148",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 13515,
"upload_time": "2023-07-08T09:34:01",
"upload_time_iso_8601": "2023-07-08T09:34:01.044145Z",
"url": "https://files.pythonhosted.org/packages/7c/d9/a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631/sitemap-generator-0.9.13.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-07-08 09:34:01",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "Haikson",
"github_project": "sitemap-generator",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "aiohttp",
"specs": []
},
{
"name": "asyncio",
"specs": []
},
{
"name": "aiofile",
"specs": []
},
{
"name": "lxml",
"specs": []
},
{
"name": "cssselect",
"specs": []
}
],
"lcname": "sitemap-generator"
}