sitemap-generator


Name: sitemap-generator JSON
Version: 0.9.13 PyPI version JSON
download
home_page: https://github.com/Haikson/sitemap-generator
Summary: web crawler and sitemap generator.
upload_time: 2023-07-08 09:34:01
maintainer
docs_url: None
author: Kamo Petrosyan
requires_python
license: GPL3
keywords
VCS
bugtrack_url
requirements: aiohttp, asyncio, aiofile, lxml, cssselect
Travis-CI: No Travis.
coveralls test coverage: No coveralls.
pysitemap
=========

Sitemap generator

installing
----------

::

    pip install sitemap-generator

requirements
------------

::

    asyncio
    aiofile
    aiohttp

example
-------

::

    import sys
    import logging
    from pysitemap import crawler
    from pysitemap.parsers.lxml_parser import Parser

    if __name__ == '__main__':
        if '--iocp' in sys.argv:
            from asyncio import events, windows_events
            sys.argv.remove('--iocp')
            logging.info('using iocp')
            el = windows_events.ProactorEventLoop()
            events.set_event_loop(el)

        # root_url = sys.argv[1]
        root_url = 'https://www.haikson.com'
        crawler(
            root_url, out_file='debug/sitemap.xml', exclude_urls=[".pdf", ".jpg", ".zip"],
            http_request_options={"ssl": False}, parser=Parser
        )

TODO
-----

-  Big sites with more than 100K pages will use more than 100MB of
   memory. Move the queue and done lists into a database. Write Queue and
   Done backend classes based on:
-  Lists
-  SQLite database
-  Redis
-  Write an API that lets users extend the crawler with their own backends

changelog
---------


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/Haikson/sitemap-generator",
    "name": "sitemap-generator",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "",
    "author": "Kamo Petrosyan",
    "author_email": "kamo@haikson.com",
    "download_url": "https://files.pythonhosted.org/packages/7c/d9/a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631/sitemap-generator-0.9.13.tar.gz",
    "platform": null,
    "description": "pysitemap\n=========\n\nSitemap generator\n\ninstalling\n----------\n\n::\n\n    pip install sitemap-generator\n\nrequirements\n------------\n\n::\n\n    asyncio\n    aiofile\n    aiohttp\n\nexample\n-------\n\n::\n\n    import sys\n    import logging\n    from pysitemap import crawler\n    from pysitemap.parsers.lxml_parser import Parser\n\n    if __name__ == '__main__':\n        if '--iocp' in sys.argv:\n            from asyncio import events, windows_events\n            sys.argv.remove('--iocp')\n            logging.info('using iocp')\n            el = windows_events.ProactorEventLoop()\n            events.set_event_loop(el)\n\n        # root_url = sys.argv[1]\n        root_url = 'https://www.haikson.com'\n        crawler(\n            root_url, out_file='debug/sitemap.xml', exclude_urls=[\".pdf\", \".jpg\", \".zip\"],\n            http_request_options={\"ssl\": False}, parser=Parser\n        )\n\nTODO\n-----\n\n-  big sites with count of pages more then 100K will use more then 100MB\n   memory. Move queue and done lists into database. Write Queue and Done\n   backend classes based on\n-  Lists\n-  SQLite database\n-  Redis\n-  Write api for extending by user backends\n\nchangelog\n---------\n\n",
    "bugtrack_url": null,
    "license": "GPL3",
    "summary": "web crawler and sitemap generator.",
    "version": "0.9.13",
    "project_urls": {
        "Homepage": "https://github.com/Haikson/sitemap-generator"
    },
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "4448855480617478c341732174421c69e9350fdec7efd7d6df8d203ee89fb14c",
                "md5": "07cab60bcb0733c510eec8bf3c2282a9",
                "sha256": "1eb690631895f5940269747f08e966c3fcc0efebd8a3d934ce2549aaceef0885"
            },
            "downloads": -1,
            "filename": "sitemap_generator-0.9.13-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "07cab60bcb0733c510eec8bf3c2282a9",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 15950,
            "upload_time": "2023-07-08T09:33:58",
            "upload_time_iso_8601": "2023-07-08T09:33:58.342364Z",
            "url": "https://files.pythonhosted.org/packages/44/48/855480617478c341732174421c69e9350fdec7efd7d6df8d203ee89fb14c/sitemap_generator-0.9.13-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "7cd9a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631",
                "md5": "c1e13d2fc27e433f344217e84de5f148",
                "sha256": "62ed54b45e7d3c3380a10bc877f3f213a4b13ac188d168da7d7aae10902c9327"
            },
            "downloads": -1,
            "filename": "sitemap-generator-0.9.13.tar.gz",
            "has_sig": false,
            "md5_digest": "c1e13d2fc27e433f344217e84de5f148",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 13515,
            "upload_time": "2023-07-08T09:34:01",
            "upload_time_iso_8601": "2023-07-08T09:34:01.044145Z",
            "url": "https://files.pythonhosted.org/packages/7c/d9/a67678449c608eba9ad2eebe9c4189d18cb529f1a3f889abc246b2666631/sitemap-generator-0.9.13.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-07-08 09:34:01",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "Haikson",
    "github_project": "sitemap-generator",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "aiohttp",
            "specs": []
        },
        {
            "name": "asyncio",
            "specs": []
        },
        {
            "name": "aiofile",
            "specs": []
        },
        {
            "name": "lxml",
            "specs": []
        },
        {
            "name": "cssselect",
            "specs": []
        }
    ],
    "lcname": "sitemap-generator"
}
        
Elapsed time: 0.11702s