crawler


Name: crawler
Version: 0.0.2
download:
home_page: UNKNOWN
Summary: Web Scraping Framework based on py3 asyncio
upload_time: 2016-06-15 09:47:49
maintainer: None
docs_url: None
author: Gregory Petukhov
requires_python: None
license: MIT
keywords
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
=======
Crawler
=======

.. image:: https://travis-ci.org/lorien/crawler.png?branch=master
    :target: https://travis-ci.org/lorien/crawler

.. image:: https://coveralls.io/repos/lorien/crawler/badge.svg?branch=master
    :target: https://coveralls.io/r/lorien/crawler?branch=master

.. image:: https://pypip.in/download/crawler/badge.svg?period=month
    :target: https://pypi.python.org/pypi/crawler

.. image:: https://pypip.in/version/crawler/badge.svg
    :target: https://pypi.python.org/pypi/crawler

.. image:: https://landscape.io/github/lorien/crawler/master/landscape.png
   :target: https://landscape.io/github/lorien/crawler/master

Web scraping framework based on py3 asyncio & aiohttp libraries.


Usage Example
=============

.. code:: python

    import re
    from itertools import islice

    from crawler import Crawler, Request

    RE_TITLE = re.compile(r'<title>([^<]+)</title>', re.S | re.I)

    class TestCrawler(Crawler):
        def task_generator(self):
            for host in islice(open('var/domains.txt'), 100):
                host = host.strip()
                if host:
                    yield Request('http://%s/' % host, tag='page')

        def handler_page(self, req, res):
            print('Result of request to {}'.format(req.url))
            try:
                title = RE_TITLE.search(res.body).group(1)
            except AttributeError:
                title = 'N/A'
            print('Title: {}'.format(title))

    bot = TestCrawler(concurrency=10)
    bot.run()


Installation
============

.. code:: bash

    pip install crawler


Dependencies
============

* Python>=3.4
* aiohttp
            

Raw data

            {
    "_id": null,
    "home_page": "UNKNOWN",
    "name": "crawler",
    "maintainer": null,
    "docs_url": null,
    "requires_python": null,
    "maintainer_email": null,
    "keywords": null,
    "author": "Gregory Petukhov",
    "author_email": "lorien@lorien.name",
    "download_url": "https://files.pythonhosted.org/packages/8d/42/2b042beebf63f6d490d38b698f06ee4fdd16a1d32fa2373a6b662a37a33d/crawler-0.0.2.tar.gz",
    "platform": "UNKNOWN",
    "description": "=======\nCrawler\n=======\n\n.. image:: https://travis-ci.org/lorien/crawler.png?branch=master\n    :target: https://travis-ci.org/lorien/crawler\n\n.. image:: https://coveralls.io/repos/lorien/crawler/badge.svg?branch=master\n    :target: https://coveralls.io/r/lorien/crawler?branch=master\n\n.. image:: https://pypip.in/download/crawler/badge.svg?period=month\n    :target: https://pypi.python.org/pypi/crawler\n\n.. image:: https://pypip.in/version/crawler/badge.svg\n    :target: https://pypi.python.org/pypi/crawler\n\n.. image:: https://landscape.io/github/lorien/crawler/master/landscape.png\n   :target: https://landscape.io/github/lorien/crawler/master\n\nWeb scraping framework based on py3 asyncio & aiohttp libraries.\n\n\nUsage Example\n=============\n\n.. code:: python\n\n    import re\n    from itertools import islice\n\n    from crawler import Crawler, Request\n\n    RE_TITLE = re.compile(r'<title>([^<]+)</title>', re.S | re.I)\n\n    class TestCrawler(Crawler):\n        def task_generator(self):\n            for host in islice(open('var/domains.txt'), 100):\n                host = host.strip()\n                if host:\n                    yield Request('http://%s/' % host, tag='page')\n\n        def handler_page(self, req, res):\n            print('Result of request to {}'.format(req.url))\n            try:\n                title = RE_TITLE.search(res.body).group(1)\n            except AttributeError:\n                title = 'N/A'\n            print('Title: {}'.format(title))\n\n    bot = TestCrawler(concurrency=10)\n    bot.run()\n\n\nInstallation\n============\n\n.. code:: bash\n\n    pip install crawler\n\n\nDependencies\n============\n\n* Python>=3.4\n* aiohttp",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Web Scraping Framework based on py3 asyncio",
    "version": "0.0.2",
    "project_urls": {
        "Download": "UNKNOWN",
        "Homepage": "UNKNOWN"
    },
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "8d422b042beebf63f6d490d38b698f06ee4fdd16a1d32fa2373a6b662a37a33d",
                "md5": "272f2a88e1376ac09f2d310405ff2bb8",
                "sha256": "b6b5bcc2f2a64ac60251bee1494bd7ea98605ef1a8bf87db5194bea4bdd420d2"
            },
            "downloads": -1,
            "filename": "crawler-0.0.2.tar.gz",
            "has_sig": false,
            "md5_digest": "272f2a88e1376ac09f2d310405ff2bb8",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 6028,
            "upload_time": "2016-06-15T09:47:49",
            "upload_time_iso_8601": "2016-06-15T09:47:49.609926Z",
            "url": "https://files.pythonhosted.org/packages/8d/42/2b042beebf63f6d490d38b698f06ee4fdd16a1d32fa2373a6b662a37a33d/crawler-0.0.2.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2016-06-15 09:47:49",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "crawler"
}
        
Elapsed time: 0.59526s