selectorlib


Name: selectorlib JSON
Version 0.16.0 PyPI version JSON
download
home_page: https://github.com/scrapehero/selectorlib
Summary: A library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them
upload_time: 2020-01-08 14:50:58
maintainer
docs_url: None
author: scrapehero
requires_python
license: MIT license
keywords selectorlib
VCS
bugtrack_url
requirements Click pyyaml parsel
Travis-CI
coveralls test coverage No coveralls.
===========
selectorlib
===========


.. image:: https://img.shields.io/pypi/v/selectorlib.svg
        :target: https://pypi.python.org/pypi/selectorlib

.. image:: https://img.shields.io/travis/scrapehero/selectorlib.svg
        :target: https://travis-ci.org/scrapehero/selectorlib

.. image:: https://readthedocs.org/projects/selectorlib/badge/?version=latest
        :target: https://selectorlib.readthedocs.io/en/latest/?badge=latest
        :alt: Documentation Status


.. image:: https://pyup.io/repos/github/scrapehero/selectorlib/shield.svg
     :target: https://pyup.io/repos/github/scrapehero/selectorlib/
     :alt: Updates



A library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them

* Free software: MIT license
* Documentation: https://selectorlib.readthedocs.io.


Example
--------

>>> from selectorlib import Extractor
>>> yaml_string = """
    title:
        css: "h1"
        type: Text
    link:
        css: "h2 a"
        type: Link
    """
>>> extractor = Extractor.from_yaml_string(yaml_string)
>>> html = """
    <h1>Title</h1>
    <h2>Usage
        <a class="headerlink" href="http://test">¶</a>
    </h2>
    """
>>> extractor.extract(html)
{'title': 'Title', 'link': 'http://test'}


=======
History
=======



            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/scrapehero/selectorlib",
    "name": "selectorlib",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "selectorlib",
    "author": "scrapehero",
    "author_email": "pypi@scrapehero.com",
    "download_url": "https://files.pythonhosted.org/packages/00/57/f1ae8c2740980eec4a142fd8b1f6603294bba079add5e29bf62f51503c96/selectorlib-0.16.0.tar.gz",
    "platform": "",
    "description": "===========\nselectorlib\n===========\n\n\n.. image:: https://img.shields.io/pypi/v/selectorlib.svg\n        :target: https://pypi.python.org/pypi/selectorlib\n\n.. image:: https://img.shields.io/travis/scrapehero/selectorlib.svg\n        :target: https://travis-ci.org/scrapehero/selectorlib\n\n.. image:: https://readthedocs.org/projects/selectorlib/badge/?version=latest\n        :target: https://selectorlib.readthedocs.io/en/latest/?badge=latest\n        :alt: Documentation Status\n\n\n.. image:: https://pyup.io/repos/github/scrapehero/selectorlib/shield.svg\n     :target: https://pyup.io/repos/github/scrapehero/selectorlib/\n     :alt: Updates\n\n\n\nA library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them\n\n* Free software: MIT license\n* Documentation: https://selectorlib.readthedocs.io.\n\n\nExample\n--------\n\n>>> from selectorlib import Extractor\n>>> yaml_string = \"\"\"\n    title:\n        css: \"h1\"\n        type: Text\n    link:\n        css: \"h2 a\"\n        type: Link\n    \"\"\"\n>>> extractor = Extractor.from_yaml_string(yaml_string)\n>>> html = \"\"\"\n    <h1>Title</h1>\n    <h2>Usage\n        <a class=\"headerlink\" href=\"http://test\">\u00b6</a>\n    </h2>\n    \"\"\"\n>>> extractor.extract(html)\n{'title': 'Title', 'link': 'http://test'}\n\n\n=======\nHistory\n=======\n\n\n",
    "bugtrack_url": null,
    "license": "MIT license",
    "summary": "A library to read a YML file with Xpath or CSS Selectors and extract data from HTML pages using them",
    "version": "0.16.0",
    "project_urls": {
        "Homepage": "https://github.com/scrapehero/selectorlib"
    },
    "split_keywords": [
        "selectorlib"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "1e3e7ad0a01b07c066cf79c431324970869345e4d249242d70f20e939a5c630b",
                "md5": "8aa1a3ca4568c3d278192b6d8766f1e9",
                "sha256": "007c156d01ab8de86f72f0914d66d704f6fb4ebda0db0e04f204083d63e55bc6"
            },
            "downloads": -1,
            "filename": "selectorlib-0.16.0-py2.py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "8aa1a3ca4568c3d278192b6d8766f1e9",
            "packagetype": "bdist_wheel",
            "python_version": "py2.py3",
            "requires_python": null,
            "size": 5832,
            "upload_time": "2020-01-08T14:50:57",
            "upload_time_iso_8601": "2020-01-08T14:50:57.800178Z",
            "url": "https://files.pythonhosted.org/packages/1e/3e/7ad0a01b07c066cf79c431324970869345e4d249242d70f20e939a5c630b/selectorlib-0.16.0-py2.py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "0057f1ae8c2740980eec4a142fd8b1f6603294bba079add5e29bf62f51503c96",
                "md5": "da86499c58c7e12a63e77ab592240e61",
                "sha256": "154f7c5f39bc825a0717b24ca2cc0b95dcb2caa1669239f3d4ac6e41af398731"
            },
            "downloads": -1,
            "filename": "selectorlib-0.16.0.tar.gz",
            "has_sig": false,
            "md5_digest": "da86499c58c7e12a63e77ab592240e61",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 188826,
            "upload_time": "2020-01-08T14:50:58",
            "upload_time_iso_8601": "2020-01-08T14:50:58.964482Z",
            "url": "https://files.pythonhosted.org/packages/00/57/f1ae8c2740980eec4a142fd8b1f6603294bba079add5e29bf62f51503c96/selectorlib-0.16.0.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2020-01-08 14:50:58",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "scrapehero",
    "github_project": "selectorlib",
    "travis_ci": true,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "Click",
            "specs": [
                [
                    ">=",
                    "6.0"
                ]
            ]
        },
        {
            "name": "pyyaml",
            "specs": [
                [
                    ">=",
                    "3.12"
                ]
            ]
        },
        {
            "name": "parsel",
            "specs": [
                [
                    ">=",
                    "1.5.1"
                ]
            ]
        }
    ],
    "tox": true,
    "lcname": "selectorlib"
}
        
Elapsed time: 0.18878s