pyselenscrapr


Name: pyselenscrapr [JSON]
Version: 0.0.81 [PyPI version] [JSON]
download
home_page: None
Summary: A web scraping library for selenium and beautifulsoup
upload_time: 2024-05-21 10:24:28
maintainer: None
docs_url: None
author: None
requires_python: >=3.7
license: None
keywords:
VCS:
bugtrack_url:
requirements: No requirements were recorded.
Travis-CI: No Travis.
coveralls test coverage: No coveralls.
PySelenScrapr
=============

PySelenScrapr helps you build a reliable scraper out of individual steps,
with validation steps and repeat logic. The Usage section below shows
sample code for such a scraper, including some validation logic.

Installation
------------

.. code:: bash

    pip install pyselenscrapr


Usage
-----

.. code:: python

    from pyselenscraper.ScrapingBot import ScrapingBot, TakeScreenshotModes
    from pyselenscraper.ScrapingLogic import ScrapingLogic
    from pyselenscraper.ScrapingStep import ScrapingStep, ScrapingStepInterval, ScrapingStepErrorHandling

    ...
    # Initialize selenium driver
    driver = webdriver.Remote("http://localhost:4444/wd/hub", options=ff_options)
    ...

    robot = ScrapingBot(driver, take_screenshots_mode=TakeScreenshotModes.Always)

    try:

       search_command = "wikipedia matrix movies"

       robot.add_step(ScrapingStep("Check if there is a visible 'Accept all' or 'decline all' button",
                                   lambda l: l.get_best_element("//button//div[contains(text(), 'All')]").click(),
                                   interval=ScrapingStepInterval.BeforeAnyStep,
                                   error_handling=ScrapingStepErrorHandling.Ignore,
                                   can_execute=lambda l: l.current_url and "google" in l.current_url and l.is_visible("//button//div[contains(text(), 'All')]"),
                                   was_executed=lambda l: not l.is_visible("//button//div[contains(text(), 'All')]"),),
                      step_group="default")

       ...
       # Your scraper Logic here
       ...

       finished_success = robot.run()

       if finished_success:
           print("The robot finished successfully")
       else:
           print("The robot did not finish successfully")

    except Exception as e:
       log.error(e)
    finally:
       driver.quit()


Documentation
-------------

The documentation is available at https://pyselenscrapr.readthedocs.io/.

License
-------

This project is licensed under the MIT License - see the
`LICENSE.md <LICENSE.md>`__ file for details.

            

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "pyselenscrapr",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.7",
    "maintainer_email": null,
    "keywords": null,
    "author": null,
    "author_email": "donnercody <donnercody86@gmail.com>",
    "download_url": "https://files.pythonhosted.org/packages/0b/35/d723e0bffd3314626ecec4f3ba90c0e598df406e6788d60b425a94653471/pyselenscrapr-0.0.81.tar.gz",
    "platform": null,
    "description": "PySelenScrapr\n=============\n\nHere is a sample code for a good scraper logic with some validation\nlogic. It helps to build a good scraper with validation steps and repeat\nlogic.\n\nInstallation\n------------\n\n.. code:: bash\n\n    pip install pyselenscrapr\n\n\nUsage\n-----\n\n.. code:: python\n\n    from pyselenscraper.ScrapingBot import ScrapingBot, TakeScreenshotModes\n    from pyselenscraper.ScrapingLogic import ScrapingLogic\n    from pyselenscraper.ScrapingStep import ScrapingStep, ScrapingStepInterval, ScrapingStepErrorHandling\n\n    ...\n    # Initialize selenium driver\n    driver = webdriver.Remote(\"http://localhost:4444/wd/hub\", options=ff_options)\n    ...\n\n    robot = ScrapingBot(driver, take_screenshots_mode=TakeScreenshotModes.Always)\n\n    try:\n\n       search_command = \"wikipedia matrix movies\"\n\n       robot.add_step(ScrapingStep(\"Check if there is a visible 'Accept all' or 'decline all' button\",\n                                   lambda l: l.get_best_element(\"//button//div[contains(text(), 'All')]\").click(),\n                                   interval=ScrapingStepInterval.BeforeAnyStep,\n                                   error_handling=ScrapingStepErrorHandling.Ignore,\n                                   can_execute=lambda l: l.current_url and \"google\" in l.current_url and l.is_visible(\"//button//div[contains(text(), 'All')]\"),\n                                   was_executed=lambda l: not l.is_visible(\"//button//div[contains(text(), 'All')]\"),),\n                      step_group=\"default\")\n\n       ...\n       # Your scraper Logic here\n       ...\n\n       finished_success = robot.run()\n\n       if finished_success:\n           print(\"The robot finished successfully\")\n       else:\n           print(\"The robot did not finish successfully\")\n\n    except Exception as e:\n       log.error(e)\n    finally:\n       driver.quit()\n\n\nDocumentation\n~~~~~~~~~~~~~\n\nThe documentation is 
available at https://pyselenscrapr.readthedocs.io/.\n\nLicense\n-------\n\nThis project is licensed under the MIT License - see the\n`LICENSE.md <LICENSE.md>`__ file for details\n",
    "bugtrack_url": null,
    "license": null,
    "summary": "A web scraping library for selenium and beautifulsoup",
    "version": "0.0.81",
    "project_urls": null,
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "a9a53eabc40696eaf595fb5da082854c08e1ea7bf72551ca09bae6565462e276",
                "md5": "126c4b35e358b22a7b9c217c53a384b9",
                "sha256": "3faace5e72a66bf22b429878c69835924b8fa712606516f7bad0fe8261f5b5c1"
            },
            "downloads": -1,
            "filename": "pyselenscrapr-0.0.81-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "126c4b35e358b22a7b9c217c53a384b9",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.7",
            "size": 13724,
            "upload_time": "2024-05-21T10:24:26",
            "upload_time_iso_8601": "2024-05-21T10:24:26.901947Z",
            "url": "https://files.pythonhosted.org/packages/a9/a5/3eabc40696eaf595fb5da082854c08e1ea7bf72551ca09bae6565462e276/pyselenscrapr-0.0.81-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "0b35d723e0bffd3314626ecec4f3ba90c0e598df406e6788d60b425a94653471",
                "md5": "a2be10159bf8df646113dea17cd93bcd",
                "sha256": "0b94dccb614fc0652d2f37b5f055ee64d7dda7e216754ead4ae38bf17528bcd4"
            },
            "downloads": -1,
            "filename": "pyselenscrapr-0.0.81.tar.gz",
            "has_sig": false,
            "md5_digest": "a2be10159bf8df646113dea17cd93bcd",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.7",
            "size": 15587,
            "upload_time": "2024-05-21T10:24:28",
            "upload_time_iso_8601": "2024-05-21T10:24:28.021757Z",
            "url": "https://files.pythonhosted.org/packages/0b/35/d723e0bffd3314626ecec4f3ba90c0e598df406e6788d60b425a94653471/pyselenscrapr-0.0.81.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-05-21 10:24:28",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "pyselenscrapr"
}
        
Elapsed time: 0.28748s