# scrapetools
A collection of tools to aid in web scraping.<br>
Install using:
<pre>pip install scrapeTools</pre>
scrapeTools contains four modules: emailScraper, linkScraper, phoneScraper, and inputScraper.<br>
Only linkScraper defines a class (LinkScraper); the other three modules are used through plain functions, as shown below.<br>
<br>
Basic usage:<br>
<pre>
from scrapeTools.emailScraper import scrapeEmails
from scrapeTools.phoneScraper import scrapePhoneNumbers
from scrapeTools.linkScraper import LinkScraper
from scrapeTools.inputScraper import scrapeInputs
import requests
url = 'https://somewebsite.com'
source = requests.get(url).text
emails = scrapeEmails(source)
phoneNumbers = scrapePhoneNumbers(source)
linkScraper = LinkScraper(source, url)
linkScraper.scrapePage()
# links can be accessed and filtered via the getLinks() method
sameSiteLinks = linkScraper.getLinks(sameSiteOnly=True)
sameSiteImageLinks = linkScraper.getLinks(linkType='img', sameSiteOnly=True)
externalImageLinks = linkScraper.getLinks(linkType='img', excludedLinks=sameSiteImageLinks)
# scrapeInputs() returns a tuple of BeautifulSoup Tag elements for various user input elements
forms, inputs, buttons, selects, textAreas = scrapeInputs(source)
</pre>
Raw data
{
"_id": null,
"home_page": "",
"name": "scrapeTools",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": "",
"keywords": "email,html,scrape,scraping,web,webscraping",
"author": "Matt Manes",
"author_email": "",
"download_url": "https://files.pythonhosted.org/packages/5e/ed/8fa15224ae1efea718972cfaffa381ccfcb0c2cc1275dc365f17b1ef9fad/scrapetools-0.2.1.tar.gz",
"platform": null,
"description": "# scrapetools\nA collection of tools to aid in web scraping.<br>\nInstall using:\n<pre>pip install scrapeTools</pre>\nscrapeTools contains four modules: emailScraper, linkScraper, phoneScraper, and inputScraper.<br>\nOnly linkScraper contains a class.<br>\n<br>\nBasic usage:<br>\n<pre>\nfrom scrapeTools.emailScraper import scrapeEmails\nfrom scrapeTools.phoneScraper import scrapePhoneNumbers\nfrom scrapeTools.linkScraper import LinkScraper\nfrom scrapeTools.inputScraper import scrapeInputs\nimport requests\n\nurl = 'https://somewebsite.com'\nsource = requests.get(url).text\n\nemails = scrapeEmails(source)\n\nphoneNumbers = scrapePhoneNumbers(source)\n\nlinkScraper = LinkScraper(source, url)\nlinkScraper.scrapePage()\n# links can be accessed and filtered via the getLinks() function\nsameSiteLinks = linkScraper.getLinks(sameSiteOnly=True)\nsameSiteImageLinks =linkScraper.getLinks(linkType='img', sameSiteOnly=True)\nexternalImageLinks = linkScraper.getLinks(linkType='img', excludedLinks=sameSiteImageLinks)\n\n# scrapeInputs() returns a tuple of BeautifulSoup Tag elements for various user input elements\nforms, inputs, buttons, selects, textAreas = scrapeInputs(source)\n</pre>\n",
"bugtrack_url": null,
"license": "",
"summary": "A collection of tools to aid in web scraping.",
"version": "0.2.1",
"split_keywords": [
"email",
"html",
"scrape",
"scraping",
"web",
"webscraping"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "29997cfad4973e871e77527c9f50a045b8de6c6b3c83f1dfce10ee76ae18fa75",
"md5": "f8b4bb7167ebd3439b25a33eaecdf969",
"sha256": "0d2906b18390b0f32e6a721d12847eb7be6bbf111d99509cbd642c0a61cae904"
},
"downloads": -1,
"filename": "scrapetools-0.2.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "f8b4bb7167ebd3439b25a33eaecdf969",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 8282,
"upload_time": "2023-01-18T22:56:45",
"upload_time_iso_8601": "2023-01-18T22:56:45.786857Z",
"url": "https://files.pythonhosted.org/packages/29/99/7cfad4973e871e77527c9f50a045b8de6c6b3c83f1dfce10ee76ae18fa75/scrapetools-0.2.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "5eed8fa15224ae1efea718972cfaffa381ccfcb0c2cc1275dc365f17b1ef9fad",
"md5": "7d6d79db64895870141560d7e52da94a",
"sha256": "7b266d02447f2d1f1d662f17d089d02b290c77278cb2372bdd7bc1f38e204cbc"
},
"downloads": -1,
"filename": "scrapetools-0.2.1.tar.gz",
"has_sig": false,
"md5_digest": "7d6d79db64895870141560d7e52da94a",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.10",
"size": 21074,
"upload_time": "2023-01-18T22:56:47",
"upload_time_iso_8601": "2023-01-18T22:56:47.457856Z",
"url": "https://files.pythonhosted.org/packages/5e/ed/8fa15224ae1efea718972cfaffa381ccfcb0c2cc1275dc365f17b1ef9fad/scrapetools-0.2.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-01-18 22:56:47",
"github": false,
"gitlab": false,
"bitbucket": false,
"lcname": "scrapetools"
}