# pyhtmltext
pyhtmltext is a useful and flexible tool for extracting text from HTML.
# Help
See [documentation](docs/USAGE.md) for more details.
# Installation
```
pip install pyhtmltext
```
# Simple usage
```
from pyhtmltext import Extractor
html_string = '''<h2 class="widget-title"><span aria-hidden="true" class="icon-get-started"></span>Getting Started</h2><p>Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages. The following pages are a useful first step to get on your way writing programs with Python!</p>'''
extractor = Extractor(html=html_string)
# Extracting whole text from html with separator
extractor.extract_text()
#> "Getting Started|separator|Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages. The following pages are a useful first step to get on your way writing programs with Python!"
# Extracting sentences from html
extractor.extract_sentences()
#> ['Getting Started', "Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages.", 'The following pages are a useful first step to get on your way writing programs with Python!']
```
Raw data
{
"_id": null,
"home_page": "https://github.com/MaksimJames/pyhtmltext",
"name": "pyhtmltext",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.6",
"maintainer_email": "",
"keywords": "",
"author": "Maksim Prilepsky",
"author_email": "maksimprilepsky@yandex.ru",
"download_url": "https://files.pythonhosted.org/packages/6a/4e/cbccf033f2a8b7c4f77d85dd4d14e450a4722d8c49753d79d95e6a6f8eb9/pyhtmltext-0.1.tar.gz",
"platform": null,
"description": "\n# pyhtmltext\n\npyhtmltext is a usefull and flexible tool for extracting text from html.\n\n# Help\nSee [documentation](docs/USAGE.md) for more details.\n\n# Installation\n```\n pip install pyhtmltext\n```\n\n# Simple usage\n```\n from pyhtmltext import Extractor\n\n\n html_string = '''<h2 class=\"widget-title\"><span aria-hidden=\"true\" class=\"icon-get-started\"></span>Getting Started</h2><p>Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages. The following pages are a useful first step to get on your way writing programs with Python!</p>'''\n\n extractor = Extractor(html=html_string)\n\n # Extracting whole text from html with separator\n extractor.extract_text()\n #> \"Getting Started|separator|Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages. The following pages are a useful first step to get on your way writing programs with Python!\"\n\n # Extracting sentences from html\n extractor.extract_sentences()\n #> ['Getting Started', \"Python can be easy to pick up whether you're a first time programmer or you're experienced with other languages.\", 'The following pages are a useful first step to get on your way writing programs with Python!']\n```\n",
"bugtrack_url": null,
"license": "",
"summary": "Usefull tool for extracting text and sentences from html",
"version": "0.1",
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "085d0feba8103f73308409baba425f471f763087d281700a5384cd181bc7709d",
"md5": "5c1f97afc56a4e90a93d87eebcaa672a",
"sha256": "18b1f4d528ca9eb4bf09d789f82cfecc82e35436961284228b5639d39e1e588d"
},
"downloads": -1,
"filename": "pyhtmltext-0.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "5c1f97afc56a4e90a93d87eebcaa672a",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.6",
"size": 5237,
"upload_time": "2023-01-03T12:56:26",
"upload_time_iso_8601": "2023-01-03T12:56:26.817771Z",
"url": "https://files.pythonhosted.org/packages/08/5d/0feba8103f73308409baba425f471f763087d281700a5384cd181bc7709d/pyhtmltext-0.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "6a4ecbccf033f2a8b7c4f77d85dd4d14e450a4722d8c49753d79d95e6a6f8eb9",
"md5": "77f3e76aa4e016afb24ed286b04755bc",
"sha256": "b7dfd1cf60227b3072a6cbd73e7d67085d7ca8022e42f95ae16a39911053927d"
},
"downloads": -1,
"filename": "pyhtmltext-0.1.tar.gz",
"has_sig": false,
"md5_digest": "77f3e76aa4e016afb24ed286b04755bc",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.6",
"size": 4475,
"upload_time": "2023-01-03T12:56:28",
"upload_time_iso_8601": "2023-01-03T12:56:28.468591Z",
"url": "https://files.pythonhosted.org/packages/6a/4e/cbccf033f2a8b7c4f77d85dd4d14e450a4722d8c49753d79d95e6a6f8eb9/pyhtmltext-0.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-01-03 12:56:28",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "MaksimJames",
"github_project": "pyhtmltext",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "pyhtmltext"
}