# html-to-dash
Convert HTML to dash format.
# Installation
`pip install html-to-dash`
# Examples
## Basic usage
```python
from html_to_dash import parse_html
element_str = """
<div>
<div class='bg-gray-800' style='color:red;margin:10px'>
<svg aria-label="Ripples. Logo" role="img" xmlns="http://www.w3.org/2000/svg"></svg>
<a href="#" id="link1">A</a>
</div>
<div>text</div>
<div><a href="#" id="link1">a1</a>tail1<a href="#" id="link2">a2</a>tail2</div>
</div>
"""
parse_html(element_str)
```
Print:
```
# Tags : Unsupported [svg] removed.
Result:
html.Div(
children=[
html.Div(
className="bg-gray-800",
style={"color": "red", "margin": "10px"},
children=[html.A(href="#", id="link1", children=["A"])],
),
html.Div(children=["text"]),
html.Div(
children=[
html.A(href="#", id="link1", children=["a1"]),
html.Span(children=["tail1"]),
html.A(href="#", id="link2", children=["a2"]),
html.Span(children=["tail2"]),
]
),
]
)
```
- By default, only tags in the dash.html module are supported.
- Tags and attributes are checked, and those that are not supported are automatically removed.
- The tags and attributes are case-insensitive.
- If the provided HTML string is unclosed, a div will be automatically added as the root tag.
- The html, body, and head tags will be automatically removed without notification, as these tags may be automatically supplemented by the lxml module and are not supported in dash.
- The tail (the text after an element's end tag, but before the next sibling element's start tag) is automatically converted into the text of a span tag.
## Enable dash_svg
Use [dash-svg](https://github.com/stevej2608/dash-svg) module to render SVG tags.
```python
from html_to_dash import parse_html
element_str = """
<svg xmlns=" http://www.w3.org/2000/svg " version="1.1" width="300" height="300">
<rect x="100" y="100" width="100" height="100" fill="#e74c3c"></rect>
<polygon points="100,100 200,100 150,50" fill="#c0392b"></polygon>
<polygon points="200,100 200,200 250,150" fill="#f39c12"></polygon>
<polygon points="100,100 150,50 150,150 100,200" fill="#f1c40f"></polygon>
<polygon points="150,50 200,100 250,50 200,0" fill="#2ecc71"></polygon>
<polygon points="100,200 150,150 200,200 150,250" fill="#3498db"></polygon>
</svg>
"""
parse_html(element_str, enable_dash_svg=True)
```
Print:
```
Result:
dash_svg.Svg(
xmlns=" http://www.w3.org/2000/svg ",
version="1.1",
width="300",
height="300",
children=[
dash_svg.Rect(x="100", y="100", width="100", height="100", fill="#e74c3c"),
dash_svg.Polygon(points="100,100 200,100 150,50", fill="#c0392b"),
dash_svg.Polygon(points="200,100 200,200 250,150", fill="#f39c12"),
dash_svg.Polygon(points="100,100 150,50 150,150 100,200", fill="#f1c40f"),
dash_svg.Polygon(points="150,50 200,100 250,50 200,0", fill="#2ecc71"),
dash_svg.Polygon(points="100,200 150,150 200,200 150,250", fill="#3498db"),
],
)
```
- In the dash application, add `import dash_svg` so that the generated components render normally.
- dash_svg has higher priority than dash.html, but lower priority than the extra modules (extra_mod).
## Expanded usage
```python
from html_to_dash import parse_html
element_str = """
<html>
<body>
<div>
<input type="text" id="username" name="username" aria-label="Enter your username" aria-required="true">
<div class='bg-gray-800' style='color:red;margin:10px'>
<a href="#" id="link1">A</a>
</div>
<div>text</div>
<svg></svg>
<script></script>
<div><a href="#" id="link2">B</a></div>
</div>
</body>
</html>
"""
extra_mod = [{"dcc": {"Input": ["id", "type", "placeholder", "aria-*"]}}]
def tag_attr_func(tag, items):
if tag == "Input":
k, v = items
if "-" in k:
return f'**{{"{k}": "{v}"}}'
parsed_ret = parse_html(
element_str,
tag_map={"svg": "img"},
skip_tags=['script'],
extra_mod=extra_mod,
tag_attr_func=tag_attr_func,
if_return=True,
)
print(parsed_ret)
```
Print:
```
# Tags : Unsupported [script] removed.
# Attrs: Unsupported [name] in dcc.Input removed.
html.Div(
children=[
dcc.Input(
type="text",
id="username",
**{"aria-label": "Enter your username"},
**{"aria-required": "true"}
),
html.Div(
className="bg-gray-800",
style={"color": "red", "margin": "10px"},
children=[html.A(href="#", id="link1", children=["A"])],
),
html.Div(children=["text"]),
html.Img(),
html.Div(children=[html.A(href="#", id="link2", children=["B"])]),
]
)
```
- The \* sign is supported as a wildcard, like data-\*, aria-\*.
- Both class and className can be handled correctly.
- In fact, attributes containing the "-" symbol are already handled by default; tag_attr_func is used here only as an example. Similarly, the style attribute is handled correctly by default.
- If the tag_map param is provided, the corresponding tag names in the HTML are converted according to the dict before formal processing.
- A tag listed in skip_tags is removed together with its text. tag_map takes priority over skip_tags.
- Any custom module is supported, not only html and dcc, because the conversion is essentially string processing.
- Custom modules are prioritized in the order they are given, and all of them rank above the default dash.html module.
- The tag_attr_func param is a function that handles attribute formatting for a tag.
When adding quotation marks within the string, use `double quotation marks`; otherwise the black module may be unable to parse it.
For example, use `f'**{{"{k}": "{v}"}}'` instead of `f"**{{'{k}': '{v}'}}"`, and `f'{k}="{v}"'` instead of `f"{k}='{v}'"`.
- If the HTML structure is huge, set huge_tree to True.
# References
- https://github.com/mhowell86/convert-html-to-dash
- https://github.com/xhluca/convert-html-to-dash
Raw data
{
"_id": null,
"home_page": "https://github.com/PierXuY/html-to-dash",
"name": "html-to-dash",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.6",
"maintainer_email": "",
"keywords": "html,dash",
"author": "PierXuY",
"author_email": "",
"download_url": "https://files.pythonhosted.org/packages/38/a6/4523a71ffa90f4cae32d3c81c9da6ee20991014722fa3741f3c880558db2/html-to-dash-0.2.7.tar.gz",
"platform": null,
"description": "# html-to-dash\r\n\r\nConvert HTML to dash format.\r\n\r\n\r\n\r\n# Installation\r\n\r\n`pip install html-to-dash`\r\n\r\n\r\n\r\n# Examples\r\n\r\n## Basic usage\r\n\r\n```python\r\n\r\nfrom html_to_dash import parse_html\r\n\r\nelement_str = \"\"\"\r\n\r\n<div>\r\n\r\n <div class='bg-gray-800' style='color:red;margin:10px'>\r\n\r\n <svg aria-label=\"Ripples. Logo\" role=\"img\" xmlns=\"http://www.w3.org/2000/svg\"></svg>\r\n\r\n <a href=\"#\" id=\"link1\">A</a>\r\n\r\n </div>\r\n\r\n <div>text</div>\r\n\r\n <div><a href=\"#\" id=\"link1\">a1</a>tail1<a href=\"#\" id=\"link2\">a2</a>tail2</div>\r\n\r\n</div>\r\n\r\n\"\"\"\r\n\r\nparse_html(element_str)\r\n\r\n```\r\n\r\nPrint:\r\n\r\n```\r\n\r\n# Tags : Unsupported [svg] removed.\r\n\r\nResult:\r\n\r\nhtml.Div(\r\n\r\n children=[\r\n\r\n html.Div(\r\n\r\n className=\"bg-gray-800\",\r\n\r\n style={\"color\": \"red\", \"margin\": \"10px\"},\r\n\r\n children=[html.A(href=\"#\", id=\"link1\", children=[\"A\"])],\r\n\r\n ),\r\n\r\n html.Div(children=[\"text\"]),\r\n\r\n html.Div(\r\n\r\n children=[\r\n\r\n html.A(href=\"#\", id=\"link1\", children=[\"a1\"]),\r\n\r\n html.Span(children=[\"tail1\"]),\r\n\r\n html.A(href=\"#\", id=\"link2\", children=[\"a2\"]),\r\n\r\n html.Span(children=[\"tail2\"]),\r\n\r\n ]\r\n\r\n ),\r\n\r\n ]\r\n\r\n)\r\n\r\n```\r\n\r\n- By default, only tags in the dash.html module are supported.\r\n\r\n- Tags and attributes are checked, and those that are not supported are automatically removed.\r\n\r\n- The tags and attributes are case-insensitive.\r\n\r\n- If the provided HTML string is unclosed, div will be automatically added as the root tag.\r\n\r\n- The html, body, and head tags will be automatically removed without notification, as these tags may be automatically supplemented by the lxml module and are not supported in dash.\r\n\r\n- The tail(Text after element's end tag, but before the next sibling element's start tag) will automatically be converted into the text of a span 
tag.\r\n\r\n\r\n\r\n## Enable dash_svg\r\n\r\nUse [dash-svg](https://github.com/stevej2608/dash-svg) module to render SVG tags.\r\n\r\n```python\r\n\r\nfrom html_to_dash import parse_html\r\n\r\n\r\n\r\nelement_str = \"\"\"\r\n\r\n<svg xmlns=\" http://www.w3.org/2000/svg \" version=\"1.1\" width=\"300\" height=\"300\">\r\n\r\n <rect x=\"100\" y=\"100\" width=\"100\" height=\"100\" fill=\"#e74c3c\"></rect>\r\n\r\n <polygon points=\"100,100 200,100 150,50\" fill=\"#c0392b\"></polygon>\r\n\r\n <polygon points=\"200,100 200,200 250,150\" fill=\"#f39c12\"></polygon>\r\n\r\n <polygon points=\"100,100 150,50 150,150 100,200\" fill=\"#f1c40f\"></polygon>\r\n\r\n <polygon points=\"150,50 200,100 250,50 200,0\" fill=\"#2ecc71\"></polygon>\r\n\r\n <polygon points=\"100,200 150,150 200,200 150,250\" fill=\"#3498db\"></polygon>\r\n\r\n</svg>\r\n\r\n\"\"\"\r\n\r\n\r\n\r\nparse_html(element_str, enable_dash_svg=True)\r\n\r\n```\r\n\r\nPrint:\r\n\r\n```\r\n\r\nResult:\r\n\r\ndash_svg.Svg(\r\n\r\n xmlns=\" http://www.w3.org/2000/svg \",\r\n\r\n version=\"1.1\",\r\n\r\n width=\"300\",\r\n\r\n height=\"300\",\r\n\r\n children=[\r\n\r\n dash_svg.Rect(x=\"100\", y=\"100\", width=\"100\", height=\"100\", fill=\"#e74c3c\"),\r\n\r\n dash_svg.Polygon(points=\"100,100 200,100 150,50\", fill=\"#c0392b\"),\r\n\r\n dash_svg.Polygon(points=\"200,100 200,200 250,150\", fill=\"#f39c12\"),\r\n\r\n dash_svg.Polygon(points=\"100,100 150,50 150,150 100,200\", fill=\"#f1c40f\"),\r\n\r\n dash_svg.Polygon(points=\"150,50 200,100 250,50 200,0\", fill=\"#2ecc71\"),\r\n\r\n dash_svg.Polygon(points=\"100,200 150,150 200,200 150,250\", fill=\"#3498db\"),\r\n\r\n ],\r\n\r\n)\r\n\r\n```\r\n\r\n- In the dash application, `import dash_svg` module will render normally.\r\n\r\n- The dash_svg has higher priority than dash.html, but lower priority than extra module.\r\n\r\n\r\n\r\n## Expanded usage\r\n\r\n```python\r\n\r\nfrom html_to_dash import parse_html\r\n\r\nelement_str = 
\"\"\"\r\n\r\n<html>\r\n\r\n<body>\r\n\r\n<div>\r\n\r\n <input type=\"text\" id=\"username\" name=\"username\" aria-label=\"Enter your username\" aria-required=\"true\">\r\n\r\n <div class='bg-gray-800' style='color:red;margin:10px'>\r\n\r\n <a href=\"#\" id=\"link1\">A</a>\r\n\r\n </div>\r\n\r\n <div>text</div>\r\n\r\n <svg></svg>\r\n\r\n <script></script>\r\n\r\n <div><a href=\"#\" id=\"link2\">B</a></div>\r\n\r\n</div>\r\n\r\n</body>\r\n\r\n</html>\r\n\r\n\"\"\"\r\n\r\n\r\n\r\nextra_mod = [{\"dcc\": {\"Input\": [\"id\", \"type\", \"placeholder\", \"aria-*\"]}}]\r\n\r\n\r\n\r\ndef tag_attr_func(tag, items):\r\n\r\n if tag == \"Input\":\r\n\r\n k, v = items\r\n\r\n if \"-\" in k:\r\n\r\n return f'**{{\"{k}\": \"{v}\"}}'\r\n\r\n\r\n\r\nparsed_ret = parse_html(\r\n\r\n element_str,\r\n\r\n tag_map={\"svg\": \"img\"},\r\n\r\n skip_tags=['script'],\r\n\r\n extra_mod=extra_mod,\r\n\r\n tag_attr_func=tag_attr_func,\r\n\r\n if_return=True,\r\n\r\n)\r\n\r\nprint(parsed_ret)\r\n\r\n```\r\n\r\nPrint:\r\n\r\n```\r\n\r\n# Tags : Unsupported [script] removed.\r\n\r\n# Attrs: Unsupported [name] in dcc.Input removed.\r\n\r\nhtml.Div(\r\n\r\n children=[\r\n\r\n dcc.Input(\r\n\r\n type=\"text\",\r\n\r\n id=\"username\",\r\n\r\n **{\"aria-label\": \"Enter your username\"},\r\n\r\n **{\"aria-required\": \"true\"}\r\n\r\n ),\r\n\r\n html.Div(\r\n\r\n className=\"bg-gray-800\",\r\n\r\n style={\"color\": \"red\", \"margin\": \"10px\"},\r\n\r\n children=[html.A(href=\"#\", id=\"link1\", children=[\"A\"])],\r\n\r\n ),\r\n\r\n html.Div(children=[\"text\"]),\r\n\r\n html.Img(),\r\n\r\n html.Div(children=[html.A(href=\"#\", id=\"link2\", children=[\"B\"])]),\r\n\r\n ]\r\n\r\n)\r\n\r\n```\r\n\r\n- The \\* sign is supported as a wildcard, like data-\\*, aria-\\*.\r\n\r\n- Both class and className can be handled correctly.\r\n\r\n- In fact, attributes with the \"-\" symbol are processed by default, which is only used here as an example. 
Similarly, the style attribute can be handled correctly.\r\n\r\n- If tag_map param is provided, will convert the corresponding tag names in the HTML based on the dict content before formal processing.\r\n\r\n- Tag in skip_tags will remove itself and its text.The priority of tag_map is higher than skip_tags.\r\n\r\n- Supports any custom module, not limited to HTML and DCC. Essentially, it is the processing of strings.\r\n\r\n- Custom module prioritize in order and above the default dash.html module.\r\n\r\n- The tag_attr_func param is a function that handle attribute formatting under the tag. \r\n\r\n When adding quotation marks within a string, `double quotation marks` should be added to avoid the black module being unable to parse. \r\n\r\n For example,`f'**{{\"{k}\": \"{v}\"}}'` instead of `f\"**{{'{k}': '{v}'}}\"`\u3001`f'{k}=\"{v}\"'` instead of `f\"{k}='{v}'\"`\r\n\r\n- If the HTML structure is huge, set huge_tree to True.\r\n\r\n\r\n\r\n# References\r\n\r\n- https://github.com/mhowell86/convert-html-to-dash\r\n\r\n- https://github.com/xhluca/convert-html-to-dash\r\n\r\n",
"bugtrack_url": null,
"license": "",
"summary": "Convert HTML to dash format.",
"version": "0.2.7",
"project_urls": {
"Homepage": "https://github.com/PierXuY/html-to-dash"
},
"split_keywords": [
"html",
"dash"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "b89b9ce764bd762aaecbab16da0dada18d971a27e5eff34fe16d77ea7dfb01f3",
"md5": "dd64f6749c3414fae53a37d7d2ce6231",
"sha256": "c8f58852c21aa771f5c70dfefe40d469e0a38bc764e3e025596dcd70d80d5e4c"
},
"downloads": -1,
"filename": "html_to_dash-0.2.7-py3-none-any.whl",
"has_sig": false,
"md5_digest": "dd64f6749c3414fae53a37d7d2ce6231",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.6",
"size": 12035,
"upload_time": "2023-10-11T13:47:32",
"upload_time_iso_8601": "2023-10-11T13:47:32.596828Z",
"url": "https://files.pythonhosted.org/packages/b8/9b/9ce764bd762aaecbab16da0dada18d971a27e5eff34fe16d77ea7dfb01f3/html_to_dash-0.2.7-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "38a64523a71ffa90f4cae32d3c81c9da6ee20991014722fa3741f3c880558db2",
"md5": "554fdc34b4aabdaa122e3192e281ed96",
"sha256": "f014a567fb199249459f3662766d5804484036068c6ff6abd2571007f4b263b8"
},
"downloads": -1,
"filename": "html-to-dash-0.2.7.tar.gz",
"has_sig": false,
"md5_digest": "554fdc34b4aabdaa122e3192e281ed96",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.6",
"size": 13162,
"upload_time": "2023-10-11T13:47:34",
"upload_time_iso_8601": "2023-10-11T13:47:34.381964Z",
"url": "https://files.pythonhosted.org/packages/38/a6/4523a71ffa90f4cae32d3c81c9da6ee20991014722fa3741f3c880558db2/html-to-dash-0.2.7.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-10-11 13:47:34",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "PierXuY",
"github_project": "html-to-dash",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "dash",
"specs": []
},
{
"name": "dash_svg",
"specs": []
},
{
"name": "black",
"specs": [
[
"~=",
"23.7.0"
]
]
},
{
"name": "lxml",
"specs": [
[
"~=",
"4.9.2"
]
]
},
{
"name": "setuptools",
"specs": [
[
"~=",
"68.2.2"
]
]
},
{
"name": "cssutils",
"specs": [
[
"~=",
"2.7.1"
]
]
}
],
"lcname": "html-to-dash"
}