```python
#as dataframe
#pip install a-pandas-ex-xml2df
#### UPDATE 08.12.2022 - new option add_xpath_and_snippet=True adds the columns aa_file, aa_xpath and aa_snippet
from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df
import pandas as pd
pd_add_read_xml_files()
df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml',add_xpath_and_snippet=True)
print(df[1:6].to_string())
aa_all_keys aa_value aa_file aa_xpath aa_snippet
1 (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/description <description link="http://en.wikipedia.org/wiki/The_King_of_Limbs">
2 (artist, 0, album, 0, song, 0, length) 5:15 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length="5:15" title="Bloom"/>
3 (artist, 0, album, 0, song, 0, title) Bloom https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length="5:15" title="Bloom"/>
4 (artist, 0, album, 0, song, 1, length) 4:41 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length="4:41" title="Morning Mr Magpie"/>
5 (artist, 0, album, 0, song, 1, title) Morning Mr Magpie https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length="4:41" title="Morning Mr Magpie"/>
from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df
import pandas as pd
pd_add_read_xml_files()
df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')
pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')
Out[4]:
aa_all_keys aa_value
level_0 level_1 level_2 level_3 level_4 level_5 level_6
artist 0 album 0.0 description description NaN (artist, 0, album, 0, description, description) \n\tThe King of Limbs is the eighth studio alb...
link NaN (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs
song 0 length (artist, 0, album, 0, song, 0, length) 5:15
title (artist, 0, album, 0, song, 0, title) Bloom
1 length (artist, 0, album, 0, song, 1, length) 4:41
... ...
1 album 1.0 song 9 title (artist, 1, album, 1, song, 9, title) Magic Doors
10 length (artist, 1, album, 1, song, 10, length) 5:45
title (artist, 1, album, 1, song, 10, title) Threads
title NaN NaN (artist, 1, album, 1, title) Third
name NaN NaN NaN NaN (artist, 1, name) Portishead
[98 rows x 2 columns]
# Convert XML to a dict (xml_to_dict) or to a DataFrame (xml_to_df)
xmlfileorstrin11 = r"C:\Users\Gamer\Documents\Downloads\00000001_untouched.xml"
link='https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml'
uu1=xml_to_dict(xmlfileorstrin11)
uu11=xml_to_df(xmlfileorstrin11)
with open(xmlfileorstrin11,encoding='utf-8') as f:
    xmlfileorstring = f.read()
uu2=xml_to_dict(xmlfileorstring)
uu22=xml_to_df(xmlfileorstring)
uu3=xml_to_dict(link)
uu33=xml_to_df(link)
uu1
Out[12]:
{'folder': 'data',
'filename': '00000001_untouched.png',
'path': None,
'source': {'database': 'Unknown'},
'size': {'width': 1920, 'height': 1080, 'depth': 3},
'segmented': 0,
'object': [{'name': 'search_bar',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 753, 'ymin': 8, 'xmax': 1172, 'ymax': 52}},
{'name': 'home_text',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 42, 'ymin': 5, 'xmax': 158, 'ymax': 55}},
{'name': 'add_friends',
'pose': 'Unspecified',
'truncated': 0,
'occluded': 0,
'difficult': 0,
'bndbox': {'xmin': 44, 'ymin': 185, 'xmax': 152, 'ymax': 310}}]}
uu11
Out[14]:
aa_all_keys aa_value
level_0 level_1 level_2 level_3
filename NaN NaN NaN (filename,) 00000001_untouched.png
folder NaN NaN NaN (folder,) data
object 0 bndbox xmax (object, 0, bndbox, xmax) 1172
xmin (object, 0, bndbox, xmin) 753
ymax (object, 0, bndbox, ymax) 52
ymin (object, 0, bndbox, ymin) 8
difficult NaN (object, 0, difficult) 0
name NaN (object, 0, name) search_bar
occluded NaN (object, 0, occluded) 0
pose NaN (object, 0, pose) Unspecified
truncated NaN (object, 0, truncated) 0
1 bndbox xmax (object, 1, bndbox, xmax) 158
xmin (object, 1, bndbox, xmin) 42
ymax (object, 1, bndbox, ymax) 55
ymin (object, 1, bndbox, ymin) 5
difficult NaN (object, 1, difficult) 0
name NaN (object, 1, name) home_text
occluded NaN (object, 1, occluded) 0
pose NaN (object, 1, pose) Unspecified
truncated NaN (object, 1, truncated) 0
2 bndbox xmax (object, 2, bndbox, xmax) 152
xmin (object, 2, bndbox, xmin) 44
ymax (object, 2, bndbox, ymax) 310
ymin (object, 2, bndbox, ymin) 185
difficult NaN (object, 2, difficult) 0
name NaN (object, 2, name) add_friends
occluded NaN (object, 2, occluded) 0
pose NaN (object, 2, pose) Unspecified
truncated NaN (object, 2, truncated) 0
path NaN NaN NaN (path,) None
segmented NaN NaN NaN (segmented,) 0
size depth NaN NaN (size, depth) 3
height NaN NaN (size, height) 1080
width NaN NaN (size, width) 1920
source database NaN NaN (source, database) Unknown
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/a_pandas_ex_xml2df",
"name": "a-pandas-ex-xml2df",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "xml,DataFrame,dict,pandas,XML",
"author": "Johannes Fischer",
"author_email": "<aulasparticularesdealemaosp@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/ce/34/5e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189/a_pandas_ex_xml2df-0.13.tar.gz",
"platform": null,
"description": "\n```python\n\n#as dataframe\n\n#pip install a-pandas-ex-xml2df\n\n\n\n#### UPDATE 08.12.2022 - xpath / snippets \n\n\n\nfrom a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df\n\nimport pandas as pd\n\npd_add_read_xml_files()\n\ndf=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml',add_xpath_and_snippet=True)\n\nprint(df[1:6].to_string())\n\n aa_all_keys aa_value aa_file aa_xpath aa_snippet\n\n1 (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/description <description link=\"http://en.wikipedia.org/wiki/The_King_of_Limbs\">\n\n2 (artist, 0, album, 0, song, 0, length) 5:15 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length=\"5:15\" title=\"Bloom\"/>\n\n3 (artist, 0, album, 0, song, 0, title) Bloom https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[1] <song length=\"5:15\" title=\"Bloom\"/>\n\n4 (artist, 0, album, 0, song, 1, length) 4:41 https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length=\"4:41\" title=\"Morning Mr Magpie\"/>\n\n5 (artist, 0, album, 0, song, 1, title) Morning Mr Magpie https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml //artist[1]/album[1]/song[2] <song length=\"4:41\" title=\"Morning Mr Magpie\"/>\n\n\n\n \n\n\n\nfrom a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df\n\nimport pandas as 
pd\n\npd_add_read_xml_files()\n\ndf=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')\n\n\n\npd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')\n\nOut[4]: \n\n aa_all_keys aa_value\n\nlevel_0 level_1 level_2 level_3 level_4 level_5 level_6 \n\nartist 0 album 0.0 description description NaN (artist, 0, album, 0, description, description) \\n\\tThe King of Limbs is the eighth studio alb...\n\n link NaN (artist, 0, album, 0, description, link) http://en.wikipedia.org/wiki/The_King_of_Limbs\n\n song 0 length (artist, 0, album, 0, song, 0, length) 5:15\n\n title (artist, 0, album, 0, song, 0, title) Bloom\n\n 1 length (artist, 0, album, 0, song, 1, length) 4:41\n\n ... ...\n\n 1 album 1.0 song 9 title (artist, 1, album, 1, song, 9, title) Magic Doors\n\n 10 length (artist, 1, album, 1, song, 10, length) 5:45\n\n title (artist, 1, album, 1, song, 10, title) Threads\n\n title NaN NaN (artist, 1, album, 1, title) Third\n\n name NaN NaN NaN NaN (artist, 1, name) Portishead\n\n[98 rows x 2 columns]\n\n\n\n\n\n\n\n#dataframe and dict\n\nxmlfileorstrin11 = r\"C:\\Users\\Gamer\\Documents\\Downloads\\00000001_untouched.xml\"\n\nlink='https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml'\n\n\n\nuu1=xml_to_dict(xmlfileorstrin11)\n\nuu11=xml_to_df(xmlfileorstrin11)\n\n\n\nwith open(xmlfileorstrin11,encoding='utf-8') as f:\n\n xmlfileorstring = f.read()\n\nuu2=xml_to_dict(xmlfileorstrin11)\n\nuu22=xml_to_df(xmlfileorstrin11)\n\n\n\nuu3=xml_to_dict(link)\n\nuu33=xml_to_df(link)\n\n\n\nuu1\n\nOut[12]: \n\n{'folder': 'data',\n\n 'filename': '00000001_untouched.png',\n\n 'path': None,\n\n 'source': {'database': 'Unknown'},\n\n 'size': {'width': 1920, 'height': 1080, 'depth': 3},\n\n 'segmented': 0,\n\n 'object': [{'name': 'search_bar',\n\n 'pose': 'Unspecified',\n\n 
'truncated': 0,\n\n 'occluded': 0,\n\n 'difficult': 0,\n\n 'bndbox': {'xmin': 753, 'ymin': 8, 'xmax': 1172, 'ymax': 52}},\n\n {'name': 'home_text',\n\n 'pose': 'Unspecified',\n\n 'truncated': 0,\n\n 'occluded': 0,\n\n 'difficult': 0,\n\n 'bndbox': {'xmin': 42, 'ymin': 5, 'xmax': 158, 'ymax': 55}},\n\n {'name': 'add_friends',\n\n 'pose': 'Unspecified',\n\n 'truncated': 0,\n\n 'occluded': 0,\n\n 'difficult': 0,\n\n 'bndbox': {'xmin': 44, 'ymin': 185, 'xmax': 152, 'ymax': 310}}]}\n\n \n\nuu11\n\nOut[14]: \n\n aa_all_keys aa_value\n\nlevel_0 level_1 level_2 level_3 \n\nfilename NaN NaN NaN (filename,) 00000001_untouched.png\n\nfolder NaN NaN NaN (folder,) data\n\nobject 0 bndbox xmax (object, 0, bndbox, xmax) 1172\n\n xmin (object, 0, bndbox, xmin) 753\n\n ymax (object, 0, bndbox, ymax) 52\n\n ymin (object, 0, bndbox, ymin) 8\n\n difficult NaN (object, 0, difficult) 0\n\n name NaN (object, 0, name) search_bar\n\n occluded NaN (object, 0, occluded) 0\n\n pose NaN (object, 0, pose) Unspecified\n\n truncated NaN (object, 0, truncated) 0\n\n 1 bndbox xmax (object, 1, bndbox, xmax) 158\n\n xmin (object, 1, bndbox, xmin) 42\n\n ymax (object, 1, bndbox, ymax) 55\n\n ymin (object, 1, bndbox, ymin) 5\n\n difficult NaN (object, 1, difficult) 0\n\n name NaN (object, 1, name) home_text\n\n occluded NaN (object, 1, occluded) 0\n\n pose NaN (object, 1, pose) Unspecified\n\n truncated NaN (object, 1, truncated) 0\n\n 2 bndbox xmax (object, 2, bndbox, xmax) 152\n\n xmin (object, 2, bndbox, xmin) 44\n\n ymax (object, 2, bndbox, ymax) 310\n\n ymin (object, 2, bndbox, ymin) 185\n\n difficult NaN (object, 2, difficult) 0\n\n name NaN (object, 2, name) add_friends\n\n occluded NaN (object, 2, occluded) 0\n\n pose NaN (object, 2, pose) Unspecified\n\n truncated NaN (object, 2, truncated) 0\n\npath NaN NaN NaN (path,) None\n\nsegmented NaN NaN NaN (segmented,) 0\n\nsize depth NaN NaN (size, depth) 3\n\n height NaN NaN (size, height) 1080\n\n width NaN NaN (size, width) 1920\n\nsource 
database NaN NaN (source, database) Unknown \n\n\n\n\n\n```\n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "nested XML to dict/DataFrame",
"version": "0.13",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/a_pandas_ex_xml2df"
},
"split_keywords": [
"xml",
"dataframe",
"dict",
"pandas",
"xml"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "1edca15037d6e68249cbbcd70a7e15e2287efb50b960516ddb245eae4b0d983e",
"md5": "74007d6bd9cd2efa98320a52c1cf903b",
"sha256": "c3b453eef024d881647fcb794fde5faa136219f115ebf18764abad95e63aafc8"
},
"downloads": -1,
"filename": "a_pandas_ex_xml2df-0.13-py3-none-any.whl",
"has_sig": false,
"md5_digest": "74007d6bd9cd2efa98320a52c1cf903b",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 8345,
"upload_time": "2022-12-08T14:13:51",
"upload_time_iso_8601": "2022-12-08T14:13:51.447374Z",
"url": "https://files.pythonhosted.org/packages/1e/dc/a15037d6e68249cbbcd70a7e15e2287efb50b960516ddb245eae4b0d983e/a_pandas_ex_xml2df-0.13-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "ce345e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189",
"md5": "4d5398abea2b1d581adfce784e4f4342",
"sha256": "1d4fe4cccb792cb74a69530911b095ab658e3a87f882989698275dbda5ec9bed"
},
"downloads": -1,
"filename": "a_pandas_ex_xml2df-0.13.tar.gz",
"has_sig": false,
"md5_digest": "4d5398abea2b1d581adfce784e4f4342",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 7403,
"upload_time": "2022-12-08T14:13:53",
"upload_time_iso_8601": "2022-12-08T14:13:53.154852Z",
"url": "https://files.pythonhosted.org/packages/ce/34/5e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189/a_pandas_ex_xml2df-0.13.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2022-12-08 14:13:53",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "a_pandas_ex_xml2df",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "a_pandas_ex_plode_tool",
"specs": []
},
{
"name": "nestednop",
"specs": []
},
{
"name": "pandas",
"specs": []
},
{
"name": "requests",
"specs": []
}
],
"lcname": "a-pandas-ex-xml2df"
}