a-pandas-ex-xml2df


Name: a-pandas-ex-xml2df JSON
Version: 0.13 PyPI version JSON
download
home_page: https://github.com/hansalemaos/a_pandas_ex_xml2df
Summary: nested XML to dict/DataFrame
upload_time: 2022-12-08 14:13:53
maintainer
docs_url: None
author: Johannes Fischer
requires_python
license: MIT
keywords xml dataframe dict pandas xml
VCS
bugtrack_url
requirements a_pandas_ex_plode_tool nestednop pandas requests
Travis-CI No Travis.
coveralls test coverage No coveralls.
            
```python

#as dataframe

#pip install a-pandas-ex-xml2df



#### UPDATE 08.12.2022 - xpath / snippets 



from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df

import pandas as pd

pd_add_read_xml_files()

df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml',add_xpath_and_snippet=True)

print(df[1:6].to_string())

                                aa_all_keys                                        aa_value                                                                                                           aa_file                          aa_xpath                                                              aa_snippet

1  (artist, 0, album, 0, description, link)  http://en.wikipedia.org/wiki/The_King_of_Limbs  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml  //artist[1]/album[1]/description     <description link="http://en.wikipedia.org/wiki/The_King_of_Limbs">

2    (artist, 0, album, 0, song, 0, length)                                            5:15  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[1]                                     <song length="5:15" title="Bloom"/>

3     (artist, 0, album, 0, song, 0, title)                                           Bloom  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[1]                                     <song length="5:15" title="Bloom"/>

4    (artist, 0, album, 0, song, 1, length)                                            4:41  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[2]                         <song length="4:41" title="Morning Mr Magpie"/>

5     (artist, 0, album, 0, song, 1, title)                               Morning Mr Magpie  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[2]                         <song length="4:41" title="Morning Mr Magpie"/>



 



from a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df

import pandas as pd

pd_add_read_xml_files()

df=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')



pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')

Out[4]: 

                                                                                                     aa_all_keys                                           aa_value

level_0 level_1 level_2 level_3 level_4     level_5     level_6                                                                                                    

artist  0       album   0.0     description description NaN      (artist, 0, album, 0, description, description)  \n\tThe King of Limbs is the eighth studio alb...

                                            link        NaN             (artist, 0, album, 0, description, link)     http://en.wikipedia.org/wiki/The_King_of_Limbs

                                song        0           length            (artist, 0, album, 0, song, 0, length)                                               5:15

                                                        title              (artist, 0, album, 0, song, 0, title)                                              Bloom

                                            1           length            (artist, 0, album, 0, song, 1, length)                                               4:41

                                                                                                          ...                                                ...

        1       album   1.0     song        9           title              (artist, 1, album, 1, song, 9, title)                                        Magic Doors

                                            10          length           (artist, 1, album, 1, song, 10, length)                                               5:45

                                                        title             (artist, 1, album, 1, song, 10, title)                                            Threads

                                title       NaN         NaN                         (artist, 1, album, 1, title)                                              Third

                name    NaN     NaN         NaN         NaN                                    (artist, 1, name)                                         Portishead

[98 rows x 2 columns]







#dataframe and dict

xmlfileorstrin11 = r"C:\Users\Gamer\Documents\Downloads\00000001_untouched.xml"

link='https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml'



uu1=xml_to_dict(xmlfileorstrin11)

uu11=xml_to_df(xmlfileorstrin11)



with open(xmlfileorstrin11,encoding='utf-8') as f:

    xmlfileorstring = f.read()

uu2=xml_to_dict(xmlfileorstring)

uu22=xml_to_df(xmlfileorstring)



uu3=xml_to_dict(link)

uu33=xml_to_df(link)



uu1

Out[12]: 

{'folder': 'data',

 'filename': '00000001_untouched.png',

 'path': None,

 'source': {'database': 'Unknown'},

 'size': {'width': 1920, 'height': 1080, 'depth': 3},

 'segmented': 0,

 'object': [{'name': 'search_bar',

   'pose': 'Unspecified',

   'truncated': 0,

   'occluded': 0,

   'difficult': 0,

   'bndbox': {'xmin': 753, 'ymin': 8, 'xmax': 1172, 'ymax': 52}},

  {'name': 'home_text',

   'pose': 'Unspecified',

   'truncated': 0,

   'occluded': 0,

   'difficult': 0,

   'bndbox': {'xmin': 42, 'ymin': 5, 'xmax': 158, 'ymax': 55}},

  {'name': 'add_friends',

   'pose': 'Unspecified',

   'truncated': 0,

   'occluded': 0,

   'difficult': 0,

   'bndbox': {'xmin': 44, 'ymin': 185, 'xmax': 152, 'ymax': 310}}]}

   

uu11

Out[14]: 

                                                    aa_all_keys                aa_value

level_0   level_1  level_2   level_3                                                   

filename  NaN      NaN       NaN                    (filename,)  00000001_untouched.png

folder    NaN      NaN       NaN                      (folder,)                    data

object    0        bndbox    xmax     (object, 0, bndbox, xmax)                    1172

                             xmin     (object, 0, bndbox, xmin)                     753

                             ymax     (object, 0, bndbox, ymax)                      52

                             ymin     (object, 0, bndbox, ymin)                       8

                   difficult NaN         (object, 0, difficult)                       0

                   name      NaN              (object, 0, name)              search_bar

                   occluded  NaN          (object, 0, occluded)                       0

                   pose      NaN              (object, 0, pose)             Unspecified

                   truncated NaN         (object, 0, truncated)                       0

          1        bndbox    xmax     (object, 1, bndbox, xmax)                     158

                             xmin     (object, 1, bndbox, xmin)                      42

                             ymax     (object, 1, bndbox, ymax)                      55

                             ymin     (object, 1, bndbox, ymin)                       5

                   difficult NaN         (object, 1, difficult)                       0

                   name      NaN              (object, 1, name)               home_text

                   occluded  NaN          (object, 1, occluded)                       0

                   pose      NaN              (object, 1, pose)             Unspecified

                   truncated NaN         (object, 1, truncated)                       0

          2        bndbox    xmax     (object, 2, bndbox, xmax)                     152

                             xmin     (object, 2, bndbox, xmin)                      44

                             ymax     (object, 2, bndbox, ymax)                     310

                             ymin     (object, 2, bndbox, ymin)                     185

                   difficult NaN         (object, 2, difficult)                       0

                   name      NaN              (object, 2, name)             add_friends

                   occluded  NaN          (object, 2, occluded)                       0

                   pose      NaN              (object, 2, pose)             Unspecified

                   truncated NaN         (object, 2, truncated)                       0

path      NaN      NaN       NaN                        (path,)                    None

segmented NaN      NaN       NaN                   (segmented,)                       0

size      depth    NaN       NaN                  (size, depth)                       3

          height   NaN       NaN                 (size, height)                    1080

          width    NaN       NaN                  (size, width)                    1920

source    database NaN       NaN             (source, database)                 Unknown   





```


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/a_pandas_ex_xml2df",
    "name": "a-pandas-ex-xml2df",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "xml,DataFrame,dict,pandas,XML",
    "author": "Johannes Fischer",
    "author_email": "<aulasparticularesdealemaosp@gmail.com>",
    "download_url": "https://files.pythonhosted.org/packages/ce/34/5e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189/a_pandas_ex_xml2df-0.13.tar.gz",
    "platform": null,
    "description": "\n```python\n\n#as dataframe\n\n#pip install a-pandas-ex-xml2df\n\n\n\n#### UPDATE 08.12.2022 - xpath / snippets \n\n\n\nfrom a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df\n\nimport pandas as pd\n\npd_add_read_xml_files()\n\ndf=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml',add_xpath_and_snippet=True)\n\nprint(df[1:6].to_string())\n\n                                aa_all_keys                                        aa_value                                                                                                           aa_file                          aa_xpath                                                              aa_snippet\n\n1  (artist, 0, album, 0, description, link)  http://en.wikipedia.org/wiki/The_King_of_Limbs  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml  //artist[1]/album[1]/description     <description link=\"http://en.wikipedia.org/wiki/The_King_of_Limbs\">\n\n2    (artist, 0, album, 0, song, 0, length)                                            5:15  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[1]                                     <song length=\"5:15\" title=\"Bloom\"/>\n\n3     (artist, 0, album, 0, song, 0, title)                                           Bloom  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[1]                                     <song length=\"5:15\" title=\"Bloom\"/>\n\n4    (artist, 0, album, 0, song, 1, length)                                            4:41  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[2]                         
<song length=\"4:41\" title=\"Morning Mr Magpie\"/>\n\n5     (artist, 0, album, 0, song, 1, title)                               Morning Mr Magpie  https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml      //artist[1]/album[1]/song[2]                         <song length=\"4:41\" title=\"Morning Mr Magpie\"/>\n\n\n\n \n\n\n\nfrom a_pandas_ex_xml2df import pd_add_read_xml_files, xml_to_dict, xml_to_df\n\nimport pandas as pd\n\npd_add_read_xml_files()\n\ndf=pd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')\n\n\n\npd.Q_Xml2df('https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml')\n\nOut[4]: \n\n                                                                                                     aa_all_keys                                           aa_value\n\nlevel_0 level_1 level_2 level_3 level_4     level_5     level_6                                                                                                    \n\nartist  0       album   0.0     description description NaN      (artist, 0, album, 0, description, description)  \\n\\tThe King of Limbs is the eighth studio alb...\n\n                                            link        NaN             (artist, 0, album, 0, description, link)     http://en.wikipedia.org/wiki/The_King_of_Limbs\n\n                                song        0           length            (artist, 0, album, 0, song, 0, length)                                               5:15\n\n                                                        title              (artist, 0, album, 0, song, 0, title)                                              Bloom\n\n                                            1           length            (artist, 0, album, 0, song, 1, length)                                               4:41\n\n                               
                                                                           ...                                                ...\n\n        1       album   1.0     song        9           title              (artist, 1, album, 1, song, 9, title)                                        Magic Doors\n\n                                            10          length           (artist, 1, album, 1, song, 10, length)                                               5:45\n\n                                                        title             (artist, 1, album, 1, song, 10, title)                                            Threads\n\n                                title       NaN         NaN                         (artist, 1, album, 1, title)                                              Third\n\n                name    NaN     NaN         NaN         NaN                                    (artist, 1, name)                                         Portishead\n\n[98 rows x 2 columns]\n\n\n\n\n\n\n\n#dataframe and dict\n\nxmlfileorstrin11 = r\"C:\\Users\\Gamer\\Documents\\Downloads\\00000001_untouched.xml\"\n\nlink='https://gist.githubusercontent.com/jasonbaldridge/2597611/raw/c2c6a072c7d018c35aefad6b4739ac75247e5d92/music.xml'\n\n\n\nuu1=xml_to_dict(xmlfileorstrin11)\n\nuu11=xml_to_df(xmlfileorstrin11)\n\n\n\nwith open(xmlfileorstrin11,encoding='utf-8') as f:\n\n    xmlfileorstring = f.read()\n\nuu2=xml_to_dict(xmlfileorstrin11)\n\nuu22=xml_to_df(xmlfileorstrin11)\n\n\n\nuu3=xml_to_dict(link)\n\nuu33=xml_to_df(link)\n\n\n\nuu1\n\nOut[12]: \n\n{'folder': 'data',\n\n 'filename': '00000001_untouched.png',\n\n 'path': None,\n\n 'source': {'database': 'Unknown'},\n\n 'size': {'width': 1920, 'height': 1080, 'depth': 3},\n\n 'segmented': 0,\n\n 'object': [{'name': 'search_bar',\n\n   'pose': 'Unspecified',\n\n   'truncated': 0,\n\n   'occluded': 0,\n\n   'difficult': 0,\n\n   'bndbox': {'xmin': 753, 'ymin': 8, 'xmax': 1172, 'ymax': 52}},\n\n  {'name': 'home_text',\n\n   'pose': 
'Unspecified',\n\n   'truncated': 0,\n\n   'occluded': 0,\n\n   'difficult': 0,\n\n   'bndbox': {'xmin': 42, 'ymin': 5, 'xmax': 158, 'ymax': 55}},\n\n  {'name': 'add_friends',\n\n   'pose': 'Unspecified',\n\n   'truncated': 0,\n\n   'occluded': 0,\n\n   'difficult': 0,\n\n   'bndbox': {'xmin': 44, 'ymin': 185, 'xmax': 152, 'ymax': 310}}]}\n\n   \n\nuu11\n\nOut[14]: \n\n                                                    aa_all_keys                aa_value\n\nlevel_0   level_1  level_2   level_3                                                   \n\nfilename  NaN      NaN       NaN                    (filename,)  00000001_untouched.png\n\nfolder    NaN      NaN       NaN                      (folder,)                    data\n\nobject    0        bndbox    xmax     (object, 0, bndbox, xmax)                    1172\n\n                             xmin     (object, 0, bndbox, xmin)                     753\n\n                             ymax     (object, 0, bndbox, ymax)                      52\n\n                             ymin     (object, 0, bndbox, ymin)                       8\n\n                   difficult NaN         (object, 0, difficult)                       0\n\n                   name      NaN              (object, 0, name)              search_bar\n\n                   occluded  NaN          (object, 0, occluded)                       0\n\n                   pose      NaN              (object, 0, pose)             Unspecified\n\n                   truncated NaN         (object, 0, truncated)                       0\n\n          1        bndbox    xmax     (object, 1, bndbox, xmax)                     158\n\n                             xmin     (object, 1, bndbox, xmin)                      42\n\n                             ymax     (object, 1, bndbox, ymax)                      55\n\n                             ymin     (object, 1, bndbox, ymin)                       5\n\n                   difficult NaN         (object, 1, difficult)                  
     0\n\n                   name      NaN              (object, 1, name)               home_text\n\n                   occluded  NaN          (object, 1, occluded)                       0\n\n                   pose      NaN              (object, 1, pose)             Unspecified\n\n                   truncated NaN         (object, 1, truncated)                       0\n\n          2        bndbox    xmax     (object, 2, bndbox, xmax)                     152\n\n                             xmin     (object, 2, bndbox, xmin)                      44\n\n                             ymax     (object, 2, bndbox, ymax)                     310\n\n                             ymin     (object, 2, bndbox, ymin)                     185\n\n                   difficult NaN         (object, 2, difficult)                       0\n\n                   name      NaN              (object, 2, name)             add_friends\n\n                   occluded  NaN          (object, 2, occluded)                       0\n\n                   pose      NaN              (object, 2, pose)             Unspecified\n\n                   truncated NaN         (object, 2, truncated)                       0\n\npath      NaN      NaN       NaN                        (path,)                    None\n\nsegmented NaN      NaN       NaN                   (segmented,)                       0\n\nsize      depth    NaN       NaN                  (size, depth)                       3\n\n          height   NaN       NaN                 (size, height)                    1080\n\n          width    NaN       NaN                  (size, width)                    1920\n\nsource    database NaN       NaN             (source, database)                 Unknown   \n\n\n\n\n\n```\n\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "nested XML to dict/DataFrame",
    "version": "0.13",
    "project_urls": {
        "Homepage": "https://github.com/hansalemaos/a_pandas_ex_xml2df"
    },
    "split_keywords": [
        "xml",
        "dataframe",
        "dict",
        "pandas",
        "xml"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "1edca15037d6e68249cbbcd70a7e15e2287efb50b960516ddb245eae4b0d983e",
                "md5": "74007d6bd9cd2efa98320a52c1cf903b",
                "sha256": "c3b453eef024d881647fcb794fde5faa136219f115ebf18764abad95e63aafc8"
            },
            "downloads": -1,
            "filename": "a_pandas_ex_xml2df-0.13-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "74007d6bd9cd2efa98320a52c1cf903b",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 8345,
            "upload_time": "2022-12-08T14:13:51",
            "upload_time_iso_8601": "2022-12-08T14:13:51.447374Z",
            "url": "https://files.pythonhosted.org/packages/1e/dc/a15037d6e68249cbbcd70a7e15e2287efb50b960516ddb245eae4b0d983e/a_pandas_ex_xml2df-0.13-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "ce345e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189",
                "md5": "4d5398abea2b1d581adfce784e4f4342",
                "sha256": "1d4fe4cccb792cb74a69530911b095ab658e3a87f882989698275dbda5ec9bed"
            },
            "downloads": -1,
            "filename": "a_pandas_ex_xml2df-0.13.tar.gz",
            "has_sig": false,
            "md5_digest": "4d5398abea2b1d581adfce784e4f4342",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 7403,
            "upload_time": "2022-12-08T14:13:53",
            "upload_time_iso_8601": "2022-12-08T14:13:53.154852Z",
            "url": "https://files.pythonhosted.org/packages/ce/34/5e66dd5c169520ee691fbb00e8ba9e32d99ec28839a249e61744a6b0b189/a_pandas_ex_xml2df-0.13.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2022-12-08 14:13:53",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "hansalemaos",
    "github_project": "a_pandas_ex_xml2df",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "a_pandas_ex_plode_tool",
            "specs": []
        },
        {
            "name": "nestednop",
            "specs": []
        },
        {
            "name": "pandas",
            "specs": []
        },
        {
            "name": "requests",
            "specs": []
        }
    ],
    "lcname": "a-pandas-ex-xml2df"
}
        
Elapsed time: 0.55940s