diselect

Name	diselect JSON
Version	1.0.12 JSON
	download
home_page	https://github.com/zwolf21/diselect
Summary	smart and convenient dict flatten library for complex container nested with dict and list
upload_time	2024-01-15 10:56:43
maintainer
docs_url	None
author	HS Moon
requires_python	>=3.8
license	MIT
keywords	diselect dict select flatten dict flatten dict in list list of dict
VCS
bugtrack_url
requirements	No requirements were recorded.
Travis-CI	No Travis.
coveralls test coverage	No coveralls.

            ## 1. Introduction
### - A smart and convenient single function for extracting values from a container consisting of lists and dicts
### - Query a container with a complex structure, mainly generated from json, and flatten it into a dict with a single structure.
### - Free yourself from code that indexes complex JSON data one by one and writes for loops like the one below...
> 
```python

# extracting from json loads data..

with open('sample.json') as f:
    sample_from_json = json.load(f)

count = sample_from_json['count']
data_list = sample_from_json.get('data_list')

for cityinfo in data_list:
    for key, value in cityinfo.items():
        if key == 'city':
            city_name = value['names']['en']
        if key == 'subdivisions':
            subdiv = []
            for subsubdivision in value:
                sv = subsubdivision['names']['en']
                subdiv.append(sv)
                ...
                ...
            ...
        ...
        ...
    ...
    ...
    OMG...
    ...
    ....
```
---

## 2. Installation and Usage
 - Requires Python 3.8 or later
```shell
pip install diselect
```
```python
from diselect import diselect

# example
# list of dict in dict in list in dict in list of dict in....
sample_from_json = {
    'count': 1,
    'date': '2022-5-31',
    'data_list': [
        {
            "city": {
                "names": {
                    "en": "Songpa-gu"
                }
            },
            "continent": {
                "code": "AS",
                "names": {"pt-BR": "Ásia", "de": "Asien", "en": "Asia",}
            },
            "country": {
                "iso_code": "KR", 
                "names": {
                    "de": "Südkorea",
                    "en": "South Korea",
                }
            },
            "location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul"},

            # multiple children in a list
            "subdivisions": [
                {
                    "iso_code": "11",
                    "names": {"zh-CN": "首尔特别市", "en": "Seoul", "ja": "ソウル特別市"}
                },
                {
                    "iso_code": "12",
                    "names": {"en": "Hangang"}
                }
            ],
            
        },
        {
            "city": {
                "names": {
                    "en": "Songpa-gu2"
                }
            },
            "continent": {
                "code": "AS2",
                "names": {"pt-BR": "Ásia2", "de": "Asien", "en": "Asia2",}
            },
            "country": {
                "iso_code": "KR2", 
                "names": {
                    "de": "Südkorea2",
                    "en": "South Korea2",
                }
            },
            "location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul2"},

            # multiple children in a list
            "subdivisions": [
                {
                    "iso_code": "112",
                    "names": {"zh-CN": "首尔特别市", "en": "Seoul2", "ja": "ソウル特別市"}
                },
                {
                    "iso_code": "122",
                    "names": {"en": "Hangang2"}
                }
            ],
            
        },
    ]
}


```
```python
# Usage 1) Specify only the column name
# Use this for top-level values in the container, where there is no risk of duplicate matches

query_only_key = ['count', 'date'] # key name to column
for r in diselect(sample_from_json, query_only_key):
    print(r)

# results {'count': 1, 'date': '2022-5-31'}
```

```python
# Usage 2) Extract nested values
# Each query is a tuple of the parent keys leading to the target 'terminal' value
# If too few parent levels are given, duplicate matches may occur.
# An exception is raised when a duplicate match occurs

query_deep_path = [('city', 'names', 'en'), ('country', 'names', 'en')] # en is key of terminal value
for r in diselect(sample_from_json, query_deep_path):
    print(r)

# results 
# {('city', 'names', 'en'): 'Songpa-gu', ('country', 'names', 'en'): 'South Korea'}
# {('city', 'names', 'en'): 'Songpa-gu2', ('country', 'names', 'en'): 'South Korea2'}
```

```python
# Usage 3) Aliasing a query to a column name
# Change the query to a usable column name

query_aliases = {
    ('city', 'names', 'en'): 'city_name',
    ('country', 'names', 'en'): 'country_name',
    ('subdivisions', 'names', 'en'): 'subdivision_names'
}
# or
query_aliases = [
    {('city', 'names', 'en'): 'city_name'},
    {('country', 'names', 'en'): 'country_name'},
    {('subdivisions', 'names', 'en'): 'subdivision_names'}
]

for r in diselect(sample_from_json, query_aliases):
    print(r)

# results:
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': ['Seoul', 'Hangang']}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': ['Seoul2', 'Hangang2']}
# the multiple child values of subdivision_names have been coalesced into a list: ['Seoul', 'Hangang']
```
```python
# Usage 4) Join listed child values
# pass a tuple of (alias, function) as the value

query_aliases_and_join_children = {
    ('city', 'names', 'en'): 'city_name',
    ('country', 'names', 'en'): 'country_name',
    ('subdivisions', 'names', 'en'): ('subdivision_names', ','.join), # alias, join function
}

for r in diselect(sample_from_json, query_aliases_and_join_children):
    print(r)

# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'Seoul,Hangang'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'Seoul2,Hangang2'}
# 'Seoul' and 'Hangang' have been joined with the separator ','
```
```python
query_aliases_and_join_children = {
    ('city', 'names', 'en'): 'city_name',
    ('country', 'names', 'en'): 'country_name',
    ('subdivisions', 'names', 'en'): [
        'subdivision_names',
        ','.join, str.upper # alias, chaining function
    ]
}

for r in diselect(sample_from_json, query_aliases_and_join_children):
    print(r)
# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'SEOUL2,HANGANG2'}
```

```python
# Usage 5) merge multiple selections
 
query = {
    (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[
        'address',
        '/' # a str is a shortcut for a join function
    ],
    (('latitude',), ('longitude',)): [
        'coordinate'
    ]
}
for r in diselect(sample_from_json, query):
    print(r)

# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': [37.5013, 127.1188]}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': [37.5013, 127.1188]}

# applying functions to coordinate...
query = {
    (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[ #tuple of multiple paths,
        'address', '/'
    ],
    (('latitude',), ('longitude',)): [ 
        'coordinate',
        str,    # convert individual float type elements to str for join
        ','     
    ]
}
for r in diselect(sample_from_json, query):
    print(r)

# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': '37.5013,127.1188'}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': '37.5013,127.1188'}
```


```python
# 6) Summary
query = {
    ('city', 'names', 'en'): 'city_name',
    ('continent', 'code'): 'continent_code',
    ('continent', 'names', 'en'): 'continent_name',
    ('country', 'iso_code'): 'country_code',
    ('country', 'names', 'en'): 'country_name',
    ('location', 'time_zone'): 'timezone',
    (('latitude',), ('longitude',)): [
        'coordinate',
        str, ','
    ],
    ('subdivisions', 'names', 'en'): [
        'subdivision_name',
        ',', str.upper
    ]
}

for r in diselect(container=sample_from_json, query=query):
    print(r)

# {'city_name': 'Songpa-gu', 'continent_code': 'AS', 'continent_name': 'Asia', 'country_code': 'KR', 'country_name': 'South Korea', 'timezone': 'Asia/Seoul', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'continent_code': 'AS2', 'continent_name': 'Asia2', 'country_code': 'KR2', 'country_name': 'South Korea2', 'timezone': 'Asia/Seoul2', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL2,HANGANG2'}
```

----

## 3. Arguments
### 1. container
    > complex data nested with dicts and lists
### 2. query
```python
query1 = {
    key1, key2,
    {(key3, key2): alias},
    {(key4, key5): (alias2, apply)},
}
query2 = [
    'column1', 'column2',
    {
        ('path1', 'path2'): 'alias1',
        ('patt1', 'path2', 'path3'): ('alias2', dateutil.parser.parse),
    },
    'column4'
]
```
- Each query is the non-overlapping 'minimum' path to a value item (it need not be the full path)
- The path lists the parent keys leading to the target 'terminal' value (the target value must be a scalar value, like str, int...)
- More detail is better to avoid duplication (...great-grandparent, grandparent, parent)
- You can mix dict and tuple
- The results column order of the output matches the order of the query
- alias: column name representing the query
- apply: function to be applied to value
### 3. caution
- If a query matches no key path in the container, a warning is output and that query does not appear in the result columns.
- If the matching of the query is duplicated, an exception is raised and a more detailed query is required.
- Consider the data structure of the container: the values selected by a query are aggregated at the level of the top-most keys that the query matches.
```python
# date and count in the presented example data are single entities as top-level keys.
  # 'count': 1,
  # 'date': '2022-5-31',
  # 'data_list': [ ...
# but data_list holds multiple rows of values
# Querying both kinds of data at the same time leads to unpredictable behavior.

greedy_query = [
    # query for top level single context value
    'count', 'date', 
    # query for row values
    {
        ('city', 'names', 'en'): 'city_name',
        ('continent', 'code'): 'continent_code',
        ('continent', 'names', 'en'): 'continent_name',
        ('country', 'iso_code'): 'country_code',
        ('country', 'names', 'en'): 'country_name',
        ('location', 'time_zone'): 'timezone',
        ('subdivisions', 'names', 'en'): ('subdivision_name', ','), 
    }
]

for r in diselect(sample_from_json, greedy_query):
    print(r)

# results
# {'count': 1, 'date': '2022-5-31', 'city_name': ['Songpa-gu', 'Songpa-gu2'], 'continent_code': ['AS', 'AS2'], 'continent_name': ['Asia', 'Asia2'], 'country_code': ['KR', 'KR2'], 'country_name': ['South Korea', 'South Korea2'], 'timezone': ['Asia/Seoul', 'Asia/Seoul2'], 'subdivision_name': 'Seoul,Hangang,Seoul2,Hangang2'}

# The data is organized vertically with the top keys count and date. Maybe this is what you want.
# This can be used as a trick to get the column dataset


## Tip: separate the queries by structure to get both of them
query_context = ['count', 'date']

query_list = {
    ('city', 'names', 'en'): 'city_name',
    ('continent', 'code'): 'continent_code',
    ('continent', 'names', 'en'): 'continent_name',
    ('country', 'iso_code'): 'country_code',
    ('country', 'names', 'en'): 'country_name',
    ('location', 'time_zone'): 'timezone',
    ('subdivisions', 'names', 'en'): ('subdivision_name', ','), 
}



[context_data] = list(diselect(sample_from_json, query_context)) # may one
count = context_data['count']
date = context_data['date']

# or it may be simpler and better to index directly when the values are easy to access
count = sample_from_json['count']
date = sample_from_json['date']

data_list = list(diselect(sample_from_json, query_list)) # many

```

## 4. More Usages

### 1. typing values
    - value typing via apply function

```python
import dateutil.parser

data = [
    {
        'place_id': 142213,
        'visit_count': '5',
        'visit_date': '2022/2/21',
        'rating': '2.5',
    },
    {
        'place_id': 154321,
        'visit_count': '12',
        'visit_date': '2022.3.7.',
        'rating': '4.5',
    },
]

parsed = diselect(data,
{
    'place_id': ('place_id', str),
    'visit_count': ('visit_count', int),
    'rating': ('point', float),
    'visit_date': ('visit_date', dateutil.parser.parse),
})
for row in parsed:
    print(row)
# results
# {'place_id': '142213', 'visit_count': 5, 'point': 2.5, 'visit_date': datetime.datetime(2022, 2, 21, 0, 0)}
# {'place_id': '154321', 'visit_count': 12, 'point': 4.5, 'visit_date': datetime.datetime(2022, 3, 7, 0, 0)}
```

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/zwolf21/diselect",
    "name": "diselect",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">=3.8",
    "maintainer_email": "",
    "keywords": "diselect,dict select,flatten,dict flatten,dict in list,list of dict",
    "author": "HS Moon",
    "author_email": "pbr112@naver.com",
    "download_url": "https://files.pythonhosted.org/packages/a0/85/035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64/diselect-1.0.12.tar.gz",
    "platform": null,
    "description": "## 1. Introduction\r\n### - A smart and convenience single function for extracting container value consisting of list and dict\r\n### - Query a container with a complex structure, mainly generated from json, and flatten it into a dict with a single structure.\r\n### - Get freedom from code that indexes complex json data one by one and writes for loops like this below...\r\n> \r\n```python\r\n\r\n# extracting from json loads data..\r\n\r\nsample_from_json = json.loads('sample.json')\r\n\r\ncount = sample_from_json['count']\r\ndata_list = sample_from_json.get('data_list')\r\n\r\nfor cityinfo in data_list:\r\n    for key, value in cityinfo.items():\r\n        if key == 'city':\r\n            city_name = value['names']['en']\r\n        if key == 'subdivisions':\r\n            subdiv = []\r\n            for subsubdivision in value:\r\n                sv = subsubdivision['names']['en']\r\n                subdiv.append(sv)\r\n                ...\r\n                ...\r\n            ...\r\n        ...\r\n        ...\r\n    ...\r\n    ...\r\n    OMG...\r\n    ...\r\n    ....\r\n```\r\n---\r\n\r\n## 2. 
Installation and Usage\r\n - Made in Python 3.8 or later\r\n```shell\r\npip install diselect\r\n```\r\n```python\r\nfrom diselect import diselect\r\n\r\n# example\r\n# list of dict in dict in list in dict in list of dict in....\r\nsample_from_json = {\r\n    'count': 1,\r\n    'date': '2022-5-31',\r\n    'data_list': [\r\n        {\r\n            \"city\": {\r\n                \"names\": {\r\n                    \"en\": \"Songpa-gu\"\r\n                }\r\n            },\r\n            \"continent\": {\r\n                \"code\": \"AS\",\r\n                \"names\": {\"pt-BR\": \"\u00c1sia\", \"de\": \"Asien\", \"en\": \"Asia\",}\r\n            },\r\n            \"country\": {\r\n                \"iso_code\": \"KR\", \r\n                \"names\": {\r\n                    \"de\": \"S\u00fcdkorea\",\r\n                    \"en\": \"South Korea\",\r\n                }\r\n            },\r\n            \"location\": {\"latitude\": 37.5013, \"longitude\": 127.1188, \"time_zone\": \"Asia/Seoul\"},\r\n\r\n            # multiple childerns of list\r\n            \"subdivisions\": [\r\n                {\r\n                    \"iso_code\": \"11\",\r\n                    \"names\": {\"zh-CN\": \"\u9996\u5c14\u7279\u522b\u5e02\", \"en\": \"Seoul\", \"ja\": \"\u30bd\u30a6\u30eb\u7279\u5225\u5e02\"}\r\n                },\r\n                {\r\n                    \"iso_code\": \"12\",\r\n                    \"names\": {\"en\": \"Hangang\"}\r\n                }\r\n            ],\r\n            \r\n        },\r\n        {\r\n            \"city\": {\r\n                \"names\": {\r\n                    \"en\": \"Songpa-gu2\"\r\n                }\r\n            },\r\n            \"continent\": {\r\n                \"code\": \"AS2\",\r\n                \"names\": {\"pt-BR\": \"\u00c1sia2\", \"de\": \"Asien\", \"en\": \"Asia2\",}\r\n            },\r\n            \"country\": {\r\n                \"iso_code\": \"KR2\", \r\n                \"names\": {\r\n                    
\"de\": \"S\u00fcdkorea2\",\r\n                    \"en\": \"South Korea2\",\r\n                }\r\n            },\r\n            \"location\": {\"latitude\": 37.5013, \"longitude\": 127.1188, \"time_zone\": \"Asia/Seoul2\"},\r\n\r\n            # multiple childerns of list\r\n            \"subdivisions\": [\r\n                {\r\n                    \"iso_code\": \"112\",\r\n                    \"names\": {\"zh-CN\": \"\u9996\u5c14\u7279\u522b\u5e02\", \"en\": \"Seoul2\", \"ja\": \"\u30bd\u30a6\u30eb\u7279\u5225\u5e02\"}\r\n                },\r\n                {\r\n                    \"iso_code\": \"122\",\r\n                    \"names\": {\"en\": \"Hangang2\"}\r\n                }\r\n            ],\r\n            \r\n        },\r\n    ]\r\n}\r\n\r\n\r\n```\r\n```python\r\n# Useage 1) Specify only the column name\r\n# When taking the highest values in container \u200b\u200bwithout the risk of duplication\r\n\r\nquery_only_key = ['count', 'date'] # key name to column\r\nfor r in diselect(sample_from_json, query_only_key):\r\n    print(r)\r\n\r\n# results {'count': 1, 'date': '2022-5-31'}\r\n```\r\n\r\n```python\r\n# Useage 2) Extract nested values\r\n# parent paths tuple keys of target 'terminal' value\r\n# If there are few parental generations, duplicate matching may occur.\r\n# Exception when duplicate occurs\r\n\r\nquery_deep_path = [('city', 'names', 'en'), ('country', 'names', 'en')] # en is key of terminal value\r\nfor r in diselect(sample_from_json, query_deep_path):\r\n    print(r)\r\n\r\n# results \r\n# {('city', 'names', 'en'): 'Songpa-gu', ('country', 'names', 'en'): 'South Korea'}\r\n# {('city', 'names', 'en'): 'Songpa-gu2', ('country', 'names', 'en'): 'South Korea2'}\r\n```\r\n\r\n```python\r\n# Useage 3) Aliasing query to column name\r\n# Change the query to an usable column name\r\n\r\nquery_aliases = {\r\n    ('city', 'names', 'en'): 'city_name',\r\n    ('country', 'names', 'en'): 'country_name',\r\n    ('subdivisions', 'names', 'en'): 
'subdivision_name'\r\n}\r\n# or\r\nquery_aliases = [\r\n    {('city', 'names', 'en'): 'city_name'},\r\n    {('country', 'names', 'en'): 'country_name'},\r\n    {('subdivisions', 'names', 'en'): 'subdivision_names'}\r\n]\r\n\r\nfor r in diselect(sample_from_json, query_aliases):\r\n    print(r)\r\n\r\n# results:\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': ['Seoul', 'Hangang']}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': ['Seoul2', 'Hangang2']}\r\n# multiple children values of subdivision_names has coaleased to list ['Seoul', 'Hangang']\r\n```\r\n```python\r\n# Useage 4) join listed children values\r\n# pass tuple value of aliase and function\r\n\r\nquery_aliases_and_join_children = {\r\n    ('city', 'names', 'en'): 'city_name',\r\n    ('country', 'names', 'en'): 'country_name',\r\n    ('subdivisions', 'names', 'en'): ('subdivision_names', ','.join), # alias, join function\r\n}\r\n\r\nfor r in diselect(sample_from_json, query_aliases_and_join_children):\r\n    print(r)\r\n\r\n# results\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'Seoul,Hangang'}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'Seoul2,Hangang2'}\r\n# Soule, Hangang has joined with sep ','\r\n```\r\n```python\r\nquery_aliases_and_join_children = {\r\n    ('city', 'names', 'en'): 'city_name',\r\n    ('country', 'names', 'en'): 'country_name',\r\n    ('subdivisions', 'names', 'en'): [\r\n        'subdivision_names',\r\n        ','.join, str.upper # alias, chaining function\r\n    ]\r\n}\r\n\r\nfor r in diselect(sample_from_json, query_aliases_and_join_children):\r\n    print(r)\r\n# results\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'SEOUL,HANGANG'}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'SEOUL2,HANGANG2'}\r\n```\r\n\r\n```python\r\n# Useage 5) merge muliple 
select\r\n \r\nquery = {\r\n    (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[\r\n        'address',\r\n        '/' # if str, be a shorcut of join function\r\n    ],\r\n    (('latitude',), ('longitude',)): [\r\n        'coordinate'\r\n    ]\r\n}\r\nfor r in diselect(sample_from_json, query):\r\n    print(r)\r\n\r\n# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': [37.5013, 127.1188]}\r\n# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': [37.5013, 127.1188]}\r\n\r\n# appling functions to coordinate...\r\nquery = {\r\n    (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[ #tuple of multiple paths,\r\n        'address', '/'\r\n    ],\r\n    (('latitude',), ('longitude',)): [ \r\n        'coordinate',\r\n        str,    # convert individual float type elements to str for join\r\n        ','     \r\n    ]\r\n}\r\nfor r in diselect(sample_from_json, query):\r\n    print(r)\r\n\r\n# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': '37.5013,127.1188'}\r\n# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': '37.5013,127.1188'}\r\n```\r\n\r\n\r\n```python\r\n# 4) Summary\r\nquery = {\r\n    ('city', 'names', 'en'): 'city_name',\r\n    ('continent', 'code'): 'continent_code',\r\n    ('continent', 'names', 'en'): 'continent_name',\r\n    ('country', 'iso_code'): 'country_code',\r\n    ('country', 'names', 'en'): 'country_name',\r\n    ('location', 'time_zone'): 'timezone',\r\n    (('latitude',), ('longitude',)): [\r\n        'coordinate',\r\n        str, ','\r\n    ],\r\n    ('subdivisions', 'names', 'en'): [\r\n        'subdivision_name',\r\n        ',', str.upper\r\n    ]\r\n}\r\n\r\nfor r in diselect(container=sample_from_json, query=query):\r\n    print(r)\r\n\r\n# {'city_name': 'Songpa-gu', 'continent_code': 'AS', 'continent_name': 'Asia', 'country_code': 'KR', 'country_name': 'South Korea', 'timezone': 'Asia/Seoul', 'coordinate': '37.5013,127.1188', 
'subdivision_name': 'SEOUL,HANGANG'}\r\n# {'city_name': 'Songpa-gu2', 'continent_code': 'AS2', 'continent_name': 'Asia2', 'country_code': 'KR2', 'country_name': 'South Korea2', 'timezone': 'Asia/Seoul2', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL2,HANGANG2'}\r\n```\r\n\r\n----\r\n\r\n## 3. Arguments\r\n### 1. container\r\n    > nested with dict and list complex data\r\n### 2. query\r\n```python\r\nquery1 = {\r\n    key1, key2,\r\n    {(key3, key2): alias},\r\n    {(key4, key5): (alias2, apply)},\r\n}\r\nquery2 = [\r\n    'column1', 'column2',\r\n    {\r\n        ('path1', 'path2'): 'alias1',\r\n        ('patt1', 'path2', 'path3'): ('alias2', dateutil.parser.parse),\r\n    },\r\n    'column4'\r\n]\r\n```\r\n- non-overlapping 'minimum' path of value item (need not be fullpath)\r\n- parents path lists key of target 'terminal' value (target value must be scalar value, like str, int...)\r\n- More detail is better to avoid duplication (...great-grandparent, grandparent, parent)\r\n- You can mix dict and tuple\r\n- The results column order of the output matches the order of the query\r\n- alias: column name representing the query\r\n- apply: function to be applied to value\r\n### 3. caution\r\n- If there is no query matching the key path of the container, a warning is output and it does not appear into the result column.\r\n- If the matching of the query is duplicated, an exception is raised and a more detailed query is required.\r\n- Consider the data structure of the container. 
Suggested queries are aggregated by matching top-level keys of matched with query.\r\n```python\r\n# date and count in the presented example data are single entities as top-level keys.\r\n  # 'count': 1,\r\n  # 'date': '2022-5-31',\r\n  # 'data_list': [ ...\r\n# but data_list is multiple row value\r\n# Querying data from both tendencies at the same time leads to unpredictable behavior.\r\n\r\ngreedy_query = [\r\n    # query for top level single context value\r\n    'count', 'date', \r\n    # query for row values\r\n    {\r\n        ('city', 'names', 'en'): 'city_name',\r\n        ('continent', 'code'): 'continent_code',\r\n        ('continent', 'names', 'en'): 'continent_name',\r\n        ('country', 'iso_code'): 'country_code',\r\n        ('country', 'names', 'en'): 'country_name',\r\n        ('location', 'time_zone'): 'timezone',\r\n        ('subdivisions', 'names', 'en'): ('subdivision_name', ','), \r\n    }\r\n]\r\n\r\nfor r in diselect(sample_from_json, greedy_query):\r\n    print(r)\r\n\r\n# results\r\n# {'count': 1, 'date': '2022-5-31', 'city_name': ['Songpa-gu', 'Songpa-gu2'], 'continent_code': ['AS', 'AS2'], 'continent_name': ['Asia', 'Asia2'], 'country_code': ['KR', 'KR2'], 'country_name': ['South Korea', 'South Korea2'], 'timezone': ['Asia/Seoul', 'Asia/Seoul2'], 'subdivision_name': 'Seoul,Hangang,Seoul2,Hangang2'}\r\n\r\n# The data is organized vertically with the top keys count and date. Maybe this is what you want.\r\n# This can be used as a trick to get the column dataset\r\n\r\n\r\n## Tip. 
separate query by structure for get two of them both\r\nquery_context = ['count', 'date']\r\n\r\nquery_list = {\r\n    ('city', 'names', 'en'): 'city_name',\r\n    ('continent', 'code'): 'continent_code',\r\n    ('continent', 'names', 'en'): 'continent_name',\r\n    ('country', 'iso_code'): 'country_code',\r\n    ('country', 'names', 'en'): 'country_name',\r\n    ('location', 'time_zone'): 'timezone',\r\n    ('subdivisions', 'names', 'en'): ('subdivision_name', ','), \r\n}\r\n\r\n\r\n\r\n[context_data] = list(diselect(sample_from_json, query_context)) # may one\r\ncount = context_data['count']\r\ndate = context_data['date']\r\n\r\n# or may be simple and better just direct indexing when values are easy to access\r\ncount = sample_from_json['count']\r\ndate = sample_from_json['date']\r\n\r\ndata_list = list(diselect(sample_from_json, query_list)) # many\r\n\r\n```\r\n\r\n## 4. More Useages\r\n\r\n### 1. typing values\r\n    - value typing via apply function\r\n\r\n```python\r\nimport dateutil\r\n\r\ndata = [\r\n    {\r\n        'place_id': 142213,\r\n        'visit_count': '5',\r\n        'visit_date': '2022/2/21',\r\n        'rating': '2.5',\r\n    },\r\n    {\r\n        'place_id': 154321,\r\n        'visit_count': '12',\r\n        'visit_date': '2022.3.7.',\r\n        'rating': '4.5',\r\n    },\r\n]\r\n\r\nparsed = diselect(data,\r\n{\r\n    'place_id': ('place_id', str),\r\n    'visit_count': ('visit_count', int),\r\n    'rating': ('point', float),\r\n    'visit_date': ('visit_count', dateutil.parser.parse),\r\n})\r\nfor row in parsed:\r\n    print(row)\r\n# results\r\n# {'place_id': '142213', 'visit_count': datetime.datetime(2022, 2, 21, 0, 0), 'point': 2.5}\r\n# {'place_id': '154321', 'visit_count': datetime.datetime(2022, 3, 7, 0, 0), 'point': 4.5}\r\n```\r\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "smart and convenient dict flatten library for complex container nested with dict and list",
    "version": "1.0.12",
    "project_urls": {
        "Homepage": "https://github.com/zwolf21/diselect"
    },
    "split_keywords": [
        "diselect",
        "dict select",
        "flatten",
        "dict flatten",
        "dict in list",
        "list of dict"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "a085035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64",
                "md5": "b845bf7078b7c2a53162a1c9123df5cf",
                "sha256": "654813bd9b9188437f361172c4912386b32fe098ceecb7a99a852483c7cc1351"
            },
            "downloads": -1,
            "filename": "diselect-1.0.12.tar.gz",
            "has_sig": false,
            "md5_digest": "b845bf7078b7c2a53162a1c9123df5cf",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.8",
            "size": 12969,
            "upload_time": "2024-01-15T10:56:43",
            "upload_time_iso_8601": "2024-01-15T10:56:43.831842Z",
            "url": "https://files.pythonhosted.org/packages/a0/85/035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64/diselect-1.0.12.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-01-15 10:56:43",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "zwolf21",
    "github_project": "diselect",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "lcname": "diselect"
}

HS Moon