## 1. Introduction
### - A smart and convenient single function for extracting values from containers consisting of lists and dicts
### - Query a container with a complex structure, mainly generated from json, and flatten it into a dict with a single structure.
### - Free yourself from code that indexes complex JSON data one item at a time and writes for loops like the one below...
>
```python
# extracting values from data loaded from JSON..
sample_from_json = json.load(open('sample.json'))
count = sample_from_json['count']
data_list = sample_from_json.get('data_list')
for cityinfo in data_list:
for key, value in cityinfo.items():
if key == 'city':
city_name = value['names']['en']
if key == 'subdivisions':
subdiv = []
for subsubdivision in value:
sv = subsubdivision['names']['en']
subdiv.append(sv)
...
...
...
...
...
...
...
OMG...
...
....
```
---
## 2. Installation and Usage
- Requires Python 3.8 or later
```shell
pip install diselect
```
```python
from diselect import diselect
# example
# list of dict in dict in list in dict in list of dict in....
sample_from_json = {
'count': 1,
'date': '2022-5-31',
'data_list': [
{
"city": {
"names": {
"en": "Songpa-gu"
}
},
"continent": {
"code": "AS",
"names": {"pt-BR": "Ásia", "de": "Asien", "en": "Asia",}
},
"country": {
"iso_code": "KR",
"names": {
"de": "Südkorea",
"en": "South Korea",
}
},
"location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul"},
# a list with multiple children
"subdivisions": [
{
"iso_code": "11",
"names": {"zh-CN": "首尔特别市", "en": "Seoul", "ja": "ソウル特別市"}
},
{
"iso_code": "12",
"names": {"en": "Hangang"}
}
],
},
{
"city": {
"names": {
"en": "Songpa-gu2"
}
},
"continent": {
"code": "AS2",
"names": {"pt-BR": "Ásia2", "de": "Asien", "en": "Asia2",}
},
"country": {
"iso_code": "KR2",
"names": {
"de": "Südkorea2",
"en": "South Korea2",
}
},
"location": {"latitude": 37.5013, "longitude": 127.1188, "time_zone": "Asia/Seoul2"},
# a list with multiple children
"subdivisions": [
{
"iso_code": "112",
"names": {"zh-CN": "首尔特别市", "en": "Seoul2", "ja": "ソウル特別市"}
},
{
"iso_code": "122",
"names": {"en": "Hangang2"}
}
],
},
]
}
```
```python
# Usage 1) Specify only the column name
# For top-level values in the container, where there is no risk of duplicate matches
query_only_key = ['count', 'date'] # key name to column
for r in diselect(sample_from_json, query_only_key):
print(r)
# results {'count': 1, 'date': '2022-5-31'}
```
```python
# Usage 2) Extract nested values
# Each query is a tuple of parent keys leading to the target 'terminal' value
# If too few parent keys are given, duplicate matches may occur.
# An exception is raised when a duplicate match occurs
query_deep_path = [('city', 'names', 'en'), ('country', 'names', 'en')] # en is key of terminal value
for r in diselect(sample_from_json, query_deep_path):
print(r)
# results
# {('city', 'names', 'en'): 'Songpa-gu', ('country', 'names', 'en'): 'South Korea'}
# {('city', 'names', 'en'): 'Songpa-gu2', ('country', 'names', 'en'): 'South Korea2'}
```
```python
# Usage 3) Aliasing query to column name
# Change the query to a usable column name
query_aliases = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): 'subdivision_names'
}
# or
query_aliases = [
{('city', 'names', 'en'): 'city_name'},
{('country', 'names', 'en'): 'country_name'},
{('subdivisions', 'names', 'en'): 'subdivision_names'}
]
for r in diselect(sample_from_json, query_aliases):
print(r)
# results:
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': ['Seoul', 'Hangang']}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': ['Seoul2', 'Hangang2']}
# the multiple child values of subdivision_names have been coalesced into a list, e.g. ['Seoul', 'Hangang']
```
```python
# Usage 4) Join listed children values
# pass a tuple of (alias, function) as the value
query_aliases_and_join_children = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): ('subdivision_names', ','.join), # alias, join function
}
for r in diselect(sample_from_json, query_aliases_and_join_children):
print(r)
# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'Seoul,Hangang'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'Seoul2,Hangang2'}
# Seoul and Hangang have been joined with the separator ','
```
```python
query_aliases_and_join_children = {
('city', 'names', 'en'): 'city_name',
('country', 'names', 'en'): 'country_name',
('subdivisions', 'names', 'en'): [
'subdivision_names',
','.join, str.upper # alias, chaining function
]
}
for r in diselect(sample_from_json, query_aliases_and_join_children):
print(r)
# results
# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'SEOUL2,HANGANG2'}
```
```python
# Usage 5) Merge multiple selections
query = {
(('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[
'address',
'/' # a str is a shortcut for a join function that uses it as the separator
],
(('latitude',), ('longitude',)): [
'coordinate'
]
}
for r in diselect(sample_from_json, query):
print(r)
# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': [37.5013, 127.1188]}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': [37.5013, 127.1188]}
# applying functions to coordinate...
query = {
(('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[ #tuple of multiple paths,
'address', '/'
],
(('latitude',), ('longitude',)): [
'coordinate',
str, # convert individual float type elements to str for join
','
]
}
for r in diselect(sample_from_json, query):
print(r)
# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': '37.5013,127.1188'}
# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': '37.5013,127.1188'}
```
```python
# Usage 6) Summary
query = {
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
(('latitude',), ('longitude',)): [
'coordinate',
str, ','
],
('subdivisions', 'names', 'en'): [
'subdivision_name',
',', str.upper
]
}
for r in diselect(container=sample_from_json, query=query):
print(r)
# {'city_name': 'Songpa-gu', 'continent_code': 'AS', 'continent_name': 'Asia', 'country_code': 'KR', 'country_name': 'South Korea', 'timezone': 'Asia/Seoul', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL,HANGANG'}
# {'city_name': 'Songpa-gu2', 'continent_code': 'AS2', 'continent_name': 'Asia2', 'country_code': 'KR2', 'country_name': 'South Korea2', 'timezone': 'Asia/Seoul2', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL2,HANGANG2'}
```
----
## 3. Arguments
### 1. container
> Complex data nested with dicts and lists
### 2. query
```python
query1 = {
key1, key2,
{(key3, key2): alias},
{(key4, key5): (alias2, apply)},
}
query2 = [
'column1', 'column2',
{
('path1', 'path2'): 'alias1',
('path1', 'path2', 'path3'): ('alias2', dateutil.parser.parse),
},
'column4'
]
```
- Each query is a non-overlapping 'minimum' path to a value item (it need not be the full path)
- A path lists the parent keys of the target 'terminal' value (the target value must be a scalar, such as str, int...)
- A more detailed path is better for avoiding duplicate matches (...great-grandparent, grandparent, parent)
- You can mix dicts and tuples
- The column order of the results matches the order of the query
- alias: column name representing the query
- apply: function to be applied to value
### 3. caution
- If a query does not match any key path in the container, a warning is output and that query does not appear in the result columns.
- If a query matches more than once, an exception is raised and a more detailed query is required.
- Consider the data structure of the container. The values selected by the queries are aggregated according to the top-level keys under which the queries match.
```python
# date and count in the presented example data are single entities as top-level keys.
# 'count': 1,
# 'date': '2022-5-31',
# 'data_list': [ ...
# but data_list holds multiple rows of values
# Querying both kinds of data at the same time leads to unpredictable behavior.
greedy_query = [
# query for top level single context value
'count', 'date',
# query for row values
{
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
('subdivisions', 'names', 'en'): ('subdivision_name', ','),
}
]
for r in diselect(sample_from_json, greedy_query):
print(r)
# results
# {'count': 1, 'date': '2022-5-31', 'city_name': ['Songpa-gu', 'Songpa-gu2'], 'continent_code': ['AS', 'AS2'], 'continent_name': ['Asia', 'Asia2'], 'country_code': ['KR', 'KR2'], 'country_name': ['South Korea', 'South Korea2'], 'timezone': ['Asia/Seoul', 'Asia/Seoul2'], 'subdivision_name': 'Seoul,Hangang,Seoul2,Hangang2'}
# The data is organized vertically (column-wise) under the top-level keys count and date. Maybe this is what you want.
# This can be used as a trick to get a column-oriented dataset
## Tip: separate the queries by structure to get both kinds of data
query_context = ['count', 'date']
query_list = {
('city', 'names', 'en'): 'city_name',
('continent', 'code'): 'continent_code',
('continent', 'names', 'en'): 'continent_name',
('country', 'iso_code'): 'country_code',
('country', 'names', 'en'): 'country_name',
('location', 'time_zone'): 'timezone',
('subdivisions', 'names', 'en'): ('subdivision_name', ','),
}
[context_data] = list(diselect(sample_from_json, query_context)) # expected to yield a single row
count = context_data['count']
date = context_data['date']
# or it may be simpler and better to index directly when the values are easy to access
count = sample_from_json['count']
date = sample_from_json['date']
data_list = list(diselect(sample_from_json, query_list)) # many
```
## 4. More Usages
### 1. typing values
- value typing via apply function
```python
import dateutil.parser
data = [
{
'place_id': 142213,
'visit_count': '5',
'visit_date': '2022/2/21',
'rating': '2.5',
},
{
'place_id': 154321,
'visit_count': '12',
'visit_date': '2022.3.7.',
'rating': '4.5',
},
]
parsed = diselect(data,
{
'place_id': ('place_id', str),
'visit_count': ('visit_count', int),
'rating': ('point', float),
'visit_date': ('visit_date', dateutil.parser.parse),
})
for row in parsed:
print(row)
# results
# {'place_id': '142213', 'visit_count': 5, 'point': 2.5, 'visit_date': datetime.datetime(2022, 2, 21, 0, 0)}
# {'place_id': '154321', 'visit_count': 12, 'point': 4.5, 'visit_date': datetime.datetime(2022, 3, 7, 0, 0)}
```
Raw data
{
"_id": null,
"home_page": "https://github.com/zwolf21/diselect",
"name": "diselect",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": "",
"keywords": "diselect,dict select,flatten,dict flatten,dict in list,list of dict",
"author": "HS Moon",
"author_email": "pbr112@naver.com",
"download_url": "https://files.pythonhosted.org/packages/a0/85/035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64/diselect-1.0.12.tar.gz",
"platform": null,
"description": "## 1. Introduction\r\n### - A smart and convenience single function for extracting container value consisting of list and dict\r\n### - Query a container with a complex structure, mainly generated from json, and flatten it into a dict with a single structure.\r\n### - Get freedom from code that indexes complex json data one by one and writes for loops like this below...\r\n> \r\n```python\r\n\r\n# extracting from json loads data..\r\n\r\nsample_from_json = json.loads('sample.json')\r\n\r\ncount = sample_from_json['count']\r\ndata_list = sample_from_json.get('data_list')\r\n\r\nfor cityinfo in data_list:\r\n for key, value in cityinfo.items():\r\n if key == 'city':\r\n city_name = value['names']['en']\r\n if key == 'subdivisions':\r\n subdiv = []\r\n for subsubdivision in value:\r\n sv = subsubdivision['names']['en']\r\n subdiv.append(sv)\r\n ...\r\n ...\r\n ...\r\n ...\r\n ...\r\n ...\r\n ...\r\n OMG...\r\n ...\r\n ....\r\n```\r\n---\r\n\r\n## 2. Installation and Usage\r\n - Made in Python 3.8 or later\r\n```shell\r\npip install diselect\r\n```\r\n```python\r\nfrom diselect import diselect\r\n\r\n# example\r\n# list of dict in dict in list in dict in list of dict in....\r\nsample_from_json = {\r\n 'count': 1,\r\n 'date': '2022-5-31',\r\n 'data_list': [\r\n {\r\n \"city\": {\r\n \"names\": {\r\n \"en\": \"Songpa-gu\"\r\n }\r\n },\r\n \"continent\": {\r\n \"code\": \"AS\",\r\n \"names\": {\"pt-BR\": \"\u00c1sia\", \"de\": \"Asien\", \"en\": \"Asia\",}\r\n },\r\n \"country\": {\r\n \"iso_code\": \"KR\", \r\n \"names\": {\r\n \"de\": \"S\u00fcdkorea\",\r\n \"en\": \"South Korea\",\r\n }\r\n },\r\n \"location\": {\"latitude\": 37.5013, \"longitude\": 127.1188, \"time_zone\": \"Asia/Seoul\"},\r\n\r\n # multiple childerns of list\r\n \"subdivisions\": [\r\n {\r\n \"iso_code\": \"11\",\r\n \"names\": {\"zh-CN\": \"\u9996\u5c14\u7279\u522b\u5e02\", \"en\": \"Seoul\", \"ja\": \"\u30bd\u30a6\u30eb\u7279\u5225\u5e02\"}\r\n },\r\n {\r\n \"iso_code\": \"12\",\r\n 
\"names\": {\"en\": \"Hangang\"}\r\n }\r\n ],\r\n \r\n },\r\n {\r\n \"city\": {\r\n \"names\": {\r\n \"en\": \"Songpa-gu2\"\r\n }\r\n },\r\n \"continent\": {\r\n \"code\": \"AS2\",\r\n \"names\": {\"pt-BR\": \"\u00c1sia2\", \"de\": \"Asien\", \"en\": \"Asia2\",}\r\n },\r\n \"country\": {\r\n \"iso_code\": \"KR2\", \r\n \"names\": {\r\n \"de\": \"S\u00fcdkorea2\",\r\n \"en\": \"South Korea2\",\r\n }\r\n },\r\n \"location\": {\"latitude\": 37.5013, \"longitude\": 127.1188, \"time_zone\": \"Asia/Seoul2\"},\r\n\r\n # multiple childerns of list\r\n \"subdivisions\": [\r\n {\r\n \"iso_code\": \"112\",\r\n \"names\": {\"zh-CN\": \"\u9996\u5c14\u7279\u522b\u5e02\", \"en\": \"Seoul2\", \"ja\": \"\u30bd\u30a6\u30eb\u7279\u5225\u5e02\"}\r\n },\r\n {\r\n \"iso_code\": \"122\",\r\n \"names\": {\"en\": \"Hangang2\"}\r\n }\r\n ],\r\n \r\n },\r\n ]\r\n}\r\n\r\n\r\n```\r\n```python\r\n# Useage 1) Specify only the column name\r\n# When taking the highest values in container \u200b\u200bwithout the risk of duplication\r\n\r\nquery_only_key = ['count', 'date'] # key name to column\r\nfor r in diselect(sample_from_json, query_only_key):\r\n print(r)\r\n\r\n# results {'count': 1, 'date': '2022-5-31'}\r\n```\r\n\r\n```python\r\n# Useage 2) Extract nested values\r\n# parent paths tuple keys of target 'terminal' value\r\n# If there are few parental generations, duplicate matching may occur.\r\n# Exception when duplicate occurs\r\n\r\nquery_deep_path = [('city', 'names', 'en'), ('country', 'names', 'en')] # en is key of terminal value\r\nfor r in diselect(sample_from_json, query_deep_path):\r\n print(r)\r\n\r\n# results \r\n# {('city', 'names', 'en'): 'Songpa-gu', ('country', 'names', 'en'): 'South Korea'}\r\n# {('city', 'names', 'en'): 'Songpa-gu2', ('country', 'names', 'en'): 'South Korea2'}\r\n```\r\n\r\n```python\r\n# Useage 3) Aliasing query to column name\r\n# Change the query to an usable column name\r\n\r\nquery_aliases = {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('country', 
'names', 'en'): 'country_name',\r\n ('subdivisions', 'names', 'en'): 'subdivision_name'\r\n}\r\n# or\r\nquery_aliases = [\r\n {('city', 'names', 'en'): 'city_name'},\r\n {('country', 'names', 'en'): 'country_name'},\r\n {('subdivisions', 'names', 'en'): 'subdivision_names'}\r\n]\r\n\r\nfor r in diselect(sample_from_json, query_aliases):\r\n print(r)\r\n\r\n# results:\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': ['Seoul', 'Hangang']}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': ['Seoul2', 'Hangang2']}\r\n# multiple children values of subdivision_names has coaleased to list ['Seoul', 'Hangang']\r\n```\r\n```python\r\n# Useage 4) join listed children values\r\n# pass tuple value of aliase and function\r\n\r\nquery_aliases_and_join_children = {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('country', 'names', 'en'): 'country_name',\r\n ('subdivisions', 'names', 'en'): ('subdivision_names', ','.join), # alias, join function\r\n}\r\n\r\nfor r in diselect(sample_from_json, query_aliases_and_join_children):\r\n print(r)\r\n\r\n# results\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'Seoul,Hangang'}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'Seoul2,Hangang2'}\r\n# Soule, Hangang has joined with sep ','\r\n```\r\n```python\r\nquery_aliases_and_join_children = {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('country', 'names', 'en'): 'country_name',\r\n ('subdivisions', 'names', 'en'): [\r\n 'subdivision_names',\r\n ','.join, str.upper # alias, chaining function\r\n ]\r\n}\r\n\r\nfor r in diselect(sample_from_json, query_aliases_and_join_children):\r\n print(r)\r\n# results\r\n# {'city_name': 'Songpa-gu', 'country_name': 'South Korea', 'subdivision_names': 'SEOUL,HANGANG'}\r\n# {'city_name': 'Songpa-gu2', 'country_name': 'South Korea2', 'subdivision_names': 'SEOUL2,HANGANG2'}\r\n```\r\n\r\n```python\r\n# Useage 5) 
merge muliple select\r\n \r\nquery = {\r\n (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[\r\n 'address',\r\n '/' # if str, be a shorcut of join function\r\n ],\r\n (('latitude',), ('longitude',)): [\r\n 'coordinate'\r\n ]\r\n}\r\nfor r in diselect(sample_from_json, query):\r\n print(r)\r\n\r\n# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': [37.5013, 127.1188]}\r\n# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': [37.5013, 127.1188]}\r\n\r\n# appling functions to coordinate...\r\nquery = {\r\n (('continent', 'names', 'en'), ('country', 'names', 'en'), ('city', 'names', 'en')):[ #tuple of multiple paths,\r\n 'address', '/'\r\n ],\r\n (('latitude',), ('longitude',)): [ \r\n 'coordinate',\r\n str, # convert individual float type elements to str for join\r\n ',' \r\n ]\r\n}\r\nfor r in diselect(sample_from_json, query):\r\n print(r)\r\n\r\n# {'address': 'Asia/South Korea/Songpa-gu', 'coordinate': '37.5013,127.1188'}\r\n# {'address': 'Asia2/South Korea2/Songpa-gu2', 'coordinate': '37.5013,127.1188'}\r\n```\r\n\r\n\r\n```python\r\n# 4) Summary\r\nquery = {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('continent', 'code'): 'continent_code',\r\n ('continent', 'names', 'en'): 'continent_name',\r\n ('country', 'iso_code'): 'country_code',\r\n ('country', 'names', 'en'): 'country_name',\r\n ('location', 'time_zone'): 'timezone',\r\n (('latitude',), ('longitude',)): [\r\n 'coordinate',\r\n str, ','\r\n ],\r\n ('subdivisions', 'names', 'en'): [\r\n 'subdivision_name',\r\n ',', str.upper\r\n ]\r\n}\r\n\r\nfor r in diselect(container=sample_from_json, query=query):\r\n print(r)\r\n\r\n# {'city_name': 'Songpa-gu', 'continent_code': 'AS', 'continent_name': 'Asia', 'country_code': 'KR', 'country_name': 'South Korea', 'timezone': 'Asia/Seoul', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL,HANGANG'}\r\n# {'city_name': 'Songpa-gu2', 'continent_code': 'AS2', 'continent_name': 'Asia2', 'country_code': 'KR2', 
'country_name': 'South Korea2', 'timezone': 'Asia/Seoul2', 'coordinate': '37.5013,127.1188', 'subdivision_name': 'SEOUL2,HANGANG2'}\r\n```\r\n\r\n----\r\n\r\n## 3. Arguments\r\n### 1. container\r\n > nested with dict and list complex data\r\n### 2. query\r\n```python\r\nquery1 = {\r\n key1, key2,\r\n {(key3, key2): alias},\r\n {(key4, key5): (alias2, apply)},\r\n}\r\nquery2 = [\r\n 'column1', 'column2',\r\n {\r\n ('path1', 'path2'): 'alias1',\r\n ('patt1', 'path2', 'path3'): ('alias2', dateutil.parser.parse),\r\n },\r\n 'column4'\r\n]\r\n```\r\n- non-overlapping 'minimum' path of value item (need not be fullpath)\r\n- parents path lists key of target 'terminal' value (target value must be scalar value, like str, int...)\r\n- More detail is better to avoid duplication (...great-grandparent, grandparent, parent)\r\n- You can mix dict and tuple\r\n- The results column order of the output matches the order of the query\r\n- alias: column name representing the query\r\n- apply: function to be applied to value\r\n### 3. caution\r\n- If there is no query matching the key path of the container, a warning is output and it does not appear into the result column.\r\n- If the matching of the query is duplicated, an exception is raised and a more detailed query is required.\r\n- Consider the data structure of the container. 
Suggested queries are aggregated by matching top-level keys of matched with query.\r\n```python\r\n# date and count in the presented example data are single entities as top-level keys.\r\n # 'count': 1,\r\n # 'date': '2022-5-31',\r\n # 'data_list': [ ...\r\n# but data_list is multiple row value\r\n# Querying data from both tendencies at the same time leads to unpredictable behavior.\r\n\r\ngreedy_query = [\r\n # query for top level single context value\r\n 'count', 'date', \r\n # query for row values\r\n {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('continent', 'code'): 'continent_code',\r\n ('continent', 'names', 'en'): 'continent_name',\r\n ('country', 'iso_code'): 'country_code',\r\n ('country', 'names', 'en'): 'country_name',\r\n ('location', 'time_zone'): 'timezone',\r\n ('subdivisions', 'names', 'en'): ('subdivision_name', ','), \r\n }\r\n]\r\n\r\nfor r in diselect(sample_from_json, greedy_query):\r\n print(r)\r\n\r\n# results\r\n# {'count': 1, 'date': '2022-5-31', 'city_name': ['Songpa-gu', 'Songpa-gu2'], 'continent_code': ['AS', 'AS2'], 'continent_name': ['Asia', 'Asia2'], 'country_code': ['KR', 'KR2'], 'country_name': ['South Korea', 'South Korea2'], 'timezone': ['Asia/Seoul', 'Asia/Seoul2'], 'subdivision_name': 'Seoul,Hangang,Seoul2,Hangang2'}\r\n\r\n# The data is organized vertically with the top keys count and date. Maybe this is what you want.\r\n# This can be used as a trick to get the column dataset\r\n\r\n\r\n## Tip. 
separate query by structure for get two of them both\r\nquery_context = ['count', 'date']\r\n\r\nquery_list = {\r\n ('city', 'names', 'en'): 'city_name',\r\n ('continent', 'code'): 'continent_code',\r\n ('continent', 'names', 'en'): 'continent_name',\r\n ('country', 'iso_code'): 'country_code',\r\n ('country', 'names', 'en'): 'country_name',\r\n ('location', 'time_zone'): 'timezone',\r\n ('subdivisions', 'names', 'en'): ('subdivision_name', ','), \r\n}\r\n\r\n\r\n\r\n[context_data] = list(diselect(sample_from_json, query_context)) # may one\r\ncount = context_data['count']\r\ndate = context_data['date']\r\n\r\n# or may be simple and better just direct indexing when values are easy to access\r\ncount = sample_from_json['count']\r\ndate = sample_from_json['date']\r\n\r\ndata_list = list(diselect(sample_from_json, query_list)) # many\r\n\r\n```\r\n\r\n## 4. More Useages\r\n\r\n### 1. typing values\r\n - value typing via apply function\r\n\r\n```python\r\nimport dateutil\r\n\r\ndata = [\r\n {\r\n 'place_id': 142213,\r\n 'visit_count': '5',\r\n 'visit_date': '2022/2/21',\r\n 'rating': '2.5',\r\n },\r\n {\r\n 'place_id': 154321,\r\n 'visit_count': '12',\r\n 'visit_date': '2022.3.7.',\r\n 'rating': '4.5',\r\n },\r\n]\r\n\r\nparsed = diselect(data,\r\n{\r\n 'place_id': ('place_id', str),\r\n 'visit_count': ('visit_count', int),\r\n 'rating': ('point', float),\r\n 'visit_date': ('visit_count', dateutil.parser.parse),\r\n})\r\nfor row in parsed:\r\n print(row)\r\n# results\r\n# {'place_id': '142213', 'visit_count': datetime.datetime(2022, 2, 21, 0, 0), 'point': 2.5}\r\n# {'place_id': '154321', 'visit_count': datetime.datetime(2022, 3, 7, 0, 0), 'point': 4.5}\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "smart and convenient dict flatten library for complex container nested with dict and list",
"version": "1.0.12",
"project_urls": {
"Homepage": "https://github.com/zwolf21/diselect"
},
"split_keywords": [
"diselect",
"dict select",
"flatten",
"dict flatten",
"dict in list",
"list of dict"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "a085035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64",
"md5": "b845bf7078b7c2a53162a1c9123df5cf",
"sha256": "654813bd9b9188437f361172c4912386b32fe098ceecb7a99a852483c7cc1351"
},
"downloads": -1,
"filename": "diselect-1.0.12.tar.gz",
"has_sig": false,
"md5_digest": "b845bf7078b7c2a53162a1c9123df5cf",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 12969,
"upload_time": "2024-01-15T10:56:43",
"upload_time_iso_8601": "2024-01-15T10:56:43.831842Z",
"url": "https://files.pythonhosted.org/packages/a0/85/035c1403d7395ce7a45463ebb4b18277552efe4a91ab705bde6cd1c73a64/diselect-1.0.12.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-01-15 10:56:43",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "zwolf21",
"github_project": "diselect",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "diselect"
}