### Overview
Those dict flavours that you have probably thought of at some point.
Zero dependencies.
### Installation
```bash
pip install those-dicts
```
### TL;DR
Below you may find examples of behavior under normal dict-style usage of those_dicts. Essentially those are dicts but with a twist.
```python
from those_dicts import BatchedDict, GraphDict, TwoWayDict, OOMDict
my_batched_dict = BatchedDict(nested=True)
client1 = dict(name='Lieutenant', surname='Kowalski',
address=dict(street='Funny Avenue', city='Elsewhere'))
client2 = dict(name='Thomas', surname='Dison',
address=dict(street='Lightbulb St.', city='Elsewhere'))
my_batched_dict.update(client1)
my_batched_dict.update(client2)
# >>> my_batched_dict['name']
# ['Lieutenant', 'Thomas']
# >>> my_batched_dict['address']
# {'street': ['Funny Avenue', 'Lightbulb St.'], 'city': ['Elsewhere', 'Elsewhere']}
my_graph_dict = GraphDict(Warsaw='Katowice', Katowice='Gdansk', Gdansk='Warsaw')
flights_to_germany = dict(Warsaw='Berlin', Katowice='Frankfurt')
flights_from_germany = dict(Berlin='Warsaw', Frankfurt='Katowice')
my_graph_dict.update(flights_to_germany)
my_graph_dict.update(flights_from_germany)
# >>> my_graph_dict['Warsaw']
# {'Berlin', 'Katowice'}
# >>> my_graph_dict['Berlin']
# 'Warsaw'
my_twoway_dict = TwoWayDict({('Eric', 'Doe'): ('Ella', 'Moon')})
# >>> my_twoway_dict[('Ella', 'Moon')] == ('Eric', 'Doe')
# True
# >>> my_twoway_dict[('Eric', 'Doe')] == ('Ella', 'Moon')
# True
new_marriage_after_divorce = {('Ella', 'Moon'): ('Benny', 'Hills')}
my_twoway_dict.update(new_marriage_after_divorce)
# >>> my_twoway_dict[('Ella', 'Moon')] == ('Benny', 'Hills')
# True
# >>> my_twoway_dict[('Eric', 'Doe')] is None
# True
from some_lib import ObjWithDefinedSize
my_oom_dict = OOMDict(max_ram_entries=10)
my_oom_dict.update({str(k): ObjWithDefinedSize(mb_size=k) for k in range(1000)})
# first 10 objects are in RAM, the rest is on the disk
del my_oom_dict # clears the disk also
```
### Getting Started
#### BatchedDict
When you want to aggregate multiple dicts:
```python
from those_dicts import BatchedDict
my_batched_dict = BatchedDict()
my_batched_nested = BatchedDict(nested=True)
client1 = dict(name='Lieutenant', surname='Kowalski',
address=dict(street='Funny Avenue', city='Elsewhere'))
client2 = dict(name='Thomas', surname='Dison',
address=dict(street='Lightbulb St.', city='Elsewhere'))
my_batched_dict.update(client1)
my_batched_dict.update(client2)
my_batched_nested.update(client1)
my_batched_nested.update(client2)
# or equivalently, because it is a dict
my_batched_dict = BatchedDict(name='Lieutenant', surname='Kowalski',
address=dict(street='Funny Avenue', city='Elsewhere'))
my_batched_nested = BatchedDict(nested=True, name='Lieutenant', surname='Kowalski',
address=dict(street='Funny Avenue', city='Elsewhere'))
my_batched_dict.update(client2)
my_batched_nested.update(client2)
# >>> my_batched_dict
# {'name': ['Lieutenant', 'Thomas'], 'surname': ['Kowalski', 'Dison'], 'address': [{'street': 'Funny Avenue', 'city': 'Elsewhere'}, {'street': 'Lightbulb St.', 'city': 'Elsewhere'}]}
# >>> my_batched_nested
# {'name': ['Lieutenant', 'Thomas'], 'surname': ['Kowalski', 'Dison'], 'address': {'street': ['Funny Avenue', 'Lightbulb St.'], 'city': ['Elsewhere', 'Elsewhere']}}
# straightforward aggregation use case
my_batched_dict = BatchedDict()
my_batched_dict['john_properties'] = 'car'
my_batched_dict['john_properties'] = 'bike'
my_batched_dict['john_properties'] = 'grill'
my_batched_dict['john_properties'] = 'gaming pc'
# >>> my_batched_dict['john_properties']
# ['car', 'bike', 'grill', 'gaming pc']
# >>> my_batched_dict['john_properties'].remove('grill')
# >>> my_batched_dict['john_properties']
# ['car', 'bike', 'gaming pc']
my_batched_dict['ella_properties'] = 'house'
my_batched_dict['ella_properties'] = 'garage'
# >>> my_batched_dict['ella_properties']
# ['house', 'garage']
```
Essentially it is a dict, so usage is intuitive.
### GraphDict
When you want to create a mapping from one hashable to another hashable that may traverse further.
```python
from dataclasses import dataclass
from those_dicts import GraphDict
@dataclass(frozen=True)
class Building:
    coordinates: tuple[float, float]
    address: str
    elevation: float
    purpose: str
    history: str
# some big, hashable data structure
@dataclass(frozen=True)
class City:
    name: str
    country: str
    area: float
    population: int
    top_10_buildings: frozenset[Building]
warsaw = City('Warsaw', ...)
katowice = ... # you get the point
gdansk = ...
berlin = ...
frankfurt = ...
my_graph_dict = GraphDict({warsaw: katowice, katowice: gdansk, gdansk: warsaw})
flights_to_germany = {warsaw: berlin, katowice: frankfurt}
flights_from_germany = {berlin: warsaw, frankfurt: katowice}
my_graph_dict.update(flights_to_germany)
my_graph_dict.update(flights_from_germany)
# >>> my_graph_dict[warsaw]
# {berlin, katowice}
# >>> my_graph_dict[berlin]
# warsaw
# >>> my_graph_dict
# {katowice: {2, 4}, warsaw: {0, 3}, gdansk: {1}, berlin: {1}, frankfurt: {0}}
```
GraphDict stores each hashable object only once - here everything is a key.
Values are just index-wise references. This means a lot of memory savings for storing big objects.
GraphDict is compatible with dict, but with a few twists, listed below:
- .pop() method is computationally expensive, because it forces reindexing of all the values. Prefer del instead.
- del graph_dict_instance\[some_key] removes all links from and to the given key, without removing the key entry itself. Leaving the (disconnected) key entry in place keeps unrelated indices in values as they are (no reindexing).
- .popitem() method is computationally expensive, because it forces reindexing of all the values, although it is not as expensive as .pop() because it returns the last key-value pair.
- .keys() method returns a mapping proxy (like dict), but the definition of key here is: a node that has a corresponding value(s) (outgoing connection).
- .values() method returns a mapping proxy (like dict), but the definition of value here is: a node that has a corresponding key (incoming connection).
- .items() method returns a mapping proxy (like dict), but the definition of item here is: a pair of nodes (key-value manner) for every key that is either in keys() or in values().
- .setdefault() raises NotImplementedError - use .get(key, default) instead.
- .make_loops(keys: Optional\[Iterable] = None) is new compared to dict - it adds a connection from a key to itself for every key provided, or for all keys.
- .delete_link(key, value) removes the directed connection from key to value if it exists. It does not affect the existence of keys.
- .disconnect(key, value) removes the connection from key to value and from value to key if they exist. It does not affect the existence of keys.
- .update() shall be used to update GraphDict like you would update regular dict.
- .merge() shall be used to update GraphDict with another GraphDict.
- .reindex() removes entries that are totally disconnected and updates indices stored in values for all entries (because deletion changes the order of keys).
- .get_dict() returns a regular dict with meaningful keys only (those that have a value other than None).
### TwoWayDict
It is a subclass of GraphDict that is restricted to have only exclusive two-way connections.
You can access a value through its key and the other way around.
Compared to GraphDict, .merge() and .make_loops() raise NotImplementedError, as they don't make sense for this class.
### OOMDict
When you want to limit impact on RAM.
```python
from those_dicts import OOMDict
my_oom_dict = OOMDict(max_ram_entries=10000) # the default
for name, big_obj in big_obj_generator(num_obj=1000000):
    my_oom_dict[name] = big_obj
# everything above 10000 objects will be stored on the disk
```
Even if storage is split between RAM and disk, it is just a dict, so use it as usual.
Raw data
{
"_id": null,
"home_page": "https://github.com/jakubgajski/those_dicts",
"name": "those-dicts",
"maintainer": null,
"docs_url": null,
"requires_python": "<4.0,>=3.9",
"maintainer_email": null,
"keywords": "graph, batch, two way, dict, dicts, data structures, lightweight",
"author": "Jakub Gajski",
"author_email": "jakub.gajski@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/c7/94/a364a53189af60cb26f54552b550cd9d9c4e23827acca49598529704df74/those_dicts-0.1.1.tar.gz",
"platform": null,
"description": "### Overview \n \nThose dict flavours that you have probably thought of at some point.\nZero dependencies.\n\n### Installation \n \n```bash\npip install those-dicts\n```\n \n### TL;DR \n \nBelow you may find examples of behavior under normal dist-style usage of those_dicts. Essentially those are dicts but with a twist. \n \n```python\nfrom those_dicts import BatchedDict, GraphDict, TwoWayDict, OOMDict\n\nmy_batched_dict = BatchedDict(nested=True)\nclient1 = dict(name='Lieutenant', surname='Kowalski',\n address=dict(street='Funny Avenue', city='Elsewhere'))\nclient2 = dict(name='Thomas', surname='Dison',\n address=dict(street='Lightbulb St.', city='Elsewhere'))\nmy_batched_dict.update(client1)\nmy_batched_dict.update(client2)\n# >>> my_batched_dict['name']\n# ['Lieutenant', 'Thomas']\n# >>> my_batched_dict['address']\n# {'street': ['Funny Avenue', 'Lightbulb St.'], 'city': ['Elsewhere', 'Elsewhere']}\n\nmy_graph_dict = GraphDict(Warsaw='Katowice', Katowice='Gdansk', Gdansk='Warsaw')\nflights_to_germany = dict(Warsaw='Berlin', Katowice='Frankfurt')\nflights_from_germany = dict(Berlin='Warsaw', Frankfurt='Katowice')\nmy_graph_dict.update(flights_to_germany)\nmy_graph_dict.update(flights_from_germany)\n# >>> my_graph_dict['Warsaw']\n# {'Berlin', 'Katowice'}\n# >>> my_graph_dict['Berlin']\n# 'Warsaw'\n\nmy_twoway_dict = TwoWayDict({('Eric', 'Doe'): ('Ella', 'Moon')})\n# >>> my_twoway_dict[('Ella', 'Moon')] == ('Eric', 'Doe')\n# True\n# >>> my_twoway_dict[('Eric', 'Doe')] == ('Ella', 'Moon')\n# True\nnew_marriage_after_divorce = {('Ella', 'Moon'): ('Benny', 'Hills')}\nmy_twoway_dict.update(new_marriage_after_divorce)\n# >>> my_twoway_dict[('Ella', 'Moon')] == ('Benny', 'Hills')\n# True\n# >>> my_twoway_dict[('Eric', 'Doe')] is None\n# True\n\nfrom some_lib import ObjWithDefinedSize\n\nmy_oom_dict = OOMDict(max_ram_entries=10)\nmy_oom_dict.update([str(k): ObjWithDefinedSize(mb_size=k) for k in range(1000)])\n# first 10 objects are in RAM, the rest is on 
the disk\n\ndel my_oom_dict # clears the disk also\n```\n \n### Getting Started \n \n#### BatchedDict \n \nWhen you want to aggregate multiple dicts: \n \n```python\nfrom those_dicts import BatchedDict\n\nmy_batched_dict = BatchedDict()\nmy_batched_nested = BatchedDict(nested=True)\nclient1 = dict(name='Lieutenant', surname='Kowalski',\n address=dict(street='Funny Avenue', city='Elsewhere'))\nclient2 = dict(name='Thomas', surname='Dison',\n address=dict(street='Lightbulb St.', city='Elsewhere'))\nmy_batched_dict.update(client1)\nmy_batched_dict.update(client2)\nmy_batched_nested.update(client1)\nmy_batched_nested.update(client2)\n# or equivalently, because it is a dict\nmy_batched_dict = BatchedDict(name='Lieutenant', surname='Kowalski',\n address=dict(street='Funny Avenue', city='Elsewhere'))\nmy_batched_nested = BatchedDict(nested=True, name='Lieutenant', surname='Kowalski',\n address=dict(street='Funny Avenue', city='Elsewhere'))\nmy_batched_dict.update(client2)\nmy_batched_nested.update(client2)\n# >>> my_batched_dict \n# {'name': ['Lieutenant', 'Thomas'], 'surname': ['Kowalski', 'Dison'], 'address': [{'street': 'Funny Avenue', 'city': 'Elsewhere'}, {'street': 'Lightbulb St.', 'city': 'Elsewhere'}]}\n# >>> my_batched_nested\n# {'name': ['Lieutenant', 'Thomas'], 'surname': ['Kowalski', 'Dison'], 'address': {'street': ['Funny Avenue', 'Lightbulb St.'], 'city': ['Elsewhere', 'Elsewhere']}}\n\n# straightforward aggregation use case\nmy_batched_dict = BatchedDict()\nmy_batched_dict['john_properties'] = 'car'\nmy_batched_dict['john_properties'] = 'bike'\nmy_batched_dict['john_properties'] = 'grill'\nmy_batched_dict['john_properties'] = 'gaming pc'\n# >>> my_batched_dict['john_properties']\n# ['car', 'bike', 'grill', 'gaming pc']\n# >>> my_batched_dict['john_properties'].remove('grill')\n# >>> my_batched_dict['john_properties']\n# ['car', 'bike', 'gaming pc']\n\nmy_batched_dict['ella_properties'] = 'house'\nmy_batched_dict['ella_properties'] = 'garage'\n# >>> 
my_batched_dict['ella_properties']\n# ['house', 'garage']\n``` \n \nEssentially it is a dict, so usage is intuitive. \n \n### GraphDict \n \nWhen you want to create a mapping from one hashable to another hashable that may traverse further. \n \n```python\nfrom dataclasses import dataclass\nfrom those_dicts import GraphDict\n\n@dataclass(frozen=True)\nclass Building:\n coordinates: tuple[float, float]\n address: str\n elevation: float\n purpose: str\n history: str\n\n# some big, hashable data structure \n@dataclass(frozen=True)\nclass City:\n name: str\n country: str\n area: float\n population: int\n top_10_buildings: frozenset[Building]\n\n\nwarsaw = City('Warsaw', ...)\nkatowice = ... # you get the point\ngdansk = ...\nberlin = ...\nfrankfurt = ...\nmy_graph_dict = GraphDict({warsaw: katowice, katowice: gdansk, gdansk: warsaw})\nflights_to_germany = {warsaw: berlin, katowice: frankfurt}\nflights_from_germany = {berlin: warsaw, frankfurt: katowice}\nmy_graph_dict.update(flights_to_germany)\nmy_graph_dict.update(flights_from_germany)\n# >>> my_graph_dict[warsaw]\n# {berlin, katowice}\n# >>> my_graph_dict[berlin]\n# warsaw\n# >>> my_graph_dict\n# {katowice: {2, 4}, warsaw: {0, 3}, gdansk: {1}, berlin: {1}, frankfurt: {0}}\n```\n \nGraphDict stores each hashable object only once - here everything is a key.\nValues are just index-wise references. This means a lot of memory savings for storing big objects.\n \nGraphDict is compatible with dict, but with a twist(s) enlisted below: \n \n- .pop() method is computationally expensive, because forces reindexing all the values. Better to use del instead.\n- del graph_dict_instance\\[some_key] removes all links from and to given key, without removing key entry itself. Leaving (disconnected) key entry allows to keep unrelated indices in values as is (no reindexing). 
\n- .popitem() method is computationally expensive, because forces reindexing all the values, although not so expensive as .pop() because it returns the last key-value pair. \n- .keys() method returns a mapping proxy (like dict), but the definition of key here is: a node that has a corresponding value(s) (outgoing connection). \n- .values() method returns a mapping proxy (like dict), but the definition of value here is: a node that has a corresponding key (incoming connection). \n- .items() method returns a mapping proxy (like dict), but the definition of item here is: a pair of nodes (key-value manner) for every key that is either in keys() or in values(). \n- .setdefault() raises NotImplementedError - use .get(key, default) instead. \n- .make_loops(keys: Optional\\[Iterable] = None) is new compared to dict - it adds connections to itself for every key provided or to all keys. \n- .delete_link(key, value) removes directed connection from key to value if exists. Do not influence existence of keys. \n- .disconnect(key, value) removes connection from key to value and from value to key if exist. Do not influence existence of keys. \n- .update() shall be used to update GraphDict like you would update regular dict. \n- .merge() shall be used to update GraphDict with another GraphDict. \n- .reindex() removes entries that are totally disconnected and updates indices stored in values for all entries (because deletion changes the order of keys). \n- .get_dict() returns regular dict with meaningful keys (that have other value than None). \n \n### TwoWayDict \n \nIt is a subclass of GraphDict that is restricted to have only exclusive two-way connections. \nYou can access value through its key and other way around. \n \nCompared to GraphDict, .merge() and .make_loops() are raising NotImplementedError as those doesn't make sense for this class. \n \n### OOMDict \n \nWhen you want to limit impact on RAM. 
\n \n```python\nfrom those_dicts import OOMDict\n\nmy_oom_dict = OOMDict(max_ram_entries=10000) # the default\n\nfor name, big_obj in big_obj_generator(num_obj=1000000):\n my_oom_dict[name] = big_obj\n\n# everything above 10000 objects will be stored on the disk\n``` \n \nEven if storage is split between RAM and disk, it is just a dict, so use it as usual. \n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Those missing flavors of dict",
"version": "0.1.1",
"project_urls": {
"Homepage": "https://github.com/jakubgajski/those_dicts",
"Repository": "https://github.com/jakubgajski/those_dicts"
},
"split_keywords": [
"graph",
" batch",
" two way",
" dict",
" dicts",
" data structures",
" lightweight"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "ec29ab092cc37b9a010e9f86712267bb4661e344c176bdcb85597376c7980546",
"md5": "a9d2a2099c55d5f1b9efa330b2f5c371",
"sha256": "6cf699e9efaa39811e48d0bfffc17be25c7c3c854f021f6008fc1b8547243a29"
},
"downloads": -1,
"filename": "those_dicts-0.1.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a9d2a2099c55d5f1b9efa330b2f5c371",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<4.0,>=3.9",
"size": 7868,
"upload_time": "2024-07-06T20:46:13",
"upload_time_iso_8601": "2024-07-06T20:46:13.305096Z",
"url": "https://files.pythonhosted.org/packages/ec/29/ab092cc37b9a010e9f86712267bb4661e344c176bdcb85597376c7980546/those_dicts-0.1.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "c794a364a53189af60cb26f54552b550cd9d9c4e23827acca49598529704df74",
"md5": "9305e26cc9410aac99bc886e51f0380e",
"sha256": "0233ed473a01fe18c7c4298a3f4cadd2ba645016dddfd6d924be1bd86ef440da"
},
"downloads": -1,
"filename": "those_dicts-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "9305e26cc9410aac99bc886e51f0380e",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<4.0,>=3.9",
"size": 9319,
"upload_time": "2024-07-06T20:46:15",
"upload_time_iso_8601": "2024-07-06T20:46:15.033438Z",
"url": "https://files.pythonhosted.org/packages/c7/94/a364a53189af60cb26f54552b550cd9d9c4e23827acca49598529704df74/those_dicts-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-07-06 20:46:15",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "jakubgajski",
"github_project": "those_dicts",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "those-dicts"
}