dictionaryutils


Name: dictionaryutils JSON
Version: 3.4.11 PyPI version JSON
download
home_page: https://github.com/uc-cdis/dictionaryutils
Summary: Python wrapper and metaschema for datadictionary.
upload_time: 2024-09-26 14:16:08
maintainer: None
docs_url: None
author: CTDS UChicago
requires_python: <4,>=3.9
license: Apache-2.0
keywords
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI
coveralls test coverage No coveralls.
            # dictionaryutils

Python wrapper and metaschema for datadictionary.
It can be used to:
- load a local dictionary to a python object.
- dump schemas to a file that can be uploaded to s3 as an artifact.
- load a schema file from a URL into a Python object that can be used by services.

## Test for dictionary validity with Docker
Say you have a dictionary you are building locally and you want to see if it will pass the tests.

You can add a simple shell function to your `.bash_profile` to enable a quick test command:
```
# Run the dictionaryutils image against the current directory, which is
# mounted at /dictionary inside the container.
# "$(pwd)" is quoted so that a working directory containing spaces is passed
# to docker as a single -v argument.
testdict() { docker run --rm -v "$(pwd)":/dictionary quay.io/cdis/dictionaryutils:master; }
```

Then from the directory containing the `gdcdictionary` directory run `testdict`.


## Generate simulated data with Docker
If you wish to generate simulated (fake) data, you can also do that with dictionaryutils and the data-simulator.

```
# Install the local dictionary inside the container, run the data-simulator
# with any extra arguments, and leave the results in ./simdata.
# The container script is single-quoted so that $? and $SUCCESS are evaluated
# by /bin/sh inside the container; inside a double-quoted string the calling
# shell would expand them first and the simulator's exit status would be lost.
# Extra arguments are forwarded as positional parameters via "$@" instead of
# being spliced into the script text, so each one arrives as a separate word.
# Returns: the exit status of the data-simulator run.
simdata() {
  docker run --rm -v "$(pwd)":/dictionary -v "$(pwd)/simdata":/simdata \
    quay.io/cdis/dictionaryutils:master \
    /bin/sh -c 'cd /dictionary && python setup.py install --force; python /src/datasimulator/bin/data-simulator simulate --path /simdata/ "$@"; SUCCESS=$?; rm -rf build dictionaryutils dist gdcdictionary.egg-info; chmod -R a+rwX /simdata; exit "$SUCCESS"' \
    sh "$@"
}
# Run the data-simulator in Docker with the given arguments (for example
# --url <schema JSON URL>) and leave the results in ./simdata.
# "$(pwd)" is quoted so that paths containing spaces mount correctly, and the
# arguments are forwarded as positional parameters via "$@" so that characters
# such as & or ? in a URL are not re-interpreted by the shell in the container.
simdataurl() {
  docker run --rm -v "$(pwd)":/dictionary -v "$(pwd)/simdata":/simdata \
    quay.io/cdis/dictionaryutils:master \
    /bin/sh -c 'python /src/datasimulator/bin/data-simulator simulate --path /simdata/ "$@"; chmod -R a+rwX /simdata' \
    sh "$@"
}

```

Then, from the directory containing the `gdcdictionary` directory, run `simdata`; a folder called `simdata` will be created with the results of the simulator run. You can also pass additional arguments to the data-simulator script, such as `simdata --max_samples 10`.

The `--max_samples` argument will define a default number of nodes to simulate, but you can override it using the `--node_num_instances_file` argument. For example, if you create the following `instances.json`:

```
{
        "case": 100,
        "demographic": 100
}

```
Then run the following:
```
# The container script is single-quoted so that $? and $SUCCESS are evaluated
# by /bin/sh inside the container rather than by the calling shell, which
# would otherwise expand them before docker starts and lose the simulator's
# exit status. "$(pwd)" is quoted so paths containing spaces mount correctly.
docker run --rm -v "$(pwd)":/dictionary -v "$(pwd)/simdata":/simdata quay.io/cdis/dictionaryutils:master /bin/sh -c 'cd /dictionary && python setup.py install --force; python /src/datasimulator/bin/data-simulator simulate --path /simdata/ --program workshop --project project1 --max_samples 10 --node_num_instances_file instances.json; export SUCCESS=$?; rm -rf build dictionaryutils dist gdcdictionary.egg-info; chmod -R a+rwX /simdata; exit $SUCCESS'
```
Then you'll get 100 each of `case` and `demographic` nodes and 10 each of everything else. Note that the above example also defines `program` and `project` names.

You can also run the simulator for an arbitrary JSON URL by using `simdataurl --url https://datacommons.example.com/schema.json`.


## Use dictionaryutils to load a dictionary
```
from dictionaryutils import DataDictionary

# Load a dictionary from a schema JSON served at a URL.
# URL_FOR_THE_JSON is presumably a placeholder for your own schema URL.
dict_fetch_from_remote = DataDictionary(url=URL_FOR_THE_JSON)

# Load a dictionary from a local directory.
# PATH_TO_SCHEMA_DIR is presumably a placeholder for your schema directory.
dict_loaded_locally = DataDictionary(root_dir=PATH_TO_SCHEMA_DIR)
```

## Use dictionaryutils to dump a dictionary
```
import json
from dictionaryutils import dump_schemas_from_dir

# Open the output file first, then collect the schemas from the schema
# directory and serialize them to dump.json as JSON.
with open('dump.json', 'w') as out_file:
    schemas = dump_schemas_from_dir('../datadictionary/gdcdictionary/schemas/')
    json.dump(schemas, out_file)
```

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/uc-cdis/dictionaryutils",
    "name": "dictionaryutils",
    "maintainer": null,
    "docs_url": null,
    "requires_python": "<4,>=3.9",
    "maintainer_email": null,
    "keywords": null,
    "author": "CTDS UChicago",
    "author_email": "cdis@uchicago.edu",
    "download_url": "https://files.pythonhosted.org/packages/54/21/a0f43f201ee5a8a085534671d6e87df5786d676464a7a931a3373d43b977/dictionaryutils-3.4.11.tar.gz",
    "platform": null,
    "description": "# dictionaryutils\n\npython wrapper and metaschema for datadictionary.\nIt can be used to:\n- load a local dictionary to a python object.\n- dump schemas to a file that can be uploaded to s3 as an artifact.\n- load schema file from an url to a python object that can be used by services\n\n## Test for dictionary validity with Docker\nSay you have a dictionary you are building locally and you want to see if it will pass the tests.\n\nYou can add a simple alias to your `.bash_profile` to enable a quick test command:\n```\ntestdict() { docker run --rm -v $(pwd):/dictionary quay.io/cdis/dictionaryutils:master; }\n```\n\nThen from the directory containing the `gdcdictionary` directory run `testdict`.\n\n\n## Generate simulated data with Docker\nIf you wish to generate fake simulated data you can also do that with dictionaryutils and the data-simulator.\n\n```\nsimdata() { docker run --rm -v $(pwd):/dictionary -v $(pwd)/simdata:/simdata quay.io/cdis/dictionaryutils:master /bin/sh -c \"cd /dictionary && python setup.py install --force; python /src/datasimulator/bin/data-simulator simulate --path /simdata/ $*; export SUCCESS=$?; rm -rf build dictionaryutils dist gdcdictionary.egg-info; chmod -R a+rwX /simdata; exit $SUCCESS\"; }\nsimdataurl() { docker run --rm -v $(pwd):/dictionary -v $(pwd)/simdata:/simdata quay.io/cdis/dictionaryutils:master /bin/sh -c \"python /src/datasimulator/bin/data-simulator simulate --path /simdata/ $*; chmod -R a+rwX /simdata\"; }\n\n```\n\nThen from the directory containing the `gdcdictionary` directory run `simdata` and a folder will be created called `simdata` with the results of the simulator run. You can also pass in additional arguments to the data-simulator script such as `simdata --max_samples 10`.\n\nThe `--max_samples` argument will define a default number of nodes to simulate, but you can override it using the `--node_num_instances_file` argument. 
For example, if you create the following `instances.json`:\n\n```\n{\n        \"case\": 100,\n        \"demographic\": 100\n}\n\n```\nThen run the following:\n```\ndocker run --rm -v $(pwd):/dictionary -v $(pwd)/simdata:/simdata quay.io/cdis/dictionaryutils:master /bin/sh -c \"cd /dictionary && python setup.py install --force; python /src/datasimulator/bin/data-simulator simulate --path /simdata/ --program workshop --project project1 --max_samples 10 --node_num_instances_file instances.json; export SUCCESS=$?; rm -rf build dictionaryutils dist gdcdictionary.egg-info; chmod -R a+rwX /simdata; exit $SUCCESS\";\n```\nThen you'll get 100 each of `case` and `demographic` nodes and 10 each of everything else. Note that the above example also defines `program` and `project` names.\n\nYou can also run the simulator for an arbitrary json url by using `simdataurl --url https://datacommons.example.com/schema.json`.\n\n\n## Use dictionaryutils to load a dictionary\n```\nfrom dictionaryutils import DataDictionary\n\ndict_fetch_from_remote = DataDictionary(url=URL_FOR_THE_JSON)\n\ndict_loaded_locally = DataDictionary(root_dir=PATH_TO_SCHEMA_DIR)\n```\n\n## Use dictionaryutils to dump a dictionary\n```\nimport json\nfrom dictionaryutils import dump_schemas_from_dir\n\nwith open('dump.json', 'w') as f:\n    json.dump(dump_schemas_from_dir('../datadictionary/gdcdictionary/schemas/'), f)\n```\n",
    "bugtrack_url": null,
    "license": "Apache-2.0",
    "summary": "Python wrapper and metaschema for datadictionary.",
    "version": "3.4.11",
    "project_urls": {
        "Homepage": "https://github.com/uc-cdis/dictionaryutils",
        "Repository": "https://github.com/uc-cdis/dictionaryutils"
    },
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "5421a0f43f201ee5a8a085534671d6e87df5786d676464a7a931a3373d43b977",
                "md5": "ae02272bfd77af67d5627586293b43f3",
                "sha256": "a63ae34b4c0130cd94e4ca685cbb18155c225f9bface32ed1d503393d0dd39cf"
            },
            "downloads": -1,
            "filename": "dictionaryutils-3.4.11.tar.gz",
            "has_sig": false,
            "md5_digest": "ae02272bfd77af67d5627586293b43f3",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": "<4,>=3.9",
            "size": 14290,
            "upload_time": "2024-09-26T14:16:08",
            "upload_time_iso_8601": "2024-09-26T14:16:08.823962Z",
            "url": "https://files.pythonhosted.org/packages/54/21/a0f43f201ee5a8a085534671d6e87df5786d676464a7a931a3373d43b977/dictionaryutils-3.4.11.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-09-26 14:16:08",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "uc-cdis",
    "github_project": "dictionaryutils",
    "travis_ci": true,
    "coveralls": false,
    "github_actions": false,
    "lcname": "dictionaryutils"
}
        
Elapsed time: 0.52078s