bacdiving


Namebacdiving JSON
Version 1.2.7 PyPI version JSON
download
home_page
SummaryBacdiving accesses the Bacterial Diversity Metadatabase BacDive and provides various visualization options.
upload_time2022-12-13 17:22:18
maintainer
docs_urlNone
author
requires_python>=3.8
license
keywords bacdive bacteria phenotype information
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            # Bacdiving

Bacdiving accesses and retrieves information from the world's largest database for standardized bacterial phenotypic information: BacDive.
Additionally, Bacdiving provides several options to visualize this information.  

Before using Bacdiving, please register (for free) on [BacDive](https://api.bacdive.dsmz.de/).
Using your BacDive credentials you can dive into Bacdiving. 

In general, Bacdiving can deal with two types of input data: a taxonomy table (e.g. as extracted from a phyloseq-object) or an input file (.csv, .txt, .tsv) with one query-type per row.
Possible BacDive query types include: BacDive id, taxonomy (as in species name), 16S sequencing accession id (e.g. SILVA id), culture collection accession id or genome sequence accession id.
Make sure that the input file is consistent and contains only one (!) query type for all of its rows.

Here is a minimal example of how to use Bacdiving; please refer to the full [documentation](https://bacdiving.readthedocs.io/en/latest/) for more details:

```
from bacdiving import bacdive_caller as bc
from bacdiving import treeplots_maker as tm
from bacdiving import visualizations_maker as vm

### Retrieve and access information stored on BacDive ###

# Run for a single input from text file for SILVA id queries
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./SILVA_ids.txt input_via_file search_by_16S_seq_accession"], sample_names=["SILVA"], output_dir="./")
resulting_df = resulting_list_with_all_res_dfs[0]  

# Run for a single input from text file for taxonomy queries
resulting_list_with_all_res_dfs = bc.bacdive_call(inputs_list=["./taxonomy_ids.txt input_via_file search_by_taxonomy"], sample_names=["taxonomy"], output_dir="./results/") # if credentials are not given via parameters, you will get prompted
resulting_df = resulting_list_with_all_res_dfs[0] 

# Run for a single input from text file for BacDive id queries
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./bacdive_ids.txt input_via_file search_by_id"], sample_names=["bacdive"], output_dir="./")
resulting_df = resulting_list_with_all_res_dfs[0] 

# Run for a single input from text file for culture collection queries
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./culture_col_ids.txt input_via_file search_by_culture_collection"], sample_names=["culturecol"], output_dir="./")
resulting_df = resulting_list_with_all_res_dfs[0] 

# Run for a single input from text file for genome accession queries
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./genome_ids.txt input_via_file search_by_genome_accession"], sample_names=["genomecol"], output_dir="./") 
resulting_df = resulting_list_with_all_res_dfs[0] 

# Run for single taxonomy table input (e.g. as extracted from phyloseq-object)
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./taxtab.tsv taxtable_input"], sample_names=["taxtab"], print_res_df_ToFile = True, print_access_stats = True, print_flattened_file=True, columns_of_interest=["Physiology and metabolism.oxygen tolerance.oxygen tolerance", "Culture and growth conditions.culture temp.temperature", "Isolation, sampling and environmental information.isolation.origin.country","Morphology.cell morphology.motility"], output_dir="./") 
resulting_df = resulting_list_with_all_res_dfs[0] 

# Run for multiple inputs (of possibly different input types)
resulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id="<your ID>", bacdive_password="<your password>", inputs_list=["./SILVA_ids.txt input_via_file search_by_16S_seq_accession", "./taxonomy_ids.txt input_via_file search_by_taxonomy", "./taxtab1.tsv taxtable_input", "./taxtab2.tsv taxtable_input"], sample_names=["sample1", "sample2", "sample3", "sample4"], print_flattened_file=True, columns_of_interest=["Physiology and metabolism.oxygen tolerance.oxygen tolerance", "Culture and growth conditions.culture temp.temperature"])
resulting_df = resulting_list_with_all_res_dfs[1]  # pick your dataframe of interest from this list
```

```
### Some possible visualizations ###

#Tree plots
tm.overview_treeplot(resulting_df, label_name1="Temperature", label_name2="Oxygen tolerance", saveToFile=True, output_dir="./")
tm.circular_treeplot(resulting_df, output_dir="./")

#Relative abundance plot
vm.stacked_barplot_relative_abundance(resulting_list_with_all_res_dfs, sample_names=["Silva_input", "Taxtab_input"], plot_column="Name and taxonomic classification.genus", title="Relative abundance", saveToFile = True, output_dir="./")

#Fatty acid profile plot
vm.fatty_acid_profile(resulting_df, species = "Achromobacter denitrificans",  figsize=[20, 15], saveToFile=True, output_dir="./")

#Pie plot
vm.pieplot_maker(resulting_df,"Morphology.cell morphology.motility", title="Motility for all species", saveToFile = True, output_dir="./")

#World map
vm.worldmap_maker(resulting_df)

#Frequency plot
vm.freqplot_maker(resulting_df, "Isolation, sampling and environmental information.isolation.country", title="Countries of origin", ylabel_name = "All countries", saveToFile=True, output_dir="./")

#Species list for ALL species in resulting_df, not for a subset
species_list = resulting_df["Name and taxonomic classification.species"].tolist()

#Barplot
vm.barplot_maker(resulting_df, "Sequence information.GC content.GC-content", "GC-content", "GC-content", figsize=[40,10],  species_list=species_list, saveToFile=True, output_dir="./")

#Boxplot
value_dict = vm.access_list_df_objects(resulting_df, "Culture and growth conditions.culture temp", "temperature", temp= 1, species_list=species_list)
vm.boxplot_maker(value_dict, title= "Optimal temperature for species", xlabel_name= "species", figsize=[20, 10], ylabel_name="Opt. Culture Temp. $C^{o}$", saveToFile=True, output_dir="./")
```

            

Raw data

            {
    "_id": null,
    "home_page": "",
    "name": "bacdiving",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">=3.8",
    "maintainer_email": "",
    "keywords": "BacDive,bacteria,phenotype information",
    "author": "",
    "author_email": "Mahima Arunkumar <M.Arunkumar@campus.lmu.de>",
    "download_url": "https://files.pythonhosted.org/packages/5a/10/1ebbe4c47827d4fdea87d2be4334f3f8e51da7944bb0a53649b4127f74a1/bacdiving-1.2.7.tar.gz",
    "platform": null,
    "description": "# Bacdiving\n\nBacdiving accesses and retrieves information from the world's largest database for standardized bacterial phenotypic information: BacDive.\nAdditionally, Bacdiving provides several options to visualize this information.  \n\nBefore using Bacdiving please register (for free) on [BacDive](https://api.bacdive.dsmz.de/).\nUsing your BacDive credentials you can dive into Bacdiving. \n\nIn general, Bacdiving can deal with two types of input data: a taxonomy table (e.g. as extracted from a phyloseq-object) or an input file (.csv, .txt, .tsv) with one query-type per row.\nPossible BacDive query types include: BacDive id, taxonomy (as in species name), 16S sequencing accession id (e.g. SILVA id), culture collection accession id or genome sequence accession id.\nMake sure that the input file should is consistant with only contain one (!) query type for all of its rows.\n\nHere is a minimal example on how to use Bacdiving, please refer to the full [documentation](https://bacdiving.readthedocs.io/en/latest/) for more details:\n\n```\nfrom bacdiving import bacdive_caller as bc\nfrom bacdiving import treeplots_maker as tm\nfrom bacdiving import visualizations_maker as vm\n\n### Retrieve and access information stored on BacDive ###\n\n# Run for a single input from text file for SILVA id queries\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./SILVA_ids.txt input_via_file search_by_16S_seq_accession\"], sample_names=[\"SILVA\"], output_dir=\"./\")\nresulting_df = resulting_list_with_all_res_dfs[0]  \n\n# Run for a single input from text file for taxonomy queries\nresulting_list_with_all_res_dfs = bc.bacdive_call(inputs_list=[\"./taxonomy_ids.txt input_via_file search_by_taxonomy\"], sample_names=[\"taxonomy\"], output_dir=\"./results/\") # if credentials are not given via parameters, you will get prompted\nresulting_df = resulting_list_with_all_res_dfs[0] \n\n# Run 
for a single input from text file for BacDive id queries\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./bacdive_ids.txt input_via_file search_by_id\"], sample_names=[\"bacdive\"], output_dir=\"./\")\nresulting_df = resulting_list_with_all_res_dfs[0] \n\n# Run for a single input from text file for culture collection queries\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./culture_col_ids.txt input_via_file search_by_culture_collection\"], sample_names=[\"culturecol\"], output_dir=\"./\")\nresulting_df = resulting_list_with_all_res_dfs[0] \n\n# Run for a single input from text file for genome accession queries\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./genome_ids.txt input_via_file search_by_genome_accession\"], sample_names=[\"genomecol\"], output_dir=\"./\") \nresulting_df = resulting_list_with_all_res_dfs[0] \n\n# Run for single taxonomy table input (e.g. 
as extracted from phyloseq-object)\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./taxtab.tsv taxtable_input\"], sample_names=[\"taxtab\"], print_res_df_ToFile = True, print_access_stats = True, print_flattened_file=True, columns_of_interest=[\"Physiology and metabolism.oxygen tolerance.oxygen tolerance\", \"Culture and growth conditions.culture temp.temperature\", \"Isolation, sampling and environmental information.isolation.origin.country\",\"Morphology.cell morphology.motility\"], output_dir=\"./\") \nresulting_df = resulting_list_with_all_res_dfs[0] \n\n# Run for multiple inputs (of possibly different input types)\nresulting_list_with_all_res_dfs = bc.bacdive_call(bacdive_id=\"<your ID>\", bacdive_password=\"<your password>\", inputs_list=[\"./SILVA_ids.txt input_via_file search_by_16S_seq_accession\", \"./taxonomy_ids.txt input_via_file search_by_taxonomy\", \"./taxtab1.tsv taxtable_input\", \"./taxtab2.tsv taxtable_input\"], sample_names=[\"sample1\", \"sample2\", \"sample3\", \"sample4\"], print_flattened_file=True, columns_of_interest=[\"Physiology and metabolism.oxygen tolerance.oxygen tolerance\", \"Culture and growth conditions.culture temp.temperature\"])\nresulting_df = resulting_list_with_all_res_dfs[1]  # pick your dataframe of interest from this list\n```\n\n```\n### Some possible visualizations ###\n\n#Tree plots\ntm.overview_treeplot(resulting_df, label_name1=\"Temperature\", label_name2=\"Oxygen tolerance\", saveToFile=True, output_dir=\"./\")\ntm.circular_treeplot(resulting_df, output_dir=\"./\")\n\n#Relative abundance plot\nvm.stacked_barplot_relative_abundance(resulting_list_with_all_res_dfs, sample_names=[\"Silva_input\", \"Taxtab_input\"], plot_column=\"Name and taxonomic classification.genus\", title=\"Relative abundance\", saveToFile = True, output_dir=\"./\")\n\n#Fatty acid profile plot\nvm.fatty_acid_profile(resulting_df, species = \"Achromobacter 
denitrificans\",  figsize=[20, 15], saveToFile=True, output_dir=\"./\")\n\n#Pie plot\nvm.pieplot_maker(resulting_df,\"Morphology.cell morphology.motility\", title=\"Motility for all species\", saveToFile = True, output_dir=\"./\")\n\n#World map\nvm.worldmap_maker(resulting_df)\n\n#Frequency plot\nvm.freqplot_maker(resulting_df, \"Isolation, sampling and environmental information.isolation.country\", title=\"Countries of origin\", ylabel_name = \"All countries\", saveToFile=True, output_dir=\"./\")\n\n#Species list for ALL species in resulting_df, not for a subset\nspecies_list = resulting_df[\"Name and taxonomic classification.species\"].tolist()\n\n#Barplot\nvm.barplot_maker(resulting_df, \"Sequence information.GC content.GC-content\", \"GC-content\", \"GC-content\", figsize=[40,10],  species_list=species_list, saveToFile=True, output_dir=\"./\")\n\n#Boxplot\nvalue_dict = vm.access_list_df_objects(resulting_df, \"Culture and growth conditions.culture temp\", \"temperature\", temp= 1, species_list=species_list)\nvm.boxplot_maker(value_dict, title= \"Optimal temperature for species\", xlabel_name= \"species\", figsize=[20, 10], ylabel_name=\"Opt. Culture Temp. $C^{o}$\", saveToFile=True, output_dir=\"./\")\n```\n",
    "bugtrack_url": null,
    "license": "",
    "summary": "Bacdiving accesses the Bacterial Diversity Metadatabase BacDive and provides various visualization options.",
    "version": "1.2.7",
    "split_keywords": [
        "bacdive",
        "bacteria",
        "phenotype information"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "5b80b773d87350a47b6d88d53ec14546",
                "sha256": "f88095a4b364cf0972d06402364d3327c33e7804925efb446c4b5f5abe4ec69b"
            },
            "downloads": -1,
            "filename": "bacdiving-1.2.7-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "5b80b773d87350a47b6d88d53ec14546",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.8",
            "size": 18569,
            "upload_time": "2022-12-13T17:22:16",
            "upload_time_iso_8601": "2022-12-13T17:22:16.154392Z",
            "url": "https://files.pythonhosted.org/packages/02/9f/fac83af5bbf82b0eb4c48a521eaddff8798b74e7ca2dbec25299a72a7f61/bacdiving-1.2.7-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "md5": "966173a90650005568b8691a55a6c29f",
                "sha256": "bdf52b177d0a8e0fbb4d61f5408a193f2dbd903cc2e8b5ed1bdcf6bfa3f459e9"
            },
            "downloads": -1,
            "filename": "bacdiving-1.2.7.tar.gz",
            "has_sig": false,
            "md5_digest": "966173a90650005568b8691a55a6c29f",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.8",
            "size": 18550,
            "upload_time": "2022-12-13T17:22:18",
            "upload_time_iso_8601": "2022-12-13T17:22:18.654955Z",
            "url": "https://files.pythonhosted.org/packages/5a/10/1ebbe4c47827d4fdea87d2be4334f3f8e51da7944bb0a53649b4127f74a1/bacdiving-1.2.7.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2022-12-13 17:22:18",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "lcname": "bacdiving"
}
        
Elapsed time: 0.04088s