taxpasta


Nametaxpasta JSON
Version 0.6.1 PyPI version JSON
download
home_pageNone
SummaryTAXonomic Profile Aggregation and STAndardisation
upload_time2023-10-04 08:39:25
maintainerNone
docs_urlNone
authorNone
requires_python>=3.8
licenseNone
keywords bioinformatics classification merging metagenomic profile metagenomics profiling standardisation taxonomic profile taxonomy
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            <img alt="taxpasta logo - a green DNA double helix morphing into a fusilli pasta shape with the word taxpasta above it" src="docs/assets/images/taxpasta-logo.svg" style="display: block; margin-left: auto; margin-right: auto; width: 50%;" />

<p style="text-align:center">
 <i><b style="color:#1c8f4f">TAX</b>onomic <b style="color:#1c8f4f">P</b>rofile <b style="color:#1c8f4f">A</b>ggregation and <b style="color:#1c8f4f">STA</b>ndardisation</i>
</p>

|            |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
| ---------- | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Package    | [![Latest PyPI Version](https://img.shields.io/pypi/v/taxpasta.svg)](https://pypi.org/project/taxpasta/) [![Supported Python Versions](https://img.shields.io/pypi/pyversions/taxpasta.svg)](https://pypi.org/project/taxpasta/) [![DOI](https://zenodo.org/badge/499589621.svg)](https://zenodo.org/badge/latestdoi/499589621)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
| Meta       | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Apache-2.0](https://img.shields.io/pypi/l/taxpasta.svg)](LICENSE) [![Code of Conduct](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](.github/CODE_OF_CONDUCT.md) [![Code Style Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) [![pyOpenSci](https://img.shields.io/badge/PyOpenSci-Peer%20Reviewed-success.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHIAAABnCAYAAADcxhZVAAAACXBIWXMAAAWJAAAFiQFtaJ36AAAHXUlEQVR4nO1dy3EbORBtq3SX9oiTtBGIG4HoCCxHYG4EpiIQFYGpCExFYCoCUxEsGcFyTziuGIG2wHpjDrnzwTQaMwMMXhWrVJI4vzfofzc+vL+/k29orcdEdI2PwbjilGsieiOirflZKbX2foERwAuRWmtD2B0+twKHfCGipfkopd4EjhcdRInUWl8S0ZyIvnh6UDscf54IPYYYkVrrERGtiOhC7vJKYQgdJ7F7wJnEQVomkXCeFc47eJAUkRB3pyT+Q0RPRPTs6Umb8y0hzgcPZ9GKB/nvya83EH1vdFixf3l62H8qpRaejh0MJFZkkXib5Y0R6DJfK9NYxoOHlGg9xargd1tP5xq8aCWPRE4KflcVBEhwhC8iv2mtp4TggNZ6IRQYSCjBuccHY8j8lh58O/C1IhMSkQkcJCIjgYSOND7iR4fvGz806VJHOBMJx7/Ib7SC1lrkRhBhKou9bpVSvvzYXqCUSDyYO/h/1x4vVsqhNyT+LPuj1trEfhexpsAKidRaz4ho2mI2ow1cEdGDCVZore9iS4EdGTtmFWqtV7jhmEjM4yrGFNip1bocSATmAmI2GvwiEuJ0SGG0G9xzFNgTiWKpaSw31QDTWBLT2YqMzbCxxUUs+cyMyKK001AQB5EoHg55NWaOPtfX/SR4LZ3hLIKEbxZVYgct8DIHjdCJ3MFlIsf78Bm5agVngde87MNtsDxdXKcoiLzpwXVw8KyUyvxAV9cpCiJDxKNSam9pI9Q2eCJ91uxIYwPDZp6lpCBSTahtiD7wEXwQuYEBws5RnuCtKFNhMhiJxAMkiTQETnylh3IuwhhOfKi63QukiHzOdBYd9NYEDz098BYgQeQmZ3hkOiuKaElIkCAyqyi/hF4McQUGX89zhj5GLl6VUplRswxYjFoTiSqKpcW/topz3MQV86QL3Nwk8KS0FZF9ljpnqEvlIvtu6Kmg2mfQd9XhRGTO1QjZuNnVuUywwtd9Vh3n0G3fe3AtXaFS35UNujip91mVBS7awn6GAEogG+s4pdQHOOqlhcEB4GPOYLMisQZZKHGllGrNKMqC5kMdpvBaQeIEAyyahgCN+P1KRD+01ibFNkdxm1fsicRUDBc3JFQUZk1AooS6uQCpfxup57MSIZ/GGloB1n1JMH7qyWYwquunacP3sUJ/EQkRcy99gp7iSSk1P700IwZbaPEzc/rWWPViOEos4+ZiJtOoj89KqbJEdFtWpxG537E6RUptCidfwVqb1fmHglbrq+P367CFFVlr1AnqR1scTQnjonKEWU3z
6F4cSxBpXgiX70ujAzKNpHBq9ZOYRSexIl1a17044h1UIJjSzhG3s7ovRLrA+IJezPoOxpeyxWya6lEBrPQxVsspXj343jd1IcMyJCJrUEamkQJKqWvo9z9g7W8ETnnL6dtMRFqgZmXu/25cN6XUCPre1Qp/aNoaHwKRj0T0G976l64uoo7M3P+toLPv6/63Bv8LWFSh70SaivL9EF+89XdC4osFkDmyuQYEV2qJr8Btk+hP34kscuA7rZeBe2BlJduu4gpY60pnIsvSQEIoCl913j3WxD1wJPPKdlVKrUhf4m6ej0XmCp+DAsjkThCxul+RDVwQBfnhfKBiZKPHLnFTp865SUc1Mgy6ArcSg4h+r4v4SO7Es/C4lVIZNjD5g4BDFKz2ZRUzdtA28CR1PAtsQmubhz3B8TFrxauo1Yo8n4RDXIUd3JJRoFMeOWrgpi5v6W3/SIiRrCOLW8meRzRbD5qiLEYgvrTaj3x2LOOk+xOjRmWET2WOE9jmP55dnC6wYhR1j6uah1tpPYfFte3ame8ROERWitYUNO8GnER4pRRLRHYD8X7MRGQH8DEoP6TxLKVAxKQJ1hUlkUEiCiIZYa/otiocqmjttM/Rx2D8WIgMrQFJXCLEQmRj46HjGa2cc1faAbEQKe6XeQaHyMqXdbArsqvMicNs2cqXNRYiObHYTx1tFcGagFLXFhEFkbhJjsHTRdkIx3+tLQONyf3grMpWgwIwsDiuT22yISYiOQMtrrLd2VsCd+jGcIhEzpIjXmdt6Er0c3AS7M82ifTYIjucksML33lSiNQH5tetVnFsRC6ZhcC3qAIUB8Jx3BeldA7QKaIiEiKIq/O+SJMp0ChrLWG8FV91Ca21ywDAV8xmd8oZwohyGfVyNF68DrESOcL4MS52KFtsvLE29KHrpqrm/NdNzh0lkXSwErkGRoYdjI1lla5CleAYYl0iRfa56UDCaIkkt16LMpjq9tNVMhIeFvGY2yrKGrETGdrA/EZ6MY+oiaSwyHRqSIq+1AMGw7jLlnULvLim1aJfkRl6vDLZ4jSPwRCZAaM8v/bjauSadAdHJB18vYVQlxgH4hvCDZJIOojaqYCv2QTGL535aJUfLJEZ4MzPUILha3ggO1Jki8ETmQErdIKPlEH0gqiQ910cEpEFyIXcxojc2BKb7fmxbruzOhFpCQTiyyoJtj46rJogERkJUn9kJEhExgAi+g/1Qv7cFT6QFQAAAABJRU5ErkJggg==)](https://github.com/pyOpenSci/software-review/issues/84) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05627/status.svg)](https://doi.org/10.21105/joss.05627) |
| Automation | [![GitHub Workflow](https://github.com/taxprofiler/taxpasta/workflows/CI-CD/badge.svg)](https://github.com/taxprofiler/taxpasta/workflows/CI-CD) [![Documentation](https://readthedocs.org/projects/taxpasta/badge/?version=latest)](https://taxpasta.readthedocs.io/en/latest/?badge=latest) [![Code Coverage](https://codecov.io/gh/taxprofiler/taxpasta/branch/dev/graph/badge.svg)](https://codecov.io/gh/taxprofiler/taxpasta)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |

## About

The main purpose of taxpasta is to _standardise_ taxonomic profiles created by a
range of bioinformatics tools. We call those tools taxonomic profilers. They
each come with their own particular tabular output format. Across the profilers,
relative abundances can be reported in read counts, fractions, or percentages,
as well as any number of additional columns with extra information. We therefore
decided to take [the lessons learnt](https://xkcd.com/927/) to heart and provide
our own solution to deal with this pasticcio. With taxpasta you can ingest all
of those formats and, at a minimum, output taxonomy identifiers and their
integer counts. Taxpasta can not only standardise profiles but also _merge_ them
across samples for the _same_ profiler into a single table.

![Diagram of taxpasta functionality. On the left are a range of taxonomic profilers with heterogeneous output types with a header of taxonomic profiles, then a range of colourful lines leading into a box with a single green line, the taxpasta logo plus three icons for Validation, Standardisation and Conversion, and finally a range of green lines spreading out to a range of file icons with various file types with a header of Standardised Tables.](docs/assets/images/taxpasta_diagram_v0.4.png)

### Supported Taxonomic Profilers

Taxpasta currently supports standardisation and generation of comparable
taxonomic tables for:

-   [Bracken](https://ccb.jhu.edu/software/bracken/)
-   [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
-   [DIAMOND](https://github.com/bbuchfink/diamond)
-   [ganon](https://pirovc.github.io/ganon/)
-   [Kaiju](https://kaiju.binf.ku.dk/)
-   [Kraken2](https://ccb.jhu.edu/software/kraken2/)
-   [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
-   [MEGAN6](http://www-ab.informatik.uni-tuebingen.de/software/megan6)/[MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
-   [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan)
-   [mOTUs](https://motu-tool.org/)

See [supported profilers](https://taxpasta.readthedocs.io/en/latest/supported_profilers/)
for more information.

## Install

It's as simple as:

```shell
pip install taxpasta
```

Taxpasta is also available from the [Bioconda](https://bioconda.github.io/)
channel

```shell
conda install -c bioconda taxpasta
```

and thus automatically generated
[Docker](https://quay.io/repository/biocontainers/taxpasta?tab=tags) and
[Singularity](https://depot.galaxyproject.org/singularity/)
[BioContainers](https://biocontainers.pro/) images also exist.

### Optional Dependencies

Taxpasta supports a number of extras that you can install for additional
features; primarily support for additional output file formats. You can install
them by specifying a comma-separated list within square brackets, for example,

```shell
pip install 'taxpasta[rich,biom]'
```

-   `rich` provides [rich](https://rich.readthedocs.io/)-formatted command line output and logging.
-   `arrow` supports writing output tables in [Apache Arrow](https://arrow.apache.org/) format.
-   `parquet` supports writing output tables in [Apache Parquet](https://parquet.apache.org/) format.
-   `biom` supports writing output tables in [BIOM](https://biom-format.org/) format.
-   `ods` supports writing output tables in [ODS](https://www.libreoffice.org/discover/what-is-opendocument/) format.
-   `xlsx` supports writing output tables in [Microsoft Excel](https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247) format.
-   `all` includes all of the above.
-   `dev` provides all tools needed for contributing to taxpasta.

## Usage

The main entry point for taxpasta is its command-line interface (CLI). You can interactively
explore the offered commands through the help system.

```shell
taxpasta -h
```

Taxpasta currently offers two commands corresponding to the main
[use-cases](#about). You can find out more in the [commands'
documentation](https://taxpasta.readthedocs.io/en/latest/commands).

### Standardise

Since the [supported profilers](#supported-taxonomic-profilers) all produce
their own flavour of tabular output, a quick way to normalize such files is to
standardise them with taxpasta. You need to let taxpasta know what tool the file
was created by. As an example, let's standardise a MetaPhlAn profile. (You can
find an example file in our [test
data](https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt).)

```shell
curl -O https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt
taxpasta standardise -p metaphlan -o standardised.tsv MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt
```

With these minimal arguments, taxpasta produces a two-column output consisting of:

| taxonomy_id | count |
| ----------- | ----- |
|             |       |

You can count on the second column being integers :wink:. Having such a simple
and tidy table should make your downstream analysis much smoother to start out
with. Please have a look at the full [getting
started](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started)
tutorial for a more thorough introduction.

### Merge

Converting single tables is nice, but hopefully you have many shiny samples to
analyze. The `taxpasta merge` command works similarly to `standardise` except
that you provide multiple profiles as input. You can grab a few more 'MOCK' examples from
our [test
data](https://github.com/taxprofiler/taxpasta/tree/main/tests/data/metaphlan) and
try it out.

```shell
LOCATION=https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan
curl -O "${LOCATION}/MOCK_001_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt"
curl -O "${LOCATION}/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt"
curl -O "${LOCATION}/MOCK_003_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt"

taxpasta merge -p metaphlan -o merged.tsv MOCK_*.metaphlan3_profile.txt
```

The output of the `merge` command has one column for the taxonomic identifier and
one more column for each input profile. Again, have a look at the full
[getting
started](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started)
tutorial for a more thorough introduction.

## Citation

If you use TAXPASTA in your academic work, please cite our [article in the Journal of Open Source Software](https://doi.org/10.21105/joss.05627).

> Beber, M. E., Borry, M., Stamouli, S., & Fellows Yates, J. A. (2023). TAXPASTA: TAXonomic Profile Aggregation and STAndardisation. Journal of Open Source Software, 8(87), 5627. https://doi.org/10.21105/joss.05627

## Acknowledgments

Many thanks to:

-   [nf-core](https://nf-co.re) for bringing the original developers together
-   [Zandra Fagernäs](https://twitter.com/ZandraSelina) for the logo design

## Copyright

-   Copyright © 2022, 2023, Moritz E. Beber, Maxime Borry, James A. Fellows
    Yates, and Sofia Stamouli.
-   Free software distributed under the [Apache Software License
    2.0](https://www.apache.org/licenses/LICENSE-2.0).

            

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "taxpasta",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.8",
    "maintainer_email": null,
    "keywords": "bioinformatics,classification,merging,metagenomic profile,metagenomics,profiling,standardisation,taxonomic profile,taxonomy",
    "author": null,
    "author_email": "\"Moritz E. Beber\" <moritz.beber@posteo.de>, Maxime Borry <maxime_borry@eva.mpg.de>, \"James A. Fellows Yates\" <jfy133@gmail.com>, Sofia Stamouli <sofia.stamouli@scilifelab.se>",
    "download_url": "https://files.pythonhosted.org/packages/0c/3c/63785feea4337ca3d0a74c6819f6fba913870ea7027137764fba64826aa6/taxpasta-0.6.1.tar.gz",
    "platform": null,
    "description": "<img alt=\"taxpasta logo - a green DNA double helix morphing into a fusili pasta shape with the word taxpasta above it\" src=\"docs/assets/images/taxpasta-logo.svg\" style=\"display: block; margin-left: auto; margin-right: auto; width: 50%;\" />\n\n<p style=\"text-align:center\">\n <i><b style=\"color:#1c8f4f\">TAX</b>onomic <b style=\"color:#1c8f4f\">P</b>rofile <b style=\"color:#1c8f4f\">A</b>ggregation and <b style=\"color:#1c8f4f\">STA</b>ndardisation</i>\n</p>\n\n|            |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |\n| ---------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Package    | [![Latest PyPI Version](https://img.shields.io/pypi/v/taxpasta.svg)](https://pypi.org/project/taxpasta/) [![Supported Python Versions](https://img.shields.io/pypi/pyversions/taxpasta.svg)](https://pypi.org/project/taxpasta/) [![DOI](https://zenodo.org/badge/499589621.svg)](https://zenodo.org/badge/latestdoi/499589621)                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |\n| Meta       | [![Project Status: Active \u2013 The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Apache-2.0](https://img.shields.io/pypi/l/taxpasta.svg)](LICENSE) [![Code of Conduct](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](.github/CODE_OF_CONDUCT.md) [![Code Style Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) 
[![pyOpenSci](https://img.shields.io/badge/PyOpenSci-Peer%20Reviewed-success.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHIAAABnCAYAAADcxhZVAAAACXBIWXMAAAWJAAAFiQFtaJ36AAAHXUlEQVR4nO1dy3EbORBtq3SX9oiTtBGIG4HoCCxHYG4EpiIQFYGpCExFYCoCUxEsGcFyTziuGIG2wHpjDrnzwTQaMwMMXhWrVJI4vzfofzc+vL+/k29orcdEdI2PwbjilGsieiOirflZKbX2foERwAuRWmtD2B0+twKHfCGipfkopd4EjhcdRInUWl8S0ZyIvnh6UDscf54IPYYYkVrrERGtiOhC7vJKYQgdJ7F7wJnEQVomkXCeFc47eJAUkRB3pyT+Q0RPRPTs6Umb8y0hzgcPZ9GKB/nvya83EH1vdFixf3l62H8qpRaejh0MJFZkkXib5Y0R6DJfK9NYxoOHlGg9xargd1tP5xq8aCWPRE4KflcVBEhwhC8iv2mtp4TggNZ6IRQYSCjBuccHY8j8lh58O/C1IhMSkQkcJCIjgYSOND7iR4fvGz806VJHOBMJx7/Ib7SC1lrkRhBhKou9bpVSvvzYXqCUSDyYO/h/1x4vVsqhNyT+LPuj1trEfhexpsAKidRaz4ho2mI2ow1cEdGDCVZore9iS4EdGTtmFWqtV7jhmEjM4yrGFNip1bocSATmAmI2GvwiEuJ0SGG0G9xzFNgTiWKpaSw31QDTWBLT2YqMzbCxxUUs+cyMyKK001AQB5EoHg55NWaOPtfX/SR4LZ3hLIKEbxZVYgct8DIHjdCJ3MFlIsf78Bm5agVngde87MNtsDxdXKcoiLzpwXVw8KyUyvxAV9cpCiJDxKNSam9pI9Q2eCJ91uxIYwPDZp6lpCBSTahtiD7wEXwQuYEBws5RnuCtKFNhMhiJxAMkiTQETnylh3IuwhhOfKi63QukiHzOdBYd9NYEDz098BYgQeQmZ3hkOiuKaElIkCAyqyi/hF4McQUGX89zhj5GLl6VUplRswxYjFoTiSqKpcW/topz3MQV86QL3Nwk8KS0FZF9ljpnqEvlIvtu6Kmg2mfQd9XhRGTO1QjZuNnVuUywwtd9Vh3n0G3fe3AtXaFS35UNujip91mVBS7awn6GAEogG+s4pdQHOOqlhcEB4GPOYLMisQZZKHGllGrNKMqC5kMdpvBaQeIEAyyahgCN+P1KRD+01ibFNkdxm1fsicRUDBc3JFQUZk1AooS6uQCpfxup57MSIZ/GGloB1n1JMH7qyWYwquunacP3sUJ/EQkRcy99gp7iSSk1P700IwZbaPEzc/rWWPViOEos4+ZiJtOoj89KqbJEdFtWpxG537E6RUptCidfwVqb1fmHglbrq+P367CFFVlr1AnqR1scTQnjonKEWU3z6F4cSxBpXgiX70ujAzKNpHBq9ZOYRSexIl1a17044h1UIJjSzhG3s7ovRLrA+IJezPoOxpeyxWya6lEBrPQxVsspXj343jd1IcMyJCJrUEamkQJKqWvo9z9g7W8ETnnL6dtMRFqgZmXu/25cN6XUCPre1Qp/aNoaHwKRj0T0G976l64uoo7M3P+toLPv6/63Bv8LWFSh70SaivL9EF+89XdC4osFkDmyuQYEV2qJr8Btk+hP34kscuA7rZeBe2BlJduu4gpY60pnIsvSQEIoCl913j3WxD1wJPPKdlVKrUhf4m6ej0XmCp+DAsjkThCxul+RDVwQBfnhfKBiZKPHLnFTp865SUc1Mgy6ArcSg4h+r4v4SO7Es/C4lVIZNjD5g4BDFKz2ZRUzdtA28CR1PAtsQmubhz3B8TFrxauo1Yo8n4RDXIUd3JJRoFMeOWrgpi5v6W3/SIiRrCOLW8meRzRbD5qiLEYgvrTaj3x2LOOk+xOjRmWET2WOE9jmP55dnC6wYhR1j6ua
h1tpPYfFte3ame8ROERWitYUNO8GnER4pRRLRHYD8X7MRGQH8DEoP6TxLKVAxKQJ1hUlkUEiCiIZYa/otiocqmjttM/Rx2D8WIgMrQFJXCLEQmRj46HjGa2cc1faAbEQKe6XeQaHyMqXdbArsqvMicNs2cqXNRYiObHYTx1tFcGagFLXFhEFkbhJjsHTRdkIx3+tLQONyf3grMpWgwIwsDiuT22yISYiOQMtrrLd2VsCd+jGcIhEzpIjXmdt6Er0c3AS7M82ifTYIjucksML33lSiNQH5tetVnFsRC6ZhcC3qAIUB8Jx3BeldA7QKaIiEiKIq/O+SJMp0ChrLWG8FV91Ca21ywDAV8xmd8oZwohyGfVyNF68DrESOcL4MS52KFtsvLE29KHrpqrm/NdNzh0lkXSwErkGRoYdjI1lla5CleAYYl0iRfa56UDCaIkkt16LMpjq9tNVMhIeFvGY2yrKGrETGdrA/EZ6MY+oiaSwyHRqSIq+1AMGw7jLlnULvLim1aJfkRl6vDLZ4jSPwRCZAaM8v/bjauSadAdHJB18vYVQlxgH4hvCDZJIOojaqYCv2QTGL535aJUfLJEZ4MzPUILha3ggO1Jki8ETmQErdIKPlEH0gqiQ910cEpEFyIXcxojc2BKb7fmxbruzOhFpCQTiyyoJtj46rJogERkJUn9kJEhExgAi+g/1Qv7cFT6QFQAAAABJRU5ErkJggg==)](https://github.com/pyOpenSci/software-review/issues/84) [![DOI](https://joss.theoj.org/papers/10.21105/joss.05627/status.svg)](https://doi.org/10.21105/joss.05627) |\n| Automation | [![GitHub Workflow](https://github.com/taxprofiler/taxpasta/workflows/CI-CD/badge.svg)](https://github.com/taxprofiler/taxpasta/workflows/CI-CD) [![Documentation](https://readthedocs.org/projects/taxpasta/badge/?version=latest)](https://taxpasta.readthedocs.io/en/latest/?badge=latest) [![Code Coverage](https://codecov.io/gh/taxprofiler/taxpasta/branch/dev/graph/badge.svg)](https://codecov.io/gh/taxprofiler/taxpasta)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                 |\n\n## About\n\nThe main purpose of taxpasta is to _standardise_ taxonomic profiles created by a\nrange of bioinformatics tools. We call those tools taxonomic profilers. They\neach come with their own particular tabular output format. Across the profilers,\nrelative abundances can be reported in read counts, fractions, or percentages,\nas well as any number of additional columns with extra information. We therefore\ndecided to take [the lessons learnt](https://xkcd.com/927/) to heart and provide\nour own solution to deal with this pasticcio. With taxpasta you can ingest all\nof those formats and, at a minimum, output taxonomy identifiers and their\ninteger counts. Taxpasta can not only standardise profiles but also _merge_ them\nacross samples for the _same_ profiler into a single table.\n\n![Diagram of taxpasta functionality. 
On the left are a range of taxonomic profilers with heterogeneous output types with a header of taxonomic profiles, then a range of colourful lines leading into a box with a single green line, the taxpasta logo plus three icons for Validation, Standardisation and Conversion, and finally a range of green lines spreading out to a range of file icons with various file types with a header of Standardised Tables.](docs/assets/images/taxpasta_diagram_v0.4.png)\n\n### Supported Taxonomic Profilers\n\nTaxpasta currently supports standardisation and generation of comparable\ntaxonomic tables for:\n\n-   [Bracken](https://ccb.jhu.edu/software/bracken/)\n-   [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)\n-   [DIAMOND](https://github.com/bbuchfink/diamond)\n-   [ganon](https://pirovc.github.io/ganon/)\n-   [Kaiju](https://kaiju.binf.ku.dk/)\n-   [Kraken2](https://ccb.jhu.edu/software/kraken2/)\n-   [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)\n-   [MEGAN6](http://www-ab.informatik.uni-tuebingen.de/software/megan6)/[MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)\n-   [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan)\n-   [mOTUs](https://motu-tool.org/)\n\nSee [supported profilers](https://taxpasta.readthedocs.io/en/latest/supported_profilers/)\nfor more information.\n\n## Install\n\nIt's as simple as:\n\n```shell\npip install taxpasta\n```\n\nTaxpasta is also available from the [Bioconda](https://bioconda.github.io/)\nchannel\n\n```shell\nconda install -c bioconda taxpasta\n```\n\nand thus automatically generated\n[Docker](https://quay.io/repository/biocontainers/taxpasta?tab=tags) and\n[Singularity](https://depot.galaxyproject.org/singularity/)\n[BioContainers](https://biocontainers.pro/) images also exist.\n\n### Optional Dependencies\n\nTaxpasta supports a number of extras that you can install for additional\nfeatures; 
primarily support for additional output file formats. You can install\nthem by specifying a comma-separated list within square brackets, for example,\n\n```shell\npip install 'taxpasta[rich,biom]'\n```\n\n-   `rich` provides [rich](https://rich.readthedocs.io/)-formatted command line output and logging.\n-   `arrow` supports writing output tables in [Apache Arrow](https://arrow.apache.org/) format.\n-   `parquet` supports writing output tables in [Apache Parquet](https://parquet.apache.org/) format.\n-   `biom` supports writing output tables in [BIOM](https://biom-format.org/) format.\n-   `ods` supports writing output tables in [ODS](https://www.libreoffice.org/discover/what-is-opendocument/) format.\n-   `xlsx` supports writing output tables in [Microsoft Excel](https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247) format.\n-   `all` includes all of the above.\n-   `dev` provides all tools needed for contributing to taxpasta.\n\n## Usage\n\nThe main entry point for taxpasta is its command-line interface (CLI). You can interactively\nexplore the offered commands through the help system.\n\n```shell\ntaxpasta -h\n```\n\nTaxpasta currently offers two commands corresponding to the main\n[use-cases](#about). You can find out more in the [commands'\ndocumentation](https://taxpasta.readthedocs.io/en/latest/commands).\n\n### Standardise\n\nSince the [supported profilers](#supported-taxonomic-profilers) all produce\ntheir own flavour of tabular output, a quick way to normalize such files is to\nstandardise them with taxpasta. You need to let taxpasta know what tool the file\nwas created by. As an example, let's standardise a MetaPhlAn profile. 
(You can\nfind an example file in our [test\ndata](https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt).)\n\n```shell\ncurl -O https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt\ntaxpasta standardise -p metaphlan -o standardised.tsv MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt\n```\n\nWith these minimal arguments, taxpasta produces a two column output consisting of\n\n| taxonomy_id | count |\n| ----------- | ----- |\n|             |       |\n\nYou can count on the second column being integers :wink:. Having such a simple\nand tidy table should make your downstream analysis much smoother to start out\nwith. Please have a look at the full [getting\nstarted](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started)\ntutorial for a more thorough introduction.\n\n### Merge\n\nConverting single tables is nice, but hopefully you have many shiny samples to\nanalyze. The `taxpasta merge` command works similarly to `standardise` except\nthat you provide multiple profiles as input. You can grab a few more 'MOCK' examples from\nour [test\ndata](https://github.com/taxprofiler/taxpasta/tree/main/tests/data/metaphlan) and\ntry it out.\n\n```shell\nLOCATION=https://raw.githubusercontent.com/taxprofiler/taxpasta/main/tests/data/metaphlan\ncurl -O \"${LOCATION}/MOCK_001_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt\"\ncurl -O \"${LOCATION}/MOCK_002_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt\"\ncurl -O \"${LOCATION}/MOCK_003_Illumina_Hiseq_3000_se_metaphlan3-db.metaphlan3_profile.txt\"\n\ntaxpasta merge -p metaphlan -o merged.tsv MOCK_*.metaphlan3_profile.txt\n```\n\nThe output of the `merge` command has one column for the taxonomic identifier and\none more column for each input profile. 
Again, have a look at the full\n[getting\nstarted](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started)\ntutorial for a more thorough introduction.\n\n## Citation\n\nIf you use TAXPASTA in your academic work, please cite our [article in the Journal of Open Source Software](https://doi.org/10.21105/joss.05627).\n\n> Beber, M. E., Borry, M., Stamouli, S., & Fellows Yates, J. A. (2023). TAXPASTA: TAXonomic Profile Aggregation and STAndardisation. Journal of Open Source Software, 8(87), 5627. https://doi.org/10.21105/joss.05627\n\n## Acknowledgments\n\nMany thanks to:\n\n-   [nf-core](https://nf-co.re) for bringing the original developers together\n-   [Zandra Fagern\u00e4s](https://twitter.com/ZandraSelina) for the logo design\n\n## Copyright\n\n-   Copyright \u00a9 2022, 2023, Moritz E. Beber, Maxime Borry, James A. Fellows\n    Yates, and Sofia Stamouli.\n-   Free software distributed under the [Apache Software License\n    2.0](https://www.apache.org/licenses/LICENSE-2.0).\n",
    "bugtrack_url": null,
    "license": null,
    "summary": "TAXonomic Profile Aggregation and STAndardisation",
    "version": "0.6.1",
    "project_urls": {
        "Bug Tracker": "https://github.com/taxprofiler/taxpasta/issues",
        "Documentation": "https://taxpasta.readthedocs.io",
        "Download": "https://pypi.org/project/taxpasta/#files",
        "Homepage": "https://github.com/taxprofiler/taxpasta",
        "Source Code": "https://github.com/taxprofiler/taxpasta"
    },
    "split_keywords": [
        "bioinformatics",
        "classification",
        "merging",
        "metagenomic profile",
        "metagenomics",
        "profiling",
        "standardisation",
        "taxonomic profile",
        "taxonomy"
    ],
    "urls": [
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "1f32a449bd54ee940da59b0c6554ace87cbbd280b36402dccf0151c6591ed91a",
                "md5": "b89807c97da3b321995224dd800f9a46",
                "sha256": "01282943f6a4c90e920828f9b6d5929c79802c86763a3b7d66d8abe89e27f857"
            },
            "downloads": -1,
            "filename": "taxpasta-0.6.1-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "b89807c97da3b321995224dd800f9a46",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.8",
            "size": 135743,
            "upload_time": "2023-10-04T08:39:26",
            "upload_time_iso_8601": "2023-10-04T08:39:26.555365Z",
            "url": "https://files.pythonhosted.org/packages/1f/32/a449bd54ee940da59b0c6554ace87cbbd280b36402dccf0151c6591ed91a/taxpasta-0.6.1-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "0c3c63785feea4337ca3d0a74c6819f6fba913870ea7027137764fba64826aa6",
                "md5": "fcdb6c691204d31e288a448a76e770d6",
                "sha256": "468141b00bb286e5b52fb47e7046af68d4f849eecb209a3ec77b0ce4da5dcf01"
            },
            "downloads": -1,
            "filename": "taxpasta-0.6.1.tar.gz",
            "has_sig": false,
            "md5_digest": "fcdb6c691204d31e288a448a76e770d6",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.8",
            "size": 44282,
            "upload_time": "2023-10-04T08:39:25",
            "upload_time_iso_8601": "2023-10-04T08:39:25.445682Z",
            "url": "https://files.pythonhosted.org/packages/0c/3c/63785feea4337ca3d0a74c6819f6fba913870ea7027137764fba64826aa6/taxpasta-0.6.1.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-10-04 08:39:25",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "taxprofiler",
    "github_project": "taxpasta",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": true,
    "tox": true,
    "lcname": "taxpasta"
}
        
Elapsed time: 0.12933s