### GENCODEGenes
This package loads genes from GENCODE GTF/GFF files, groups transcripts by gene,
and provides methods for transcripts, so you can find exon coordinates, CDS
distances and sequences.
### Install
```sh
pip install gencodegenes
```
### Usage
```py
from gencodegenes import Gencode
gencode = Gencode(GTF_PATH)
# full function arguments are Gencode(gtf_path, fasta_path=None, coding_only=True)
# - fasta_path: pass in path to fasta file to get gene transcripts with sequence
# - coding_only: pass in False to include all transcripts, not just protein coding
# get gene by HGNC symbol
gene = gencode['OR5A1']
transcripts = gene.transcripts
canonical = gene.canonical # picks the MANE transcript if available; if none is
# tagged as MANE, picks the one tagged as appris_principal
# (or the one with the longest CDS if multiple are tagged);
# if none are tagged, picks the longest protein-coding
# transcript; if none are protein coding, picks the longest cDNA
gene.start, gene.end, gene.chrom, gene.strand, gene.symbol # other attributes available
# find gene nearest a genomic position, or overlapping a genomic region
gencode.nearest('chr1', 1000000)
gencode.in_region('chr1', 1000000, 2000000)
# and the transcript has a bunch of methods
tx = gene.canonical
tx.in_exons(pos) # check if pos in exons
tx.in_coding_region(pos) # check if pos in CDS
tx.get_coding_distance(pos) # get distance of pos from the CDS start, measured within the CDS
tx.get_closest_exon(pos) # find exon closest to position
tx.get_position_on_chrom(cds_pos) # convert CDS pos to genomic pos
tx.get_codon_info(pos) # get info about codon for a site
tx.get_codon_number_for_cds_pos(cds_pos) # convert CDS pos to codon number
tx.translate(seq) # translate DNA to AA (if opened with Fasta)
# the transcript also has associated data fields
tx.name # transcript ID
tx.chrom # transcript chromosome
tx.start # transcript start (TSS)
tx.end # transcript end
tx.cds_start # CDS start position
tx.cds_end # CDS end position
tx.type # transcript type e.g. protein_coding
tx.strand # strand (+ or -)
tx.exons # list of exon coordinates
tx.cds # list of CDS coordinates
tx.cds_sequence # get CDS sequence (if Gencode was opened with fasta)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/jeremymcrae/gencodegenes",
"name": "gencodegenes",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": null,
"keywords": null,
"author": "Jeremy McRae",
"author_email": "jeremy.mcrae@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/42/5c/d626484c824dadc2d6c1185a633bbce4f362fb62e5dd15da989e5d613d30/gencodegenes-1.1.4.tar.gz",
"platform": null,
"description": "\n### GENCODEGenes\n\nThis package loads genes from GENCODE GTF/GFF files, groups transcripts by gene, \nand provides methods for transcripts, so you can find exon coordinates, CDS \ndistances and sequences.\n\n### Install\n```sh\npip install gencodegenes\n```\n\n### Usage\n\n```py\nfrom gencodegenes import Gencode\n\ngencode = Gencode(GTF_PATH)\n# full function arguments are Gencode(gtf_path, fasta_path=None, coding_only=True)\n# - fasta_path: pass in path to fasta file to get gene transcripts with sequence\n# - coding_only: pass in False to include all transcripts, not just protein coding\n\n# get gene by HGNC symbol\ngene = gencode['OR5A1']\ntranscripts = gene.transcripts\ncanonical = gene.canonical # picks MANE transcript if available, if none named\n # as MANE, picks the one tagged as appris_principal\n # (or longest CDS if multiple), if none tagged, picks\n # the longest protein coding, if none protein coding,\n # picks the longest cDNA \ngene.start, gene.end, gene.chrom, gene.strand, gene.symbol # other attributes available\n\n\n# find gene nearest a genomic position, or overlapping a genomic region\ngencode.nearest('chr1', 1000000)\ngencode.in_region('chr1', 1000000, 2000000)\n\n# and the transcript has a bunch of methods\ntx = gene.canonical\ntx.in_exons(pos) # check if pos in exons\ntx.in_coding_region(pos) # check if pos in CDS\ntx.get_coding_distance(pos) # get distance in CDS to CDS start\ntx.get_closest_exon(pos) # find exon closest to position\ntx.get_position_on_chrom(cds_pos) # convert CDS pos to genomic pos\ntx.get_codon_info(pos) # get info about codon for a site\ntx.get_codon_number_for_cds_pos(cds_pos) # convert CDS pos to codon number\ntx.translate(seq) # translate DNA to AA (if opened with Fasta)\n\n# the transcript also has associated data fields\ntx.name # transcript ID\ntx.chrom # transcript chromosome\ntx.start # transcript start (TSS)\ntx.end # transcript end\ntx.cds_start # CDS start position\ntx.cds_end # CDS end 
position \ntx.type # transcript type e.g. protein_coding\ntx.strand # strand (+ or -)\ntx.exons # list of exon coordinates\ntx.cds # list of CDS coordinates\ntx.cds_sequence # get cDNA sequence (if Gencode was opened with fasta)\n\n```\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Package to load genes from GENCODE GTF files",
"version": "1.1.4",
"project_urls": {
"Homepage": "https://github.com/jeremymcrae/gencodegenes"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "00f8b396813fc1d33488c5bdfd003329feae17289f9a2ecfecc5c8eb5aeaa56c",
"md5": "c36f54837e43a6fe44d1a7647581cb94",
"sha256": "bfa44bca41d90ae9592c069e4280587098e292f8f61eae6b1350a8712623b073"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp310-cp310-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "c36f54837e43a6fe44d1a7647581cb94",
"packagetype": "bdist_wheel",
"python_version": "cp310",
"requires_python": ">=3.8",
"size": 551492,
"upload_time": "2024-09-20T20:09:02",
"upload_time_iso_8601": "2024-09-20T20:09:02.989744Z",
"url": "https://files.pythonhosted.org/packages/00/f8/b396813fc1d33488c5bdfd003329feae17289f9a2ecfecc5c8eb5aeaa56c/gencodegenes-1.1.4-cp310-cp310-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "24c101d512b835adfbc5c7514c121ca6495534f7488d9d1e5f4ecf2c5801d4c0",
"md5": "85b9a8ef18a88ad16af08dcab6607c56",
"sha256": "627bc416afe6f351e38843f979b69f9b71cce3df7a04a7c13d2acef7c4589038"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "85b9a8ef18a88ad16af08dcab6607c56",
"packagetype": "bdist_wheel",
"python_version": "cp310",
"requires_python": ">=3.8",
"size": 2993899,
"upload_time": "2024-09-20T20:09:05",
"upload_time_iso_8601": "2024-09-20T20:09:05.084456Z",
"url": "https://files.pythonhosted.org/packages/24/c1/01d512b835adfbc5c7514c121ca6495534f7488d9d1e5f4ecf2c5801d4c0/gencodegenes-1.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "3a627685242a117f6024c95de0954edce36638f65550925dd98d664471e82adf",
"md5": "f3026745165971470be802229facb54f",
"sha256": "310d6310b3ee67df75ced41c82e29e5471d6f6002c0a2070749786969c353931"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp310-cp310-win_amd64.whl",
"has_sig": false,
"md5_digest": "f3026745165971470be802229facb54f",
"packagetype": "bdist_wheel",
"python_version": "cp310",
"requires_python": ">=3.8",
"size": 547831,
"upload_time": "2024-09-20T20:09:07",
"upload_time_iso_8601": "2024-09-20T20:09:07.273228Z",
"url": "https://files.pythonhosted.org/packages/3a/62/7685242a117f6024c95de0954edce36638f65550925dd98d664471e82adf/gencodegenes-1.1.4-cp310-cp310-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "0a6802899476850fa52e5e6bfeb2112953290d6f7df8286eaa8ec37fff5986cf",
"md5": "fa002cd9fea22bd6c42b16ad0b8d3195",
"sha256": "7217e6b064f5163711f96b2ca1bb07c0eba38676664d7402aedb74236162e02b"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp311-cp311-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "fa002cd9fea22bd6c42b16ad0b8d3195",
"packagetype": "bdist_wheel",
"python_version": "cp311",
"requires_python": ">=3.8",
"size": 551847,
"upload_time": "2024-09-20T20:09:09",
"upload_time_iso_8601": "2024-09-20T20:09:09.016880Z",
"url": "https://files.pythonhosted.org/packages/0a/68/02899476850fa52e5e6bfeb2112953290d6f7df8286eaa8ec37fff5986cf/gencodegenes-1.1.4-cp311-cp311-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "635a1bdb83c350864a7b29e7a96d7eb27fcc8d595ca24bb498951f46e67f6d45",
"md5": "380af460e9ca2c2a353bc7565c023f76",
"sha256": "1824c61b58783a0461d384e16beedb8dcd80f477135d3e039712a41c9b9c3eb9"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "380af460e9ca2c2a353bc7565c023f76",
"packagetype": "bdist_wheel",
"python_version": "cp311",
"requires_python": ">=3.8",
"size": 3047446,
"upload_time": "2024-09-20T20:09:11",
"upload_time_iso_8601": "2024-09-20T20:09:11.043565Z",
"url": "https://files.pythonhosted.org/packages/63/5a/1bdb83c350864a7b29e7a96d7eb27fcc8d595ca24bb498951f46e67f6d45/gencodegenes-1.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "e4b65da6f638546b397370b6255924c2921d5ccba8347079507ae94955ccebe8",
"md5": "3c13e2119625e9fa44e12887194f4f36",
"sha256": "523d5df52a9e28b111227538b6b06a631c2a8781c8f2db9a8f3a2737385556d7"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp311-cp311-win_amd64.whl",
"has_sig": false,
"md5_digest": "3c13e2119625e9fa44e12887194f4f36",
"packagetype": "bdist_wheel",
"python_version": "cp311",
"requires_python": ">=3.8",
"size": 548479,
"upload_time": "2024-09-20T20:09:12",
"upload_time_iso_8601": "2024-09-20T20:09:12.588338Z",
"url": "https://files.pythonhosted.org/packages/e4/b6/5da6f638546b397370b6255924c2921d5ccba8347079507ae94955ccebe8/gencodegenes-1.1.4-cp311-cp311-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "09872ff5159066904c6e613ecd9e4f024be4caa8fd13da09c800b3c6b7b43e9e",
"md5": "91807c0f7d9b6ad9cfcfe8b2a5843af5",
"sha256": "d52ec516ecad724e25935de1d8aeb535801c23ddfe364e902d34dca2efa16bc5"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp312-cp312-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "91807c0f7d9b6ad9cfcfe8b2a5843af5",
"packagetype": "bdist_wheel",
"python_version": "cp312",
"requires_python": ">=3.8",
"size": 552209,
"upload_time": "2024-09-20T20:09:14",
"upload_time_iso_8601": "2024-09-20T20:09:14.383510Z",
"url": "https://files.pythonhosted.org/packages/09/87/2ff5159066904c6e613ecd9e4f024be4caa8fd13da09c800b3c6b7b43e9e/gencodegenes-1.1.4-cp312-cp312-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "c491b4e7f1d8c5bcba4938bba36c14d0736375cf0968566e81be4973904d9ac6",
"md5": "af028785f63debc490abc1366a661906",
"sha256": "5787762e9f9192b5e927a346917922f19666f1b42f455fa24009500a9c5ef53b"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "af028785f63debc490abc1366a661906",
"packagetype": "bdist_wheel",
"python_version": "cp312",
"requires_python": ">=3.8",
"size": 3006576,
"upload_time": "2024-09-20T20:09:16",
"upload_time_iso_8601": "2024-09-20T20:09:16.422626Z",
"url": "https://files.pythonhosted.org/packages/c4/91/b4e7f1d8c5bcba4938bba36c14d0736375cf0968566e81be4973904d9ac6/gencodegenes-1.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "52343f051e7bcddffe801b51e911ea9f58d064ee7f32c68ca23b7b6619b17a74",
"md5": "9529469f0c19053ff628ec73a44c6dce",
"sha256": "082ae9410c76bffeace1d80b8a3d3f395a2889d6224f8bd5c345967efd02bfc5"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp312-cp312-win_amd64.whl",
"has_sig": false,
"md5_digest": "9529469f0c19053ff628ec73a44c6dce",
"packagetype": "bdist_wheel",
"python_version": "cp312",
"requires_python": ">=3.8",
"size": 548811,
"upload_time": "2024-09-20T20:09:18",
"upload_time_iso_8601": "2024-09-20T20:09:18.460159Z",
"url": "https://files.pythonhosted.org/packages/52/34/3f051e7bcddffe801b51e911ea9f58d064ee7f32c68ca23b7b6619b17a74/gencodegenes-1.1.4-cp312-cp312-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "d615b5d55987662b8803d287fbc8a6a38cf6ec6074c170f932cd8f01410d2749",
"md5": "ea308e2f6b31b135be3a8ef54dee9015",
"sha256": "f7bbc50ed4670c3e1c211651623b0ccba873d77b931fe054bda725ef257ee694"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp38-cp38-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "ea308e2f6b31b135be3a8ef54dee9015",
"packagetype": "bdist_wheel",
"python_version": "cp38",
"requires_python": ">=3.8",
"size": 552801,
"upload_time": "2024-09-20T20:09:20",
"upload_time_iso_8601": "2024-09-20T20:09:20.283905Z",
"url": "https://files.pythonhosted.org/packages/d6/15/b5d55987662b8803d287fbc8a6a38cf6ec6074c170f932cd8f01410d2749/gencodegenes-1.1.4-cp38-cp38-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "d3acf03bd544051572296278cfcd98b98c546a7c6b91431aa3a2be4fc960d81d",
"md5": "7715bda58bf55ce2b995dac9224c2108",
"sha256": "9701985a27eb19d0fe28250c4ef3cb39622825b1d4bae89840b98011a7af64a4"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "7715bda58bf55ce2b995dac9224c2108",
"packagetype": "bdist_wheel",
"python_version": "cp38",
"requires_python": ">=3.8",
"size": 3022841,
"upload_time": "2024-09-20T20:09:21",
"upload_time_iso_8601": "2024-09-20T20:09:21.802624Z",
"url": "https://files.pythonhosted.org/packages/d3/ac/f03bd544051572296278cfcd98b98c546a7c6b91431aa3a2be4fc960d81d/gencodegenes-1.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "b35e9bf1055e6e957c70f6aeb53423bd17a5c4c6f25b40e7eee62a3768cfc8c6",
"md5": "75bf41bb6a0e72da8c74470377823829",
"sha256": "bfad7fc4c37361ea2c87b562db401b003543f0de8ee1bc9b8c5b9593200b9ad9"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp38-cp38-win_amd64.whl",
"has_sig": false,
"md5_digest": "75bf41bb6a0e72da8c74470377823829",
"packagetype": "bdist_wheel",
"python_version": "cp38",
"requires_python": ">=3.8",
"size": 548427,
"upload_time": "2024-09-20T20:09:23",
"upload_time_iso_8601": "2024-09-20T20:09:23.302338Z",
"url": "https://files.pythonhosted.org/packages/b3/5e/9bf1055e6e957c70f6aeb53423bd17a5c4c6f25b40e7eee62a3768cfc8c6/gencodegenes-1.1.4-cp38-cp38-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "8a3f64570af7614acc7888f7d557bdf87c150fb0d5c68ba5880951ad5caaf045",
"md5": "6ab5e15cf75b78143ef9d32b67e44c05",
"sha256": "22c975fbad11bc5ab7e482cd450579e4f7a33e7f8c3ec6f30297a07636276f70"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp39-cp39-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "6ab5e15cf75b78143ef9d32b67e44c05",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.8",
"size": 552241,
"upload_time": "2024-09-20T20:09:25",
"upload_time_iso_8601": "2024-09-20T20:09:25.054826Z",
"url": "https://files.pythonhosted.org/packages/8a/3f/64570af7614acc7888f7d557bdf87c150fb0d5c68ba5880951ad5caaf045/gencodegenes-1.1.4-cp39-cp39-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "110982963c3b0c6904811b635a20a9e118cd7b287859e78dc5c2623a94fabb71",
"md5": "73fb2f350d1a7aa33be7b32ecb8d46d6",
"sha256": "96f0ede95707b9144814cfcd459323bf49936d30a0fae9f316b015ee44566eae"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "73fb2f350d1a7aa33be7b32ecb8d46d6",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.8",
"size": 2998074,
"upload_time": "2024-09-20T20:09:27",
"upload_time_iso_8601": "2024-09-20T20:09:27.038648Z",
"url": "https://files.pythonhosted.org/packages/11/09/82963c3b0c6904811b635a20a9e118cd7b287859e78dc5c2623a94fabb71/gencodegenes-1.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "b5dd7a2c72cb74678bc2c148dcfbb5718ca7b447d34d7c0146c9df0dd5397772",
"md5": "637083966bb5334754c10cd6f91074c7",
"sha256": "4695b3aa1e74b6d619b1007805e1a8f4bf31e8191decec825dd5030d057be5f1"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4-cp39-cp39-win_amd64.whl",
"has_sig": false,
"md5_digest": "637083966bb5334754c10cd6f91074c7",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.8",
"size": 548041,
"upload_time": "2024-09-20T20:09:28",
"upload_time_iso_8601": "2024-09-20T20:09:28.957875Z",
"url": "https://files.pythonhosted.org/packages/b5/dd/7a2c72cb74678bc2c148dcfbb5718ca7b447d34d7c0146c9df0dd5397772/gencodegenes-1.1.4-cp39-cp39-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "425cd626484c824dadc2d6c1185a633bbce4f362fb62e5dd15da989e5d613d30",
"md5": "67a37a208649b624b10e0dfa15887607",
"sha256": "a5096260bedff7fd703642c11e77ba0f37edbfa71e3b4f57ba6d5b98175ba582"
},
"downloads": -1,
"filename": "gencodegenes-1.1.4.tar.gz",
"has_sig": false,
"md5_digest": "67a37a208649b624b10e0dfa15887607",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 314091,
"upload_time": "2024-09-20T20:09:30",
"upload_time_iso_8601": "2024-09-20T20:09:30.065024Z",
"url": "https://files.pythonhosted.org/packages/42/5c/d626484c824dadc2d6c1185a633bbce4f362fb62e5dd15da989e5d613d30/gencodegenes-1.1.4.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-09-20 20:09:30",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "jeremymcrae",
"github_project": "gencodegenes",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "gencodegenes"
}