RWexptest


NameRWexptest JSON
Version 0.0.14 PyPI version JSON
download
home_page
SummaryA small example package
upload_time2023-11-09 04:32:57
maintainer
docs_urlNone
author
requires_python>=3.7
license
keywords
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            # RWexptest

This is a simple example package.

## Pseqpa

这个工具包用于对蛋白质序列进行简单处理,主要包含以下函数:

### excel_csv_to_fasta

```python
from RWexptest import Pseqpa

# 指定输入和输出文件夹路径
input_folder = "<需要转换的目标文件的路径>"
output_folder = "<保存路径>"
entry_column_name = "Entry"  # 请替换为您的entry列的名称
sequence_column_name = "Sequence"  # 请替换为您的sequence列的名称

# 调用函数并传递输入和输出文件夹路径
Pseqpa.excel_csv_to_fasta(input_folder, output_folder, entry_column_name, sequence_column_name)
```

### process_fasta_files

```python
from RWexptest import Pseqpa

# 指定输入目录、输出目录、批次大小以及最小和最大蛋白质序列长度
input_directory = "<fasta格式文件路径>"
output_directory = "<处理后的保存路径>"
batch_size = 500 #将fasta格式蛋白质序列按500个进行一次划分
min_sequence_length = 10 #筛选蛋白质序列最低不能小于10个氨基酸
max_sequence_length = 6000 #筛选蛋白质序列最高不能超过6000个氨基酸

# 处理fasta文件并将其分成批次,只保留符合长度条件的序列
Pseqpa.process_fasta_files(input_directory, output_directory, batch_size, min_sequence_length, max_sequence_length)
```

### create_blast_database(需要你的终端环境已经配置好了NCBI Blast工具)

```python
from RWexptest import Pseqpa

# 指定构建数据库对象、数据库位置和数据库类型
input_fasta_path = "<your_train_data_path/train_data.fasta>" #路径不能有空格,路径必须是英文
output_db_path = "<Blast_database_path/Train_protein_seq_database>" #路径不能有空格,路径必须是英文
dbtype = "prot"  # 蛋白质数据库

# 构建数据库
result_message = Pseqpa.create_blast_database(input_fasta_path, output_db_path, dbtype)
```

### run_blastp(需要你的终端环境已经配置好了NCBI Blast工具)

```python
from RWexptest import Pseqpa

# 指定blast对象、数据库、结果目录和结果格式
query_fasta_path = "<your_test_data_path/test_data.fasta>" #路径不能有空格,路径必须是英文
blast_db_path = "<Blast_database_path/Train_protein_seq_database>" #路径不能有空格,路径必须是英文
output_file_path = "<your_save_path/test_data_blast_results.xml>" #路径不能有空格,路径必须是英文
custom_outfmt = 5  # 自定义输出格式

#进行同源性blast
result_message = Pseqpa.run_blastp(query_fasta_path, blast_db_path, output_file_path, custom_outfmt)
```

### execute_blast_workflow(需要你的终端环境已经配置好了NCBI Blast工具)

```python
from RWexptest import Pseqpa

# 指定相关路径
input_fasta_path = "<your_train_data_path/train_data.fasta>" #路径不能有空格,路径必须是英文
output_db_path = "<Blast_database_path/Train_protein_seq_database>" #路径不能有空格,路径必须是英文
query_fasta_path = "<your_test_data_path/test_data.fasta>" #路径不能有空格,路径必须是英文
custom_outfmt = 5  # 自定义输出格式
dbtype = "prot"  # 蛋白质数据库
xml_result_path = "<your_save_path/test_data_blast_results.xml>"  # 自定义XML结果的保存位置,路径不能有空格,路径必须是英文

# 一次性完成数据库的创建和blast工作并获得xml文件
result = Pseqpa.execute_blast_workflow(input_fasta_path, output_db_path, dbtype, query_fasta_path, custom_outfmt, xml_result_path)
```

### parse_blast_xml_to_excel

```python
import pandas as pd
from Bio import SearchIO
from RWexptest import Pseqpa

# 调用函数并传递输入XML文件和输出Excel文件的路径
input_xml = '<经过NCBI Blast处理后获得的xml文件路径/result.xml>' #路径不能有空格,路径必须是英文
output_excel = '<保存路径/result.xlsx>' #路径不能有空格,路径必须是英文

Pseqpa.parse_blast_xml_to_excel(input_xml, output_excel)
```


            

Raw data

            {
    "_id": null,
    "home_page": "",
    "name": "RWexptest",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">=3.7",
    "maintainer_email": "",
    "keywords": "",
    "author": "",
    "author_email": "RuiwangW <2219312248@qq.com>",
    "download_url": "https://files.pythonhosted.org/packages/68/36/326fbdde3e000f3c4fe996f602a9d85a41eed487ec84109c6d843cce4192/RWexptest-0.0.14.tar.gz",
    "platform": null,
    "description": "# RWexptest\r\n\r\nThis is a simple example package.\r\n\r\n## Pseqpa\r\n\r\n\u8fd9\u4e2a\u5de5\u5177\u5305\u662f\u7528\u4e8e\u5bf9\u86cb\u767d\u8d28\u5e8f\u5217\u8fdb\u884c\u7b80\u5355\u5904\u7406\u7684\u5de5\u4f5c\uff0c\u5176\u4e2d\u6d89\u53ca\u7684\u4e3b\u8981\u51fd\u6570\u529f\u80fd\u6709\uff1a\r\n\r\n### excel_csv_to_fasta\r\n\r\n```python\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u6307\u5b9a\u8f93\u5165\u548c\u8f93\u51fa\u6587\u4ef6\u5939\u8def\u5f84\r\ninput_folder = \"<\u9700\u8981\u8f6c\u6362\u7684\u76ee\u6807\u6587\u4ef6\u7684\u8def\u5f84>\"\r\noutput_folder = \"<\u4fdd\u5b58\u8def\u5f84>\"\r\nentry_column_name = \"Entry\"  # \u8bf7\u66ff\u6362\u4e3a\u60a8\u7684entry\u5217\u7684\u540d\u79f0\r\nsequence_column_name = \"Sequence\"  # \u8bf7\u66ff\u6362\u4e3a\u60a8\u7684sequence\u5217\u7684\u540d\u79f0\r\n\r\n# \u8c03\u7528\u51fd\u6570\u5e76\u4f20\u9012\u8f93\u5165\u548c\u8f93\u51fa\u6587\u4ef6\u5939\u8def\u5f84\r\nexcel_csv_to_fasta(input_folder, output_folder, entry_column_name, sequence_column_name)\r\n```\r\n\r\n### process_fasta_files\r\n\r\n```python\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u6307\u5b9a\u8f93\u5165\u76ee\u5f55\u3001\u8f93\u51fa\u76ee\u5f55\u3001\u6279\u6b21\u5927\u5c0f\u4ee5\u53ca\u6700\u5c0f\u548c\u6700\u5927\u86cb\u767d\u8d28\u5e8f\u5217\u957f\u5ea6\r\ninput_directory = \"<fasta\u683c\u5f0f\u6587\u4ef6\u8def\u5f84>\"\r\noutput_directory = \"<\u5904\u7406\u540e\u7684\u4fdd\u5b58\u8def\u5f84>\"\r\nbatch_size = 500 #\u5c06fasta\u683c\u5f0f\u86cb\u767d\u8d28\u5e8f\u5217\u6309500\u4e2a\u8fdb\u884c\u4e00\u6b21\u5212\u5206\r\nmin_sequence_length = 10 #\u7b5b\u9009\u86cb\u767d\u8d28\u5e8f\u5217\u6700\u4f4e\u4e0d\u80fd\u5c0f\u4e8e10\u4e2a\u6c28\u57fa\u9178\r\nmax_sequence_length = 6000 #\u7b5b\u9009\u86cb\u767d\u8d28\u5e8f\u5217\u6700\u9ad8\u4e0d\u80fd\u8d85\u8fc76000\u4e2a\u6c28\u57fa\u9178\r\n\r\n# 
\u5904\u7406fasta\u6587\u4ef6\u5e76\u5c06\u5176\u5206\u6210\u6279\u6b21\uff0c\u53ea\u4fdd\u7559\u7b26\u5408\u957f\u5ea6\u6761\u4ef6\u7684\u5e8f\u5217\r\nprocess_fasta_files(input_directory, output_directory, batch_size, min_sequence_length, max_sequence_length)\r\n```\r\n\r\n### create_blast_database\uff08\u9700\u8981\u4f60\u7684\u7ec8\u7aef\u73af\u5883\u5df2\u7ecf\u914d\u7f6e\u597d\u4e86NCBI Blast\u5de5\u5177\uff09\r\n\r\n```python\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u6307\u5b9a\u6784\u5efa\u6570\u636e\u5e93\u5bf9\u8c61\u3001\u6570\u636e\u5e93\u4f4d\u7f6e\u548c\u6570\u636e\u5e93\u7c7b\u578b\r\ninput_fasta_path = \"<your_train_data_path/train_data.fasta>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\noutput_db_path = \"<Blast_database_path/Train_protein_seq_database>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\ndbtype = \"prot\"  # \u86cb\u767d\u8d28\u6570\u636e\u5e93\r\n\r\n# \u6784\u5efa\u6570\u636e\u5e93\r\nresult_message = create_blast_database(input_fasta_path, output_db_path, dbtype)\r\n```\r\n\r\n### run_blastp\uff08\u9700\u8981\u4f60\u7684\u7ec8\u7aef\u73af\u5883\u5df2\u7ecf\u914d\u7f6e\u597d\u4e86NCBI Blast\u5de5\u5177\uff09\r\n\r\n```python\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u6307\u5b9abalst\u5bf9\u8c61\u3001\u6570\u636e\u5e93\u3001\u7ed3\u679c\u76ee\u5f55\u548c\u7ed3\u679c\u683c\u5f0f\r\nquery_fasta_path = \"<your_test_data_path/test_data.fasta>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\nblast_db_path = \"<Blast_database_path/Train_protein_seq_database>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\noutput_file_path = \"<your_save_path/test_data_blast_results.xml>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\ncustom_outfmt = 5  # 
\u81ea\u5b9a\u4e49\u8f93\u51fa\u683c\u5f0f\r\n\r\n#\u8fdb\u884c\u540c\u6e90\u6027blast\r\nresult_message = run_blastp(query_fasta_path, blast_db_path, output_file_path, custom_outfmt)\r\n```\r\n\r\n### execute_blast_workflow\uff08\u9700\u8981\u4f60\u7684\u7ec8\u7aef\u73af\u5883\u5df2\u7ecf\u914d\u7f6e\u597d\u4e86NCBI Blast\u5de5\u5177\uff09\r\n\r\n```python\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u6307\u5b9a\u76f8\u5173\u8def\u5f84\r\ninput_fasta_path = \"<your_train_data_path/train_data.fasta>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\noutput_db_path = \"<Blast_database_path/Train_protein_seq_database>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\nquery_fasta_path = \"<your_test_data_path/test_data.fasta>\" #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\ncustom_outfmt = 5 #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\ndbtype = \"prot\" #\u6216\u8005\r\nxml_result_path = \"<your_save_path/test_data_blast_results.xml>\"  # \u81ea\u5b9a\u4e49XML\u7ed3\u679c\u7684\u4fdd\u5b58\u4f4d\u7f6e\uff0c\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u5f84\u5fc5\u987b\u662f\u82f1\u6587\r\n\r\n# \u4e00\u6b21\u6027\u5b8c\u6210\u6570\u636e\u5e93\u7684\u521b\u5efa\u548cblast\u5de5\u4f5c\u5e76\u83b7\u5f97xml\u6587\u4ef6\r\nresult = execute_blast_workflow(input_fasta_path, output_db_path, dbtype, query_fasta_path, custom_outfmt, xml_result_path)\r\n```\r\n\r\n### parse_blast_xml_to_excel\r\n\r\n```python\r\nimport pandas as pd\r\nfrom Bio import SearchIO\r\nfrom RWexptest import Pseqpa\r\n\r\n# \u8c03\u7528\u51fd\u6570\u5e76\u4f20\u9012\u8f93\u5165XML\u6587\u4ef6\u548c\u8f93\u51faExcel\u6587\u4ef6\u7684\u8def\u5f84\r\ninput_xml = '<\u7ecf\u8fc7NCBI Blast\u5904\u7406\u540e\u83b7\u5f97\u7684xml\u6587\u4ef6\u8def\u5f84/result.xml>' 
#\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u52b2\u5fc5\u987b\u662f\u82f1\u6587\r\noutput_excel = '<\u4fdd\u5b58\u8def\u5f84/reuslt.xlsx>' #\u8def\u5f84\u4e0d\u80fd\u6709\u7a7a\u683c\uff0c\u8def\u52b2\u5fc5\u987b\u662f\u82f1\u6587\r\n\r\nparse_blast_xml_to_excel(input_xml, output_excel)\r\n```\r\n\r\n",
    "bugtrack_url": null,
    "license": "",
    "summary": "A small example package",
    "version": "0.0.14",
    "project_urls": {
        "Bug Tracker": "https://github.com/pypa/sampleproject/issues",
        "Homepage": "https://github.com/pypa/sampleproject"
    },
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "e871b2f4ed6df3da8292d577528ac16b369668d28483cb37fcc72cb37ac36dcc",
                "md5": "cff3d3ab17dae6eea63cbb4216484439",
                "sha256": "c1a5051496177ce2ef575321d4fb17d9b5f830ba1a9c2127daaea0c83d7603ef"
            },
            "downloads": -1,
            "filename": "RWexptest-0.0.14-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "cff3d3ab17dae6eea63cbb4216484439",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.7",
            "size": 16391,
            "upload_time": "2023-11-09T04:32:55",
            "upload_time_iso_8601": "2023-11-09T04:32:55.754175Z",
            "url": "https://files.pythonhosted.org/packages/e8/71/b2f4ed6df3da8292d577528ac16b369668d28483cb37fcc72cb37ac36dcc/RWexptest-0.0.14-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "6836326fbdde3e000f3c4fe996f602a9d85a41eed487ec84109c6d843cce4192",
                "md5": "8c01df8b004076d1c28f08e820c0b5de",
                "sha256": "c21b8f1126730e3532e41c0df289fefa624db62977ea29cf438562a4a772e0a2"
            },
            "downloads": -1,
            "filename": "RWexptest-0.0.14.tar.gz",
            "has_sig": false,
            "md5_digest": "8c01df8b004076d1c28f08e820c0b5de",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.7",
            "size": 14002,
            "upload_time": "2023-11-09T04:32:57",
            "upload_time_iso_8601": "2023-11-09T04:32:57.749771Z",
            "url": "https://files.pythonhosted.org/packages/68/36/326fbdde3e000f3c4fe996f602a9d85a41eed487ec84109c6d843cce4192/RWexptest-0.0.14.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-11-09 04:32:57",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "pypa",
    "github_project": "sampleproject",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": true,
    "tox": true,
    "lcname": "rwexptest"
}
        
Elapsed time: 0.26670s