# MolTx
[![CI](https://github.com/js-ish/MolTx/actions/workflows/test.yml/badge.svg)](https://github.com/js-ish/MolTx/actions/workflows/test.yml?query=branch%3Amain)
[![Coverage Status](https://coveralls.io/repos/github/js-ish/MolTx/badge.svg?branch=main)](https://coveralls.io/github/js-ish/MolTx?branch=main)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/moltx)
## Installation
```
pip install moltx
```
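After installing, a quick import check confirms the package and the submodules used throughout this README are available:
```python
import moltx
from moltx import datasets, models, nets, pipelines, tokenizers
```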
## Usage
### Pretrain
```python
import torch
# prepare dataset
from moltx import datasets, tokenizers, models
ds = datasets.AdaMR2(device=torch.device('cpu'))
generic_smiles = ["C=CC=CC=C", "...."]
canonical_smiles = ["c1ccccc1", "..."]
tgt, out = ds(generic_smiles, canonical_smiles)
# train
import torch.nn as nn
from torch.optim import Adam
from moltx import nets, models
## use custom config
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2(conf)
crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1)
optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()
# save ckpt
torch.save(model.state_dict(), '/path/to/adamr.ckpt')
```
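The block above runs a single optimization step on one batch. Below is a minimal sketch of a full pretraining loop, assuming you batch simply by slicing the SMILES lists and re-invoking the dataset (`n_epochs` and `batch_size` are illustrative, not moltx APIs):
```python
n_epochs, batch_size = 10, 2
for epoch in range(n_epochs):
    for i in range(0, len(generic_smiles), batch_size):
        # Re-tokenize each mini-batch with the dataset defined above.
        tgt, out = ds(generic_smiles[i:i + batch_size],
                      canonical_smiles[i:i + batch_size])
        optim.zero_grad()
        pred = model(tgt)
        loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
        loss.backward()
        optim.step()
```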
### Finetune
```python
# Classifier finetune
import torch
import torch.nn as nn
from torch.optim import Adam
from moltx import datasets

seq_len = 256 # max token length of the SMILES in the dataset; if None, the longest tokenized SMILES is used
ds = datasets.AdaMR2Classifier(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
labels = [0, 1]
tgt, out = ds(smiles, labels, seq_len)
from moltx import nets, models
pretrained_conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Classifier(num_classes=2, conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss()
optim = Adam(model.parameters(), lr=0.1)
optim.zero_grad()
pred = model(tgt)
loss = crt(pred, out)
loss.backward()
optim.step()
torch.save(model.state_dict(), '/path/to/classifier.ckpt')
# Regression finetune
ds = datasets.AdaMR2Regression(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
values = [0.23, 0.12]
tgt, out = ds(smiles, values, seq_len)
model = models.AdaMR2Regression(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.MSELoss()
optim = Adam(model.parameters(), lr=0.1)
optim.zero_grad()
pred = model(tgt)
loss = crt(pred, out)
loss.backward()
optim.step()
torch.save(model.state_dict(), '/path/to/regression.ckpt')
# Distribution generation finetune
ds = datasets.AdaMR2DistGeneration(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
tgt, out = ds(smiles, seq_len)
model = models.AdaMR2DistGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1)
optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()
torch.save(model.state_dict(), '/path/to/distgen.ckpt')
# Goal generation finetune
ds = datasets.AdaMR2GoalGeneration(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
goals = [0.23, 0.12]
tgt, out = ds(smiles, goals, seq_len)
model = models.AdaMR2GoalGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1)
optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()
torch.save(model.state_dict(), '/path/to/goalgen.ckpt')
```
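After finetuning, a quick sanity check on held-out data is useful. Below is a minimal evaluation sketch for the classifier, reusing the dataset and model calls shown above; the held-out lists are hypothetical placeholders:
```python
import torch
from moltx import datasets, models

# Hypothetical held-out split; replace with your own data.
eval_smiles = ["CCO", "c1ccccc1O"]
eval_labels = [0, 1]

ds = datasets.AdaMR2Classifier(device=torch.device('cpu'))
model = models.AdaMR2Classifier(num_classes=2, conf=models.AdaMR2.CONFIG_LARGE)
model.load_ckpt('/path/to/classifier.ckpt')

model.eval()
with torch.no_grad():
    tgt, out = ds(eval_smiles, eval_labels, 256)
    pred = model(tgt)  # logits of shape (N, num_classes)
    acc = (pred.argmax(dim=-1) == out).float().mean().item()
print(f"accuracy: {acc:.3f}")
```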
### Inference
```python
from moltx import nets, models, pipelines, tokenizers
# AdaMR
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2(conf)
model.load_ckpt('/path/to/adamr.ckpt')
pipeline = pipelines.AdaMR2(model)
pipeline("C=CC=CC=C")
# {"smiles": ["c1ccccc1"], probabilities: [0.9]}
# Classifier
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Classifier(2, conf)
model.load_ckpt('/path/to/classifier.ckpt')
pipeline = pipelines.AdaMR2Classifier(model)
pipeline("C=CC=CC=C")
# {"label": [1], "probability": [0.67]}
# Regression
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Regression(conf)
model.load_ckpt('/path/to/regression.ckpt')
pipeline = pipelines.AdaMR2Regression(model)
pipeline("C=CC=CC=C")
# {"value": [0.467], "probability": [0.67]}
# DistGeneration
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2DistGeneration(conf)
model.load_ckpt('/path/to/distgen.ckpt')
pipeline = pipelines.AdaMR2DistGeneration(model)
pipeline(k=2)
# {"smiles": ["c1ccccc1", "...."], probabilities: [0.9, 0.1]}
# GoalGeneration
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2GoalGeneration(conf)
model.load_ckpt('/path/to/goalgen.ckpt')
pipeline = pipelines.AdaMR2GoalGeneration(model)
pipeline(0.48, k=2)
# {"smiles": ["c1ccccc1", "...."], probabilities: [0.9, 0.1]}
```
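Each pipeline call scores a single input. Below is a small sketch for scoring a list of molecules with the finetuned classifier pipeline, collecting predictions from the result dicts shown above (`smiles_list` is illustrative):
```python
from moltx import models, pipelines

model = models.AdaMR2Classifier(2, models.AdaMR2.CONFIG_LARGE)
model.load_ckpt('/path/to/classifier.ckpt')
pipeline = pipelines.AdaMR2Classifier(model)

# Score SMILES one at a time; each call returns {"label": [...], "probability": [...]}.
smiles_list = ["C=CC=CC=C", "CC[N+](C)(C)Cc1ccccc1Br"]
results = [pipeline(s) for s in smiles_list]
labels = [r["label"][0] for r in results]
probs = [r["probability"][0] for r in results]
```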