moltx

Name: moltx
Version: 2.0.0
Home page: None
Summary: Molecule Transformer X Model
Upload time: 2024-09-22 01:48:54
Maintainer: None
Docs URL: None
Author: Michael Ding
Requires Python: >=3.9
License: None
Keywords: molecule, AI, deep learning, transformer
Requirements: No requirements were recorded.
# MolTx

[![CI](https://github.com/js-ish/MolTx/actions/workflows/test.yml/badge.svg)](https://github.com/js-ish/MolTx/actions/workflows/test.yml?query=branch%3Amain)
[![Coverage Status](https://coveralls.io/repos/github/js-ish/MolTx/badge.svg?branch=main)](https://coveralls.io/github/js-ish/MolTx?branch=main)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/moltx)

## Installation

```
pip install moltx
```
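
To confirm the install, importing the modules used throughout this README should succeed (a minimal check, nothing more):

```python
# Sanity check: these are the moltx modules used in the examples below.
from moltx import datasets, models, nets, pipelines, tokenizers
```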

## Usage

### Pretrain

```python
import torch

# prepare dataset
from moltx import datasets, tokenizers, models
ds = datasets.AdaMR2(device=torch.device('cpu'))
generic_smiles = ["C=CC=CC=C", "...."]
canonical_smiles = ["c1ccccc1", "..."]
tgt, out = ds(generic_smiles, canonical_smiles)

# train
import torch.nn as nn
from torch.optim import Adam
from moltx import nets, models

# pick a model config
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2(conf)

crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1)

optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

# save ckpt
torch.save(model.state_dict(), '/path/to/adamr.ckpt')
```
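
The block above performs a single optimization step. A full pretraining run repeats that step over many batches; a minimal sketch, reusing `ds`, `model`, `crt`, and `optim` from above (the epoch count and per-epoch re-tokenization are purely illustrative):

```python
# Minimal multi-epoch loop over the objects defined in the pretrain example.
# Re-tokenizing every epoch is wasteful for a fixed dataset; precompute
# `tgt`/`out` once in that case.
for epoch in range(10):  # epoch count is illustrative
    tgt, out = ds(generic_smiles, canonical_smiles)
    optim.zero_grad()
    pred = model(tgt)
    loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
    loss.backward()
    optim.step()
    print(f"epoch {epoch}: loss {loss.item():.4f}")
```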


### Finetune


```python
# Classifier finetune
from moltx import datasets

seq_len = 256 # max token length for the SMILES in the dataset; if None, the longest tokenized SMILES sets the length
ds = datasets.AdaMR2Classifier(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
labels = [0, 1]
tgt, out = ds(smiles, labels, seq_len)

from moltx import nets, models
pretrained_conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Classifier(num_classes=2, conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss()
optim = Adam(model.parameters(), lr=0.1)

optim.zero_grad()
pred = model(tgt)
loss = crt(pred, out)
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/classifier.ckpt')
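
# --- hedged addition: quick check of the finetuned classifier ---
# Switch to eval mode and take the argmax over the class logits; this assumes
# the model output has shape (batch, num_classes), matching the loss usage above.
model.eval()
with torch.no_grad():
    predicted = model(tgt).argmax(dim=-1)  # tensor of predicted class indices
model.train()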

# Regression finetune
ds = datasets.AdaMR2Regression(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
values = [0.23, 0.12]
tgt, out = ds(smiles, values, seq_len)

model = models.AdaMR2Regression(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.MSELoss()
optim = Adam(model.parameters(), lr=0.1) # re-create the optimizer for the new model

optim.zero_grad()
pred = model(tgt)
loss = crt(pred, out)
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/regression.ckpt')

# Distributed Generation
ds = datasets.AdaMR2DistGeneration(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
tgt, out = ds(smiles, seq_len)

model = models.AdaMR2DistGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1) # re-create the optimizer for the new model

optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/distgen.ckpt')

# Goal Generation
ds = datasets.AdaMR2GoalGeneration(device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
goals = [0.23, 0.12]
tgt, out = ds(smiles, goals, seq_len)

model = models.AdaMR2GoalGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1) # re-create the optimizer for the new model

optim.zero_grad()
pred = model(tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/goalgen.ckpt')
```
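
Each finetune example above runs a single step on a two-molecule batch. The same pattern extends to mini-batches over a real labelled set; a sketch for the classifier, where `all_smiles` and `all_labels` are hypothetical stand-ins for your data, and `ds`, `model`, `crt`, `optim`, and `seq_len` are the classifier objects from above:

```python
# Hedged sketch: mini-batch finetuning of the classifier. `all_smiles` and
# `all_labels` are hypothetical placeholders for a real labelled dataset.
batch_size = 32
for i in range(0, len(all_smiles), batch_size):
    tgt, out = ds(all_smiles[i:i + batch_size], all_labels[i:i + batch_size], seq_len)
    optim.zero_grad()
    loss = crt(model(tgt), out)
    loss.backward()
    optim.step()
```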

### Inference

```python
from moltx import nets, models, pipelines, tokenizers
# AdaMR
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2(conf)
model.load_ckpt('/path/to/adamr.ckpt')
pipeline = pipelines.AdaMR2(model)
pipeline("C=CC=CC=C")
# {"smiles": ["c1ccccc1"], probabilities: [0.9]}

# Classifier
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Classifier(2, conf)
model.load_ckpt('/path/to/classifier.ckpt')
pipeline = pipelines.AdaMR2Classifier(model)
pipeline("C=CC=CC=C")
# {"label": [1], "probability": [0.67]}

# Regression
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2Regression(conf)
model.load_ckpt('/path/to/regression.ckpt')
pipeline = pipelines.AdaMR2Regression(model)
pipeline("C=CC=CC=C")
# {"value": [0.467], "probability": [0.67]}

# DistGeneration
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2DistGeneration(conf)
model.load_ckpt('/path/to/distgen.ckpt')
pipeline = pipelines.AdaMR2DistGeneration(model)
pipeline(k=2)
# {"smiles": ["c1ccccc1", "...."], probabilities: [0.9, 0.1]}

# GoalGeneration
conf = models.AdaMR2.CONFIG_LARGE # or models.AdaMR2.CONFIG_BASE
model = models.AdaMR2GoalGeneration(conf)
model.load_ckpt('/path/to/goalgen.ckpt')
pipeline = pipelines.AdaMR2GoalGeneration(model)
pipeline(0.48, k=2)
# {"smiles": ["c1ccccc1", "...."], probabilities: [0.9, 0.1]}
```
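
Each pipeline call above handles a single input. The simplest way to score a list of molecules is a plain loop; a sketch, where `clf_pipeline` is assumed to be the `pipelines.AdaMR2Classifier` instance built in the Classifier block above (batched pipeline input is not shown in this README):

```python
# Hedged sketch: per-molecule inference with the classifier pipeline.
smiles_list = ["C=CC=CC=C", "CC[N+](C)(C)Cc1ccccc1Br"]
results = [clf_pipeline(smi) for smi in smiles_list]  # list of {"label": ..., "probability": ...}
```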

            
