moltx

Name	moltx JSON
Version	1.0.2 JSON
	download
home_page	None
Summary	Molecule Transformer X Model
upload_time	2024-05-13 09:51:24
maintainer	None
docs_url	None
author	Michael Ding
requires_python	>=3.8
license	None
keywords	molcule ai deep learning transformer
VCS
bugtrack_url
requirements	No requirements were recorded.
Travis-CI	No Travis.
coveralls test coverage	No coveralls.

            # MolTx

[![CI](https://github.com/js-ish/MolTx/actions/workflows/test.yml/badge.svg)](https://github.com/js-ish/MolTx/actions/workflows/test.yml?query=branch%3Amain)
[![Coverage Status](https://coveralls.io/repos/github/js-ish/MolTx/badge.svg?branch=main)](https://coveralls.io/github/js-ish/MolTx?branch=main)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/moltx)

## Installation

```
pip install moltx
```

## Usage

### Pretrain

```python
import torch

# prepare dataset
from moltx import datasets, tokenizers, models
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Pretrain)
ds = datasets.AdaMR(tokenizer=tk, device=torch.device('cpu'))
generic_smiles = ["C=CC=CC=C", "...."]
canonical_smiles = ["c1ccccc1", "..."]
src, tgt, out = ds(generic_smiles, canonical_smiles)

# train
import torch.nn as nn
from torch.optim import Adam
from moltx import nets, models

## use custom config
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMR(conf)

crt = nn.CrossEntropyLoss(ignore_index=0)
optim = Adam(model.parameters(), lr=0.1)

optim.zero_grad()
pred = model(src, tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

# save ckpt
torch.save(model.state_dict(), '/path/to/adamr.ckpt')
```


### Finetune


```python
# Classifier finetune
from moltx import datasets, tokenizers
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Prediction)

seq_len = 256 # maximum token length of the SMILES in the dataset; if None, the maximum token length of the given smiles is used
ds = datasets.AdaMRClassifier(tokenizer=tk, device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
labels = [0, 1]
src, tgt, out = ds(smiles, labels, seq_len)

from moltx import nets, models
pretrained_conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMRClassifier(num_classes=2, conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss()
optim = Adam(model.parameters(), lr=0.1)

optim.zero_grad()
pred = model(src, tgt)
loss = crt(pred, out)
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/classifier.ckpt')

# Regression finetune
ds = datasets.AdaMRRegression(tokenizer=tk, device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
values = [0.23, 0.12]
src, tgt, out = ds(smiles, values, seq_len)

model = models.AdaMRRegression(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.MSELoss()

optim.zero_grad()
pred = model(src, tgt)
loss = crt(pred, out)
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/regression.ckpt')

# Distributed Generation
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)
ds = datasets.AdaMRDistGeneration(tokenizer=tk, device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
src, tgt, out = ds(smiles, seq_len)

model = models.AdaMRDistGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)

optim.zero_grad()
pred = model(src, tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/distgen.ckpt')

# Goal Generation
ds = datasets.AdaMRGoalGeneration(tokenizer=tk, device=torch.device('cpu'))
smiles = ["c1ccccc1", "CC[N+](C)(C)Cc1ccccc1Br"]
goals = [0.23, 0.12]
src, tgt, out = ds(smiles, goals, seq_len)

model = models.AdaMRGoalGeneration(conf=pretrained_conf)
model.load_ckpt('/path/to/adamr.ckpt')
crt = nn.CrossEntropyLoss(ignore_index=0)

optim.zero_grad()
pred = model(src, tgt)
loss = crt(pred.view(-1, pred.size(-1)), out.view(-1))
loss.backward()
optim.step()

torch.save(model.state_dict(), '/path/to/goalgen.ckpt')
```

### Inference

```python
from moltx import nets, models, pipelines, tokenizers
# AdaMR
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMR(conf)
model.load_ckpt('/path/to/adamr.ckpt')
pipeline = pipelines.AdaMR(tk, model)
pipeline("C=CC=CC=C")
# {"smiles": ["c1ccccc1"], "probabilities": [0.9]}

# Classifier
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Prediction)
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMRClassifier(2, conf)
model.load_ckpt('/path/to/classifier.ckpt')
pipeline = pipelines.AdaMRClassifier(tk, model)
pipeline("C=CC=CC=C")
# {"label": [1], "probability": [0.67]}

# Regression
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMRRegression(2, conf)
model.load_ckpt('/path/to/regression.ckpt')
pipeline = pipelines.AdaMRRegression(tk, model)
pipeline("C=CC=CC=C")
# {"value": [0.467], "probability": [0.67]}

# DistGeneration
tk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMRDistGeneration(conf)
model.load_ckpt('/path/to/distgen.ckpt')
pipeline = pipelines.AdaMRDistGeneration(tk, model)
pipeline(k=2)
# {"smiles": ["c1ccccc1", "...."], "probabilities": [0.9, 0.1]}

# GoalGeneration
conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE
model = models.AdaMRGoalGeneration(conf)
model.load_ckpt('/path/to/goalgen.ckpt')
pipeline = pipelines.AdaMRGoalGeneration(tk, model)
pipeline(0.48, k=2)
# {"smiles": ["c1ccccc1", "...."], "probabilities": [0.9, 0.1]}
```

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "moltx",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.8",
    "maintainer_email": null,
    "keywords": "molcule, AI, deep learning, transformer",
    "author": "Michael Ding",
    "author_email": "yandy.ding@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/35/ba/0d73d4cd646d539e14cbdb919e0fdf06ed9682171a9714e49c922619debd/moltx-1.0.2.tar.gz",
    "platform": null,
    "description": "# MolTx\n\n[![CI](https://github.com/js-ish/MolTx/actions/workflows/test.yml/badge.svg)](https://github.com/js-ish/MolTx/actions/workflows/test.yml?query=branch%3Amain)\n[![Coverage Status](https://coveralls.io/repos/github/js-ish/MolTx/badge.svg?branch=main)](https://coveralls.io/github/js-ish/MolTx?branch=main)\n![PyPI - Python Version](https://img.shields.io/pypi/pyversions/moltx)\n\n## Installation\n\n```\npip install moltx\n```\n\n## Usage\n\n### Pretrain\n\n```python\nimport torch\n\n# prepare dataset\nfrom moltx import datasets, tokenizers, models\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Pretrain)\nds = datasets.AdaMR(tokenizer=tk, device=torch.device('cpu'))\ngeneric_smiles = [\"C=CC=CC=C\", \"....\"]\ncanonical_smiles = [\"c1cccc1c\", \"...\"]\nsrc, tgt, out = ds(generic_smiles, canonical_smiles)\n\n# train\nimport torch.nn as nn\nfrom torch.optim import Adam\nfrom moltx import nets, models\n\n## use custom config\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMR(conf)\n\ncrt = nn.CrossEntropyLoss(ignore_index=0)\noptim = Adam(model.parameters(), lr=0.1)\n\noptim.zero_grad()\npred = model(src, tgt)\nloss = crt(pred.view(-1, pred.size(-1)), out.view(-1))\nloss.backward()\noptim.step()\n\n# save ckpt\ntorch.save(model.state_dict(), '/path/to/adamr.ckpt')\n```\n\n\n### Finetune\n\n\n```python\n# Classifier finetune\nfrom moltx import datasets, tokenizers\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Prediction)\n\nseq_len = 256 # max token lens of smiles in datasets, if None, use max token lens in smiles\nds = datasets.AdaMRClassifier(tokenizer=tk, device=torch.device('cpu'))\nsmiles = [\"c1cccc1c\", \"CC[N+](C)(C)Cc1ccccc1Br\"]\nlabels = [0, 1]\nsrc, tgt, out = ds(smiles, labels, seq_len)\n\nfrom moltx import nets, models\npretrained_conf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMRClassifier(num_classes=2, 
conf=pretrained_conf)\nmodel.load_ckpt('/path/to/adamr.ckpt')\ncrt = nn.CrossEntropyLoss()\noptim = Adam(model.parameters(), lr=0.1)\n\noptim.zero_grad()\npred = model(src, tgt)\nloss = crt(pred, out)\nloss.backward()\noptim.step()\n\ntorch.save(model.state_dict(), '/path/to/classifier.ckpt')\n\n# Regression finetune\nds = datasets.AdaMRRegression(tokenizer=tk, device=torch.device('cpu'))\nsmiles = [\"c1cccc1c\", \"CC[N+](C)(C)Cc1ccccc1Br\"]\nvalues = [0.23, 0.12]\nsrc, tgt, out = ds(smiles, values, seq_len)\n\nmodel = models.AdaMRRegression(conf=pretrained_conf)\nmodel.load_ckpt('/path/to/adamr.ckpt')\ncrt = nn.MSELoss()\n\noptim.zero_grad()\npred = model(src, tgt)\nloss = crt(pred, out)\nloss.backward()\noptim.step()\n\ntorch.save(model.state_dict(), '/path/to/regression.ckpt')\n\n# Distributed Generation\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)\nds = datasets.AdaMRDistGeneration(tokenizer=tk, device=torch.device('cpu'))\nsmiles = [\"c1cccc1c\", \"CC[N+](C)(C)Cc1ccccc1Br\"]\nsrc, tgt, out = ds(smiles, seq_len)\n\nmodel = models.AdaMRDistGeneration(conf=pretrained_conf)\nmodel.load_ckpt('/path/to/adamr.ckpt')\ncrt = nn.CrossEntropyLoss(ignore_index=0)\n\noptim.zero_grad()\npred = model(src, tgt)\nloss = crt(pred.view(-1, pred.size(-1)), out.view(-1))\nloss.backward()\noptim.step()\n\ntorch.save(model.state_dict(), '/path/to/distgen.ckpt')\n\n# Goal Generation\nds = datasets.AdaMRGoalGeneration(tokenizer=tk, device=torch.device('cpu'))\nsmiles = [\"c1cccc1c\", \"CC[N+](C)(C)Cc1ccccc1Br\"]\ngoals = [0.23, 0.12]\nsrc, tgt, out = ds(smiles, goals, seq_len)\n\nmodel = models.AdaMRGoalGeneration(conf=pretrained_conf)\nmodel.load_ckpt('/path/to/adamr.ckpt')\ncrt = nn.CrossEntropyLoss(ignore_index=0)\n\noptim.zero_grad()\npred = model(src, tgt)\nloss = crt(pred.view(-1, pred.size(-1)), out.view(-1))\nloss.backward()\noptim.step()\n\ntorch.save(model.state_dict(), '/path/to/goalgen.ckpt')\n```\n\n### Inference\n\n```python\nfrom 
moltx import nets, models, pipelines, tokenizers\n# AdaMR\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMR(conf)\nmodel.load_ckpt('/path/to/adamr.ckpt')\npipeline = pipelines.AdaMR(tk, model)\npipeline(\"C=CC=CC=C\")\n# {\"smiles\": [\"c1ccccc1\"], probabilities: [0.9]}\n\n# Classifier\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Prediction)\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMRClassifier(2, conf)\nmodel.load_ckpt('/path/to/classifier.ckpt')\npipeline = pipelines.AdaMRClassifier(tk, model)\npipeline(\"C=CC=CC=C\")\n# {\"label\": [1], \"probability\": [0.67]}\n\n# Regression\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMRRegression(2, conf)\nmodel.load_ckpt('/path/to/regression.ckpt')\npipeline = pipelines.AdaMRRegression(tk, model)\npipeline(\"C=CC=CC=C\")\n# {\"value\": [0.467], \"probability\": [0.67]}\n\n# DistGeneration\ntk = tokenizers.MoltxTokenizer.from_pretrain(models.AdaMRTokenizerConfig.Generation)\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMRDistGeneration(conf)\nmodel.load_ckpt('/path/to/distgen.ckpt')\npipeline = pipelines.AdaMRDistGeneration(tk, model)\npipeline(k=2)\n# {\"smiles\": [\"c1ccccc1\", \"....\"], probabilities: [0.9, 0.1]}\n\n# GoalGeneration\nconf = models.AdaMR.CONFIG_LARGE # or models.AdaMR.CONFIG_BASE\nmodel = models.AdaMRGoalGeneration(conf)\nmodel.load_ckpt('/path/to/goalgen.ckpt')\npipeline = pipelines.AdaMRGoalGeneration(tk, model)\npipeline(0.48, k=2)\n# {\"smiles\": [\"c1ccccc1\", \"....\"], probabilities: [0.9, 0.1]}\n```\n",
    "bugtrack_url": null,
    "license": null,
    "summary": "Molcule Transformer X Model",
    "version": "1.0.2",
    "project_urls": null,
    "split_keywords": [
        "molcule",
        " ai",
        " deep learning",
        " transformer"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "509cdd8c4dceccaa59559eb5c9a8674db4138f13a3f42f19bd45ef8b20acb122",
                "md5": "d61bab35b97e8bbf698512621e969b1d",
                "sha256": "11ffd9ba041ee66e2a323bd12e576f32fa9b0659c7bbbbd61b9dd13b6a8431cf"
            },
            "downloads": -1,
            "filename": "moltx-1.0.2-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "d61bab35b97e8bbf698512621e969b1d",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.8",
            "size": 41469,
            "upload_time": "2024-05-13T09:51:22",
            "upload_time_iso_8601": "2024-05-13T09:51:22.486920Z",
            "url": "https://files.pythonhosted.org/packages/50/9c/dd8c4dceccaa59559eb5c9a8674db4138f13a3f42f19bd45ef8b20acb122/moltx-1.0.2-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "35ba0d73d4cd646d539e14cbdb919e0fdf06ed9682171a9714e49c922619debd",
                "md5": "20bd1a508ea77dd5612cd9f1988f8e45",
                "sha256": "4f5da606a50ce56e4f85b4df1309932e62ef23e3813045be94f4ee594a6889e6"
            },
            "downloads": -1,
            "filename": "moltx-1.0.2.tar.gz",
            "has_sig": false,
            "md5_digest": "20bd1a508ea77dd5612cd9f1988f8e45",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.8",
            "size": 43488,
            "upload_time": "2024-05-13T09:51:24",
            "upload_time_iso_8601": "2024-05-13T09:51:24.065989Z",
            "url": "https://files.pythonhosted.org/packages/35/ba/0d73d4cd646d539e14cbdb919e0fdf06ed9682171a9714e49c922619debd/moltx-1.0.2.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-05-13 09:51:24",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "moltx"
}

Michael Ding