[![Multi-Modality](agorabanner.png)](https://discord.gg/qUtxnK2NMf)
# Zamba
Implementation of Zamba, the joint Mamba-Transformer model. It's now fully ready to train! [PAPER LINK](https://arxiv.org/abs/2405.16712)
# Install
`pip3 install zamba-torch`
## Usage
```python
import torch  # PyTorch, used for tensors and training

from zamba_torch.main import Zamba  # The Zamba model class

# Example usage
x = torch.randint(
    0, 256, (1, 512)
)  # Random token IDs of shape (batch=1, seq_len=512)

model = Zamba(
    dim=512,  # Model (embedding) dimension
    heads=8,  # Number of attention heads
    dim_head=64,  # Dimension of each attention head
    d_state=512,  # SSM state dimension
    dt_rank=128,  # Rank of the Δ (delta) projection in the Mamba blocks
    d_conv=256,  # Convolution dimension in the Mamba blocks
    vocab_size=256,  # Vocabulary size (must cover the token IDs in x)
    max_seq_len=512,  # Maximum sequence length
)

print(
    model(x)
)  # Forward pass over the input tokens
```
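
Since the model is described as ready to train, here is a minimal training-step sketch for next-token prediction. It assumes the forward pass returns logits of shape `(batch, seq_len, vocab_size)`; check the actual output shape in `zamba_torch.main` for your installed version.

```python
import torch
import torch.nn.functional as F

from zamba_torch.main import Zamba

model = Zamba(
    dim=512,
    heads=8,
    dim_head=64,
    d_state=512,
    dt_rank=128,
    d_conv=256,
    vocab_size=256,
    max_seq_len=512,
)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

# Dummy batch of token IDs; swap in your tokenized dataset.
tokens = torch.randint(0, 256, (1, 512))

# Next-token prediction: inputs and targets are shifted by one position.
inputs, targets = tokens[:, :-1], tokens[:, 1:]

logits = model(inputs)  # assumed shape: (1, 511, 256)
loss = F.cross_entropy(
    logits.reshape(-1, logits.size(-1)),  # flatten to (batch * seq, vocab)
    targets.reshape(-1),  # flatten to (batch * seq,)
)

optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())
```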
# License
MIT
## Citation
```bibtex
@misc{glorioso2024zamba,
title={Zamba: A Compact 7B SSM Hybrid Model},
author={Paolo Glorioso and Quentin Anthony and Yury Tokpanov and James Whittington and Jonathan Pilault and Adam Ibrahim and Beren Millidge},
year={2024},
eprint={2405.16712},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
```