Usage Sample
''''''''''''
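
The sample below wires the ``transformers_model`` classifiers into the ``nlpx`` training wrapper: first a text classifier trained on raw texts, then a token-level classifier fed either by on-the-fly tokenization (``BertTokenizeCollator``) or by pre-tokenized tensors (``BertCollator``).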
.. code:: python

    from sklearn.model_selection import train_test_split
    from transformers import BertTokenizer
    from nlpx.dataset import TextDataset, text_collate
    from nlpx.model.wrapper import ClassifyModelWrapper
    from transformers_model import AutoCNNTextClassifier, AutoCNNTokenClassifier, \
        BertDataset, BertCollator, BertTokenizeCollator

    texts = ["text1", "text2", ...]                # one raw text string per sample
    labels = [0, 0, 1, 2, 1, ...]                  # integer labels aligned with texts
    pretrained_path = "clue/albert_chinese_tiny"
    classes = ['class1', 'class2', 'class3', ...]  # class names, indexed by label id
    train_texts, test_texts, y_train, y_test = train_test_split(texts, labels, test_size=0.2)

    train_set = TextDataset(train_texts, y_train)
    test_set = TextDataset(test_texts, y_test)

    ################################### TextClassifier ##################################
    model = AutoCNNTextClassifier(pretrained_path, len(classes))
    wrapper = ClassifyModelWrapper(model, classes)
    _ = wrapper.train(train_set, test_set, collate_fn=text_collate)

    ################################### TokenClassifier #################################
    tokenizer = BertTokenizer.from_pretrained(pretrained_path)

    ############################## BertTokenizeCollator #################################
    # The collator receives batches of raw texts and tokenizes them at collate
    # time, truncating/padding to a maximum length of 256 tokens.
    model = AutoCNNTokenClassifier(pretrained_path, len(classes))
    wrapper = ClassifyModelWrapper(model, classes)
    _ = wrapper.train(train_set, test_set, collate_fn=BertTokenizeCollator(tokenizer, 256))

    ################################## BertCollator #####################################
    # Here the texts are tokenized up front and wrapped in a BertDataset, so the
    # collator only has to batch the pre-built tensors.
    train_tokenizies = tokenizer.batch_encode_plus(
        train_texts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors="pt",
    )

    test_tokenizies = tokenizer.batch_encode_plus(
        test_texts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors="pt",
    )

    train_set = BertDataset(train_tokenizies, y_train)
    test_set = BertDataset(test_tokenizies, y_test)

    model = AutoCNNTokenClassifier(pretrained_path, len(classes))
    wrapper = ClassifyModelWrapper(model, classes)
    _ = wrapper.train(train_set, test_set, collate_fn=BertCollator())
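
For readers who want to see roughly what the ``BertCollator`` path does without
the ``nlpx`` wrapper, here is a minimal plain-PyTorch sketch of the same flow,
reusing ``train_texts``, ``y_train``, ``pretrained_path``, and ``classes`` from
the sample above. ``EncodedDataset``, the batch size, the learning rate, and the
single-epoch loop are illustrative assumptions built only on public ``torch``
and ``transformers`` APIs; this is not the wrapper's actual implementation.

.. code:: python

    import torch
    from torch.utils.data import DataLoader, Dataset
    from transformers import AutoModel, BertTokenizer

    class EncodedDataset(Dataset):
        """Hypothetical helper pairing pre-tokenized encodings with labels."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __len__(self):
            return len(self.labels)

        def __getitem__(self, idx):
            item = {key: tensor[idx] for key, tensor in self.encodings.items()}
            item["labels"] = torch.tensor(self.labels[idx])
            return item

    tokenizer = BertTokenizer.from_pretrained(pretrained_path)
    encodings = tokenizer(
        train_texts,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    # The tensors are already padded to a fixed length, so the default collate
    # function can stack them; this is the role BertCollator plays above.
    loader = DataLoader(EncodedDataset(encodings, y_train), batch_size=16, shuffle=True)

    backbone = AutoModel.from_pretrained(pretrained_path)
    head = torch.nn.Linear(backbone.config.hidden_size, len(classes))
    optimizer = torch.optim.AdamW(
        list(backbone.parameters()) + list(head.parameters()), lr=2e-5
    )

    backbone.train()
    for batch in loader:  # one epoch, for illustration only
        labels = batch.pop("labels")
        hidden = backbone(**batch).last_hidden_state  # (batch, seq_len, hidden)
        logits = head(hidden[:, 0])                   # pool the [CLS] position
        loss = torch.nn.functional.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()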
Raw data
''''''''
{
"_id": null,
"home_page": "https://gitee.com/summry/torch-model-hub",
"name": "transformers-model",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.6",
"maintainer_email": null,
"keywords": "Pytorch, AI, Machine learning, Deep learning, Bert, llm, transformers",
"author": "summy",
"author_email": "fkfkfk2024@2925.com",
"download_url": "https://files.pythonhosted.org/packages/47/0e/7afad5b0693a5cda83ea1f66af1f0cfe86fd334468106914a105dce7a2ed/transformers-model-0.0.8.tar.gz",
"platform": null,
"description": "Usage Sample\n''''''''''''\n\n.. code:: python\n\n from sklearn.model_selection import train_test_split\n import torch\n from transformers import BertTokenizer\n from nlpx.dataset import TextDataset, text_collate\n from nlpx.model.wrapper import ClassifyModelWrapper\n from transformers_model import AutoCNNTextClassifier, AutoCNNTokenClassifier, \\\n BertDataset, BertCollator, BertTokenizeCollator\n\n texts = [[str],]\n labels = [0, 0, 1, 2, 1...]\n pretrained_path = \"clue/albert_chinese_tiny\"\n classes = ['class1', 'class2', 'class3'...]\n train_texts, test_texts, y_train, y_test = train_test_split(texts, labels, test_size=0.2)\n \n train_set = TextDataset(train_texts, y_train)\n test_set = TextDataset(test_texts, y_test)\n\n ################################### TextClassifier ##################################\n model = AutoCNNTextClassifier(pretrained_path, len(classes))\n wrapper = ClassifyModelWrapper(model, classes)\n _ = wrapper.train(train_set, test_set, collate_fn=text_collate)\n\n ################################### TokenClassifier #################################\n tokenizer = BertTokenizer.from_pretrained(pretrained_path)\n\n ##################### BertTokenizeCollator #########################\n model = AutoCNNTokenClassifier(pretrained_path, len(classes))\n wrapper = ClassifyModelWrapper(model, classes)\n _ = wrapper.train(train_set, test_set, collate_fn=BertTokenizeCollator(tokenizer, 256))\n\n ##################### BertCollator ##################################\n train_tokenizies = tokenizer.batch_encode_plus(\n train_texts,\n max_length=256,\n padding=\"max_length\",\n truncation=True,\n return_token_type_ids=True,\n return_attention_mask=True,\n return_tensors=\"pt\",\n )\n\n test_tokenizies = tokenizer.batch_encode_plus(\n test_texts,\n max_length=256,\n padding=\"max_length\",\n truncation=True,\n return_token_type_ids=True,\n return_attention_mask=True,\n return_tensors=\"pt\",\n )\n\n train_set = BertDataset(train_tokenizies, y_train)\n test_set = BertDataset(test_tokenizies, y_test)\n\n model = AutoCNNTokenClassifier(pretrained_path, len(classes))\n wrapper = ClassifyModelWrapper(model, classes)\n _ = wrapper.train(train_set, test_set, collate_fn=BertCollator())\n\n\n",
"bugtrack_url": null,
"license": null,
"summary": "Model hub for transformers.",
"version": "0.0.8",
"project_urls": {
"Homepage": "https://gitee.com/summry/torch-model-hub"
},
"split_keywords": [
"pytorch",
" ai",
" machine learning",
" deep learning",
" bert",
" llm",
" transformers"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "470e7afad5b0693a5cda83ea1f66af1f0cfe86fd334468106914a105dce7a2ed",
"md5": "7d7e4f5cd55e3f7d59cbb865e7e8d4c7",
"sha256": "d561ccddd16d43a976385ac2497fa8a4ead6c533c60a5650f75f322bc6734e8c"
},
"downloads": -1,
"filename": "transformers-model-0.0.8.tar.gz",
"has_sig": false,
"md5_digest": "7d7e4f5cd55e3f7d59cbb865e7e8d4c7",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.6",
"size": 6940,
"upload_time": "2025-03-11T02:40:38",
"upload_time_iso_8601": "2025-03-11T02:40:38.106322Z",
"url": "https://files.pythonhosted.org/packages/47/0e/7afad5b0693a5cda83ea1f66af1f0cfe86fd334468106914a105dce7a2ed/transformers-model-0.0.8.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-03-11 02:40:38",
"github": false,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"lcname": "transformers-model"
}
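
Per the metadata above, this release is a source distribution for Python >= 3.6; assuming standard PyPI tooling, ``pip install transformers-model==0.0.8`` should fetch exactly this artifact.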