RiskBERT


NameRiskBERT JSON
Version 0.0.9 PyPI version JSON
download
home_page
SummaryGeneralized semantic regression with a BERT base.
upload_time2024-02-20 08:16:37
maintainer
docs_urlNone
author
requires_python>=3.7
licenseBSD-3-Clause
keywords llm bert glm
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            # generalized-semantic-regression
RiskBERT is a significant step forward, making it easier than ever to incorporate text fragments into various applications, such as insurance frequency and severity models, or other GLM-based models. Feel free to explore and utilize RiskBERT for your text analysis needs.

To learn more about the RiskBERT implementation, read this article: https://www.thebigdatablog.com/generalized-semantic-regression-using-contextual-embeddings/

Example: 
`pip install RiskBERT`

```
import torch
from transformers import AutoTokenizer

from RiskBERT import RiskBertModel, evaluate_model, glmModel, trainer
from RiskBERT.simulation.data_functions import Data
from RiskBERT.utils import DataConstructor

# Run on the first CUDA device when one is present, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the dataset (from RiskBERT's simulation module) and the model.
# NOTE(review): `weigth` is the keyword exactly as this example passes it;
# check the library signature before "correcting" the spelling.
model_dataset = Data(20000, scores=torch.tensor([[0.2], [0.4]]), weigth=5)
pre_model = "distilbert-base-uncased"
model = RiskBertModel(
    model=pre_model,
    input_dim=2,
    dropout=0.4,
    freeze_bert=True,
    mode="CLS",
)
# The tokenizer is loaded from the same checkpoint name the model was given.
tokenizer = AutoTokenizer.from_pretrained(pre_model)

# Train the model; the trainer hands back the model and three loss results.
sgd_optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
model, Total_Loss, Validation_Loss, Test_Loss = trainer(
    model=model,
    model_dataset=model_dataset,
    epochs=100,
    batch_size=1000,
    evaluate_fkt=evaluate_model,
    tokenizer=tokenizer,
    optimizer=sgd_optimizer,
    device=device,
)

# Predict from the model: one list of sentences and one covariate row per
# observation.
example_sentences = [
    ["Dies ist ein Test"],
    ["Hallo Welt", "RiskBERT ist das Beste"],
]
example_covariates = [[1, 5], [2, 6]]
data_constructor = DataConstructor(
    sentences=example_sentences,
    covariates=example_covariates,
    tokenizer=tokenizer,
)
my_data = data_constructor.prepare_for_model()
my_prediction = model(**my_data)

```

# Upload to PyPI
```
python -m pip install build twine
python -m build
twine check dist/*
twine upload dist/*
```


            

Raw data

            {
    "_id": null,
    "home_page": "",
    "name": "RiskBERT",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">=3.7",
    "maintainer_email": "",
    "keywords": "LLM,BERT,GLM",
    "author": "",
    "author_email": "Heiko Wagner <heikowagner@t-online.de>",
    "download_url": "https://files.pythonhosted.org/packages/15/c0/c92d553c20a8e3878ccc44d9105839a20b24b7fcef1a8bf335c134908874/RiskBERT-0.0.9.tar.gz",
    "platform": null,
    "description": "# generalized-semantic-regression\nRiskBERT is a significant step forward, making it easier than ever to incorporate text fragments into various applications, such as insurance frequency and severity models, or other GLM-based models. Feel free to explore and utilize RiskBERT for your text analysis needs.\n\nTo learn more about the RiskBERT implementation read this article: https://www.thebigdatablog.com/generalized-semantic-regression-using-contextual-embeddings/\n\nExample: \n`pip install RiskBERT`\n\n```\nfrom transformers import AutoTokenizer\nimport torch\nfrom RiskBERT import glmModel, RiskBertModel\nfrom RiskBERT import trainer, evaluate_model\nfrom RiskBERT.simulation.data_functions import Data\nfrom RiskBERT.utils import DataConstructor\n\n# Set device to gpu if available\ndevice = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n\n# Init the model\nmodel_dataset = Data(20000, scores=torch.tensor([[0.2],[0.4]]), weigth=5)\npre_model= \"distilbert-base-uncased\"\nmodel = RiskBertModel(model=pre_model, input_dim=2, dropout=0.4, freeze_bert=True, mode=\"CLS\")\ntokenizer = AutoTokenizer.from_pretrained(pre_model)\n# Train the model\nmodel, Total_Loss, Validation_Loss, Test_Loss = trainer(model =model, \n        model_dataset=model_dataset, \n        epochs=100,\n        batch_size=1000,\n        evaluate_fkt=evaluate_model,\n        tokenizer=tokenizer, \n        optimizer=torch.optim.SGD(model.parameters(), lr=0.001),\n        device = device\n        )\n\n# Predict from the model\nmy_data = DataConstructor(\n    sentences=[[\"Dies ist ein Test\"],[\"Hallo Welt\", \"RiskBERT ist das Beste\"]], \n    covariates=[[1,5],[2,6]],\n    tokenizer= tokenizer).prepare_for_model()\nmy_prediction=model(**my_data)\n\n```\n\n# Upload to pip\n```\npython -m pip install build twine\npython -m build\ntwine check dist/*\ntwine upload dist/*`\n````\n\n",
    "bugtrack_url": null,
    "license": "BSD-3-Clause",
    "summary": "Generalized semantic regression with a BERT base.",
    "version": "0.0.9",
    "project_urls": null,
    "split_keywords": [
        "llm",
        "bert",
        "glm"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "35a0d5768ccba22360beca65a5b13a7a7f78da337ce74f334cef2e019ea3b837",
                "md5": "50a9dd37387190fa30466f953d5585b0",
                "sha256": "720f02e9303eb36911c36d73916111b29858aed4f1e0298902480b09710a8c3c"
            },
            "downloads": -1,
            "filename": "RiskBERT-0.0.9-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "50a9dd37387190fa30466f953d5585b0",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.7",
            "size": 16201,
            "upload_time": "2024-02-20T08:16:35",
            "upload_time_iso_8601": "2024-02-20T08:16:35.322771Z",
            "url": "https://files.pythonhosted.org/packages/35/a0/d5768ccba22360beca65a5b13a7a7f78da337ce74f334cef2e019ea3b837/RiskBERT-0.0.9-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "15c0c92d553c20a8e3878ccc44d9105839a20b24b7fcef1a8bf335c134908874",
                "md5": "a83a0fbc87dbcb32e4d0efde1d4b6975",
                "sha256": "60497ca774844542f2984c3b1e1a47c22e563ceac72f596aac2899ff5e0ede08"
            },
            "downloads": -1,
            "filename": "RiskBERT-0.0.9.tar.gz",
            "has_sig": false,
            "md5_digest": "a83a0fbc87dbcb32e4d0efde1d4b6975",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.7",
            "size": 21637,
            "upload_time": "2024-02-20T08:16:37",
            "upload_time_iso_8601": "2024-02-20T08:16:37.018314Z",
            "url": "https://files.pythonhosted.org/packages/15/c0/c92d553c20a8e3878ccc44d9105839a20b24b7fcef1a8bf335c134908874/RiskBERT-0.0.9.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-02-20 08:16:37",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "riskbert"
}
        
Elapsed time: 0.17993s