# pytorch-common
A [PyPI module](https://pypi.org/project/pytorch-common/) with common PyTorch tools (see [Features](#features) below).
## Build release
**Step 1**: Bump the version in the following files:
```bash
pytorch_common/__init__.py
pyproject.toml
```
**Step 2**: Build release.
```bash
$ poetry build
Building pytorch-common (0.2.3)
- Building sdist
- Built pytorch-common-0.2.3.tar.gz
- Building wheel
- Built pytorch_common-0.2.3-py3-none-any.whl
```
**Step 3**: Publish release to PyPI repository.
```bash
$ poetry publish
Publishing pytorch-common (0.2.3) to PyPI
- Uploading pytorch-common-0.2.3.tar.gz 100%
- Uploading pytorch_common-0.2.3-py3-none-any.whl 100%
```
## Features
* **Callbacks** (Keras style)
  * **Validation**: Model validation.
  * **ReduceLROnPlateau**:
    * Reduces the learning rate when a metric has stopped improving.
    * Models often benefit from reducing the learning rate by a factor
      of 2-10 once learning stagnates. This scheduler reads a metric
      and, if no improvement is seen for a 'patience' number of epochs,
      reduces the learning rate.
  * **EarlyStop**:
    * Stops training when the model has stopped improving a specified metric.
  * **SaveBestModel**:
    * Saves model weights to a file whenever the model's validation metric improves.
  * **Logger**:
    * Logs context properties.
    * Generally used to log performance metrics every n epochs.
  * **MetricsPlotter**:
    * Plots evaluation metrics.
    * The plot is updated every n epochs during training.
    * Each plot can be saved to a file.
  * **Callback** and **OutputCallback**:
    * Base classes.
  * **CallbackManager**:
    * Simplifies callback support when fitting custom models.
* **StratifiedKFoldCV**:
  * Supports parallel fold processing on CPU.
* **Mixins** (a usage sketch follows this list)
  * `FiMixin`
    * `fit(data_loader, loss_fn, epochs, optimizer, callbacks, verbose, extra_ctx, train_fn)`
  * `CommonMixin`
    * `params()`: Get model params.
    * Get the associated `device`.
  * `PredictMixin`
    * `evaluate(data_loader)`
    * `evaluate_score(data_loader, score_fn)`
    * `predict(features)`
  * `PersistentMixin`
    * `save(path)`
    * `load(path)`
* **Utils**
  * Device management
  * `Stopwatch`
  * Data split
  * os
  * model
  * `LoggerBuilder`
  * Dict utils
  * `WeightsFileResolver`: Resolves the best model weights file path using a given metric, e.g. `min` `eval_loss`, `max` `eval_acc`, etc.
* **Plot**
  * Plot primitives like `plot_loss`.
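For illustration, a model might compose the mixins as in the minimal sketch below. The `MyModel` class is hypothetical, and the assumption that the mixins are importable from `pytorch_common.modules` (the module that provides `FitContextFactory` in the callbacks example further down) is mine; the method signatures are the ones listed above.
```python
import torch
# Assumption: the mixins live in pytorch_common.modules.
from pytorch_common.modules import FiMixin, CommonMixin, PredictMixin, PersistentMixin

class MyModel(torch.nn.Module, FiMixin, CommonMixin, PredictMixin, PersistentMixin):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(10, 2)

    def forward(self, x):
        return self.linear(x)

model = MyModel()

# Methods contributed by the mixins (signatures as listed above):
# model.fit(data_loader, loss_fn, epochs, optimizer, callbacks, ...)
# model.predict(features)
# model.save(path)
# model.load(path)
```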
## Examples
### Device management
```python
import pytorch_common.util as pu
# Set the preferred device.
pu.set_device_name('gpu')  # or 'cpu'
# Set the GPU memory fraction for a process (%).
pu.set_device_memory(
    'gpu',  # or 'cpu'
    process_memory_fraction=0.5
)
# Get the preferred device.
# Note: if the preferred device is not found, it falls back to CPU.
device = pu.get_device()
```
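The returned device can then be used with plain PyTorch calls (assuming it is a regular `torch.device`), for example:
```python
import torch

# Move a model and a batch to the resolved device.
model = torch.nn.Linear(10, 2).to(device)
batch = torch.randn(32, 10).to(device)
output = model(batch)
```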
### Logging
```python
import logging
import pytorch_common.util as pu
## Default logging to console...
pu.LoggerBuilder() \
.on_console() \
.build()
## Set up format and level...
pu.LoggerBuilder() \
.level(logging.ERROR) \
.on_console('%(asctime)s - %(levelname)s - %(message)s') \
.build()
```
### Stopwatch
```python
import logging
import pytorch_common.util as pu
sw = pu.Stopwatch()
# Call any demanding process...
# Get response time.
response_time = sw.elapsed_time()
# Log response time.
logging.info(sw.to_str())
```
### Dataset split
```python
import pytorch_common.util as pu
dataset = ... # <-- torch.utils.data.Dataset
train_subset, test_subset = pu.train_val_split(
    dataset,
    train_percent = .7
)
train_subset, val_subset, test_subset = pu.train_val_test_split(
    dataset,
    train_percent = .7,
    val_percent = .15
)
```
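The resulting splits can then be wrapped in standard PyTorch data loaders, for example:
```python
from torch.utils.data import DataLoader

train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_subset, batch_size=64)
test_loader  = DataLoader(test_subset, batch_size=64)
```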
### Kfolding
```python
import logging
from pytorch_common.kfoldcv import StratifiedKFoldCV, \
                                   ParallelKFoldCVStrategy, \
                                   NonParallelKFoldCVStrategy

# Train your model inside this function...
def train_fold_fn(dataset, train_idx, val_idx, params, fold):
    pass

# Get dataset labels.
def get_y_values_fn(dataset):
    pass

cv = StratifiedKFoldCV(
    train_fold_fn,
    get_y_values_fn,
    strategy=NonParallelKFoldCVStrategy(),  # or ParallelKFoldCVStrategy()
    k_fold=5
)
# Model hyperparams...
params = {
    'seed': 42,
    'lr': 0.01,
    'epochs': 50,
    'batch_size': 4000,
    # ...
}
# Train model...
result = cv.train(dataset, params)
logging.info('CV results: {}'.format(result))
```
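As a concrete starting point, `train_fold_fn` could slice the dataset with the provided indices. The body below is a hypothetical sketch, assuming the function trains a fresh model per fold and returns that fold's score:
```python
from torch.utils.data import Subset

def train_fold_fn(dataset, train_idx, val_idx, params, fold):
    train_set = Subset(dataset, train_idx)
    val_set = Subset(dataset, val_idx)
    # Build a fresh model from params, train it on train_set,
    # then evaluate it on val_set and return the fold's score.
    return 0.0
```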
### Assertions
```python
from pytorch_common.error import Assertions, Checker
# Check function and constructor params using assertions.
param_value = -1
# Raises an exception with error code 404103 when the condition is not met.
Assertions.positive_int(404103, param_value, 'param name')
Assertions.positive_float(404103, param_value, 'param name')
# Other options
Assertions.is_class(404205, param_value, 'param name', aClass)
Assertions.is_tensor(404401, param_value, 'param name')
Assertions.has_shape(404401, param_value, (3, 4), 'param name')
# Assertions are implemented using a Checker builder:
Checker(error_code, value, name) \
    .is_not_none() \
    .is_int() \
    .is_positive() \
    .check()
# Other checker options:
# .is_not_none()
# .is_int()
# .is_float()
# .is_positive()
# .is_a(aclass)
# .is_tensor()
# .has_shape(shape)
```
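For instance, a constructor could fail fast on invalid parameters. The `Batch` class below is hypothetical, while the assertion calls are the ones shown above:
```python
import torch
from pytorch_common.error import Assertions

class Batch:
    def __init__(self, features, batch_size):
        # Raise coded errors as early as possible.
        Assertions.is_tensor(404401, features, 'features')
        Assertions.positive_int(404103, batch_size, 'batch_size')
        self.features = features
        self.batch_size = batch_size

batch = Batch(torch.randn(4, 3), batch_size=4)
```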
### Callbacks
```python
from pytorch_common.callbacks import CallbackManager
from pytorch_common.modules import FitContextFactory
from pytorch_common.callbacks import EarlyStop, \
                                     ReduceLROnPlateau, \
                                     SaveBestModel, \
                                     Validation
from pytorch_common.callbacks.output import Logger, \
                                            MetricsPlotter

def train_method(model, epochs, optimizer, loss_fn, callbacks):
    callback_manager = CallbackManager(
        ctx=FitContextFactory.create(model, loss_fn, epochs, optimizer),
        callbacks=callbacks
    )
    for epoch in range(epochs):
        callback_manager.on_epoch_start(epoch)
        # Train the model for one epoch and compute train_loss...
        callback_manager.on_epoch_end(train_loss)
        if callback_manager.break_training():
            break
    return callback_manager.ctx

model = ...      # Create my model
optimizer = ...  # My optimizer
loss_fn = ...    # My loss function

callbacks = [
    # Log context variables after each epoch...
    Logger(['fold', 'time', 'epoch', 'lr', 'train_loss', 'val_loss', ...]),
    EarlyStop(metric='val_auc', mode='max', patience=3),
    ReduceLROnPlateau(metric='val_auc'),
    Validation(
        val_set,
        metrics={
            'my_metric_name': lambda y_pred, y_true: ...  # Compute the validation metric
        },
        each_n_epochs=5
    ),
    SaveBestModel(metric='val_loss'),
    MetricsPlotter(metrics=['train_loss', 'val_loss'])
]

train_method(model, 100, optimizer, loss_fn, callbacks)
```
### Utils
#### WeightsFileResolver
```bash
$ ls ./weights
2023-08-21_15-17-49--gfm--epoch_2--val_loss_1.877971887588501.pt
2023-08-21_15-13-09--gfm--epoch_3--val_loss_1.8183038234710693.pt
2023-08-19_20-00-19--gfm--epoch_10--val_loss_0.9969356060028076.pt
2023-08-19_19-59-39--gfm--epoch_4--val_loss_1.4990438222885132.pt
```
```python
import pytorch_common.util as pu
resolver = pu.WeightsFileResolver('./weights')
file_path = resolver(experiment='gfm', metric='val_loss', min_value=True)
print(file_path)
```
```bash
'./weights/2023-08-19_20-00-19--gfm--epoch_10--val_loss_0.9969356060028076.pt'
```
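The resolved path can then be used to restore the weights, e.g. through the `PersistentMixin.load(path)` method listed above (assuming your model mixes it in):
```python
model.load(file_path)  # Assumption: model uses PersistentMixin
```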
See the following projects for functional code examples:
- https://github.com/adrianmarino/deep-fm
- https://github.com/adrianmarino/attention