# Custom Data-Generator for multiple-input multiple-output models in TF-Keras
Develop your own Keras data generator in TF-Keras to load and batch any type of data, in any format, from a massive dataset on machines with limited main or GPU memory. mimo-keras is a package that lets you feed models with any number of inputs and outputs, in any data format.
mimo-keras — Never use Keras ImageDataGenerator to load data in batches
----------------------------------------------------------------------
Have you ever used ImageDataGenerator(), flow_from_directory(), or flow_from_dataframe() to load and batch data for your deep model in Keras? mimo-keras makes data loading simple and straightforward, even for models with multiple inputs and outputs or for data in formats that Keras does not support out of the box.
## mimo-keras is like ImageDataGenerator().flow_from_directory(), but better:
1. Handles pandas DataFrames, images, and other formats in a single generator, with no need to define a separate generator for each input or output.
2. Loads data in any format.
3. Lets you write your own data-loader function.
4. Lets you use your own custom preprocessing pipeline without limitations.
```python
import numpy as np
import pandas as pd

from mimo_keras import MIMODataGenerator


def load_and_preprocess_matrix(feature_values, feature_names):
    # Map the column names to the values passed for this sample.
    parameters = dict(zip(feature_names, feature_values))
    matrix = np.load(parameters.get('matrix_path'))
    if len(np.shape(matrix)) == 2:
        matrix = np.expand_dims(matrix, axis=-1)
    # Standardize the matrix (zero mean, unit variance).
    matrix = (matrix - np.mean(matrix)) / np.std(matrix)
    return matrix


data = pd.DataFrame(columns=['year', 'resolution', 'label', 'location_1', 'location_2', 'matrix_path'])
...
# split the data here (e.g. into data_train and data_validation)
...


# First input with shape (m, n, c)
matrix_input = ('matrix',                   # Name of the model IO.
                ['matrix_path'],            # These columns' values are passed to your function for each sample during batch generation.
                load_and_preprocess_matrix  # A function or callable class for loading and preprocessing. Use 'raw' to send the values to the model IO directly.
                )
# Second input with shape (4, 1)
metadata_input = ('metadata', ['year', 'resolution', 'location_1', 'location_2'], 'raw')
# Output (this model has only one output, but you can define multiple outputs just like inputs).
output = ('label', ['label'], 'raw')  # binary classification

train_generator = MIMODataGenerator(data_table=data_train,
                                    model_inputs=[matrix_input, metadata_input],
                                    model_outputs=[output],
                                    shuffle=True,
                                    batch_size=BATCH_SIZE
                                    )

validation_generator = MIMODataGenerator(data_table=data_validation,
                                         model_inputs=[matrix_input, metadata_input],
                                         model_outputs=[output],
                                         shuffle=False,
                                         batch_size=BATCH_SIZE
                                         )

model.fit(train_generator,
          validation_data=validation_generator,
          epochs=EPOCHS
          )
```
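For context, here is a minimal sketch of a model whose input and output layer names line up with the generator's IO names ('matrix', 'metadata', 'label'), assuming that is how mimo-keras matches batches to model IOs; the architecture and placeholder shapes are illustrative only, not prescribed by the package:

```python
from tensorflow import keras
from tensorflow.keras import layers

# Placeholder dimensions for the matrix input; replace with your real shape.
m, n, c = 128, 128, 1

matrix_in = keras.Input(shape=(m, n, c), name='matrix')    # matches the 'matrix' IO above
metadata_in = keras.Input(shape=(4, 1), name='metadata')   # matches the 'metadata' IO above

x = layers.Conv2D(16, 3, activation='relu')(matrix_in)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Concatenate()([x, layers.Flatten()(metadata_in)])
label_out = layers.Dense(1, activation='sigmoid', name='label')(x)  # matches the 'label' IO above

model = keras.Model(inputs=[matrix_in, metadata_in], outputs=[label_out])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
```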
For more complicated models, the data table can hold just identifiers (for example a sample_id), and your loader functions build the data for each IO from them. For example, to train a recommender system (DLRM) on a massive dataset:
```python
import pandas as pd

from mimo_keras import MIMODataGenerator


def load_user_history(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    uid = parameters.get('user_id')
    ...
    return user_history


def load_product_history(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    pid = parameters.get('product_id')
    ...
    return product_history


def get_user_product_score(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    uid, pid = parameters.get('user_id'), parameters.get('product_id')
    ...
    return user_product_score


data = pd.DataFrame(columns=['sample_id', 'user_id', 'product_id', 'label'])

# First input
input_user = ('user_history', ['user_id'], load_user_history)
# Second input
input_product = ('product_history', ['product_id'], load_product_history)
# Output
output = ('score', ['user_id', 'product_id'], get_user_product_score)

train_generator = MIMODataGenerator(data_table=data_train,
                                    model_inputs=[input_user, input_product],
                                    model_outputs=[output],
                                    shuffle=True,
                                    batch_size=BATCH_SIZE
                                    )
```
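The loader bodies above are intentionally elided. As one purely illustrative possibility (the file layout, naming, and fixed history length here are assumptions, not part of mimo-keras), load_user_history could read a pre-computed per-user history array from disk and pad it to a fixed shape:

```python
import numpy as np

HISTORY_LEN = 50  # assumed fixed history length so every batch stacks to a uniform shape


def load_user_history(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    uid = parameters.get('user_id')
    # Hypothetical storage layout: one .npy file of recent interaction features per user.
    history = np.load(f'user_histories/{uid}.npy')
    # Pad or truncate to a fixed length so samples can be batched together.
    padded = np.zeros((HISTORY_LEN, history.shape[1]), dtype=history.dtype)
    n = min(HISTORY_LEN, len(history))
    padded[:n] = history[:n]
    return padded
```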
An example of loading NIfTI (.nii) files to train a multi-dimensional medical image processing model:
```python
import nibabel as nib
import pandas as pd

from mimo_keras import MIMODataGenerator


def load_mri_scan(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    return normalize_image(nib.load(parameters.get('image_path')).get_fdata())


def load_pet_scan(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    mri_path = parameters.get('image_path')
    return normalize_image(nib.load(mri_path.replace('_mri_', '_pet_scan_')).get_fdata())


def load_mask(feature_values, feature_names):
    parameters = dict(zip(feature_names, feature_values))
    mri_path = parameters.get('image_path')
    return binarize_image(nib.load(mri_path.replace('_mri_', '_mask_')).get_fdata())


data = pd.DataFrame(columns=['sample_id', 'image_path', 'disease_type'])

# First input
input_mri = ('mri_scan', ['image_path'], load_mri_scan)
# Second input
input_pet = ('pet_scan', ['image_path'], load_pet_scan)
# First output
output_mask = ('mask', ['image_path'], load_mask)
# Second output
output_disease = ('disease_type', ['disease_type'], 'raw')

train_generator = MIMODataGenerator(data_table=data_train,
                                    model_inputs=[input_mri, input_pet],
                                    model_outputs=[output_mask, output_disease],
                                    shuffle=True,
                                    batch_size=BATCH_SIZE
                                    )
```
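normalize_image and binarize_image are not provided by mimo-keras or nibabel; a minimal sketch of what they might do, assuming simple min-max scaling and thresholding, is:

```python
import numpy as np


def normalize_image(volume, eps=1e-8):
    # Min-max scale the scan intensities to [0, 1].
    vmin, vmax = np.min(volume), np.max(volume)
    return (volume - vmin) / (vmax - vmin + eps)


def binarize_image(volume, threshold=0.5):
    # Turn a mask volume into {0, 1} labels.
    return (volume > threshold).astype(np.float32)
```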
Calculating a metric to evaluate the model:
```python
from sklearn.metrics import mean_absolute_error

from mimo_keras import MIMODataGenerator

...

model_input = ('input_data', ['s_room', 'n_bedroom', 's_total', 'city', 'floor', 'location'], 'raw')
model_output = ('output_data', ['price'], 'raw')

test_generator = MIMODataGenerator(data_table=data_test,
                                   model_inputs=[model_input],
                                   model_outputs=[model_output],
                                   shuffle=False,
                                   batch_size=BATCH_SIZE
                                   )

y_pred = model.predict(test_generator)
y_target = test_generator.get_io_data_values_by_name('output_data', 'all')
# or: y_target = test_generator.data_table.price.to_list()

mae = mean_absolute_error(y_target, y_pred)
```
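The same pattern extends to multi-output models. For a test generator built with the same IOs as the medical-imaging example above, a sketch (assuming get_io_data_values_by_name accepts an IO name and 'all' exactly as shown earlier) might look like:

```python
# Keras returns one prediction array per model output, in the model's output order.
y_pred_mask, y_pred_disease = model.predict(test_generator)

# Targets for the 'raw' output can be read back by IO name, as in the example above,
# or taken straight from the underlying data table.
y_true_disease = test_generator.get_io_data_values_by_name('disease_type', 'all')
# or: y_true_disease = test_generator.data_table.disease_type.to_list()
```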
#large_dataset #massive_dataset #MRI_keras #data_generator_for_medical_images #fMRI_keras #graph_neural_networks #deep_learning_with_limited_GPU_memory #TensorFlow