# Vrame
Vrame is a Python library designed to efficiently parse and validate all columns in a pandas DataFrame. It leverages the vectorized operations of pandas to significantly speed up the validation process, making it a powerful tool for data validation in data science and machine learning projects.
## Features
- **Vectorized Validation**: Utilizes pandas' vectorized operations for fast and efficient data validation.
- **Similar Syntax to Pydantic**: Offers a familiar API for those who have used Pydantic, making it easy to adopt.
- **Custom Validators**: Allows for the definition of custom validation rules to meet specific data requirements.
## Installation
To install Vrame, use pip:
pip install vrame
## Usage
Define a schema for your DataFrame using Vrame's syntax, which is similar to Pydantic's, then pass the DataFrame to the model and call `parse_and_validate()`:
import pandas as pd
import numpy as np
from vrame.basemodel import BaseModel
from vrame.column_types import (
Integer,
Float,
Boolean,
Datetime,
String,
List,
Tuple,
Dictionary,
Set,
Object
)
# Example schema: one class attribute per DataFrame column. Each attribute
# name matches the column name used in the sample DataFrame of this example,
# which is why several of them intentionally reuse builtin names such as
# `float`, `list` and `set`.
class Model(BaseModel):
    # Numeric columns declare `lower` / `upper` bounds.
    integer = Integer(lower=-1, upper=6, nullable=True)
    float = Float(lower=-1.0, upper=6.0, nullable=True)
    bool = Boolean(nullable=True)
    # Datetime bounds are given as date strings.
    datetime = Datetime(lower="2024-03-20", upper="2024-03-21", nullable=True)
    # Container columns declare `min_items` / `max_items`.
    list = List(nullable=True, min_items=1, max_items=3)
    tuple = Tuple(nullable=True, min_items=1, max_items=3)
    dictionary = Dictionary(nullable=True, min_items=1, max_items=3)
    set = Set(nullable=True, min_items=1, max_items=3)
    # String columns declare `min_length` / `max_length`.
    string = String(min_length=0, max_length=5, nullable=True)
    object = Object(nullable=True)
if __name__ == "__main__":
    # Sample frame with one column per field declared on `Model`.
    # Some cells are given as strings (e.g. "2", "[3, 4]", "{1, 2}") next to
    # native Python values of the same column.
    df = pd.DataFrame(
        {
            'integer': [1, "2", 3, 4, np.nan],
            'float': [1.0, "2.0", 3, "4", "5"],
            'bool': [True, "False", "True", False, False],
            'datetime': [
                "2024-03-20",
                "2024-03-21",
                "2024-03-21",
                "2024-03-21",
                "2024-03-21"
            ],
            'list': [[1, 2], "[3, 4]", [5, 6], [7, 8], [9, 10]],
            'tuple': [(1, 2), "(3, 4)", (5, 6), (7, 8), (9, 10)],
            'dictionary': [
                {'a': 1, 'b': 2.1},
                "{'e': 3, 'f': 4.0}",
                {'a': 1, 'b': 2.1},
                {'a': 1, 'b': 2.1},
                {'a': 1, 'b': 2.1}
            ],
            'set': [{1, 2}, "{1, 2}", {1, 2}, {1, 2}, {1, 2}],
            'string': ["str1", "str2", "", "12345", "I"],
            'object': [1, 2.0, False, np.nan, None]
        }
    )

    # Wrap the frame in the schema, then parse and validate it; the result is
    # bound back to `df`.
    m = Model(df)
    df = m.parse_and_validate()
Raw data
{
"_id": null,
"home_page": null,
"name": "Vrame",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": null,
"keywords": "pandas, pd, dataframe, df, validation, validate, parsing, parse, pydantic, Pydantic, effcient, effciently",
"author": null,
"author_email": "Yuen Shing Yan Hindy <yuenshingyan@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/56/c4/28144a7cb9e0c51a1c7365b106885ad3b4ca6bf35ffaefa302072595a946/Vrame-1.0.3.tar.gz",
"platform": null,
"description": "# Vrame\r\n\r\nVrame is a Python library designed to efficiently parse and validate all columns in pandas DataFrame. It leverages the vectorized operations of pandas to significantly speed up the validation process, making it a powerful tool for data validation in data science and machine learning projects.\r\n\r\n## Features\r\n\r\n- **Vectorized Validation**: Utilizes pandas' vectorized operations for fast and efficient data validation.\r\n- **Similar Syntax to Pydantic**: Offers a familiar API for those who have used Pydantic, making it easy to adopt.\r\n- **Custom Validators**: Allows for the definition of custom validation rules to meet specific data requirements.\r\n\r\n## Installation\r\n\r\nTo install Vrame, use pip:\r\n\r\n pip install vrame\r\n\r\n\r\n## Usage\r\n\r\nFirst, define a schema for your DataFrame using Vrame's syntax, which is similar to Pydantic's:\r\n \r\n import pandas as pd\r\n import numpy as np\r\n from vrame.basemodel import BaseModel\r\n from vrame.column_types import (\r\n Integer,\r\n Float,\r\n Boolean,\r\n Datetime,\r\n String,\r\n List,\r\n Tuple,\r\n Dictionary,\r\n Set,\r\n Object\r\n )\r\n \r\n \r\n class Model(BaseModel):\r\n integer = Integer(lower=-1, upper=6, nullable=True)\r\n float = Float(lower=-1.0, upper=6.0, nullable=True)\r\n bool = Boolean(nullable=True)\r\n datetime = Datetime(lower=\"2024-03-20\", upper=\"2024-03-21\", nullable=True)\r\n list = List(nullable=True, min_items=1, max_items=3)\r\n tuple = Tuple(nullable=True, min_items=1, max_items=3)\r\n dictionary = Dictionary(nullable=True, min_items=1, max_items=3)\r\n set = Set(nullable=True, min_items=1, max_items=3)\r\n string = String(min_length=0, max_length=5, nullable=True)\r\n object = Object(nullable=True)\r\n \r\n \r\n if __name__ == \"__main__\":\r\n df = pd.DataFrame(\r\n {\r\n 'integer': [1, \"2\", 3, 4, np.nan],\r\n 'float': [1.0, \"2.0\", 3, \"4\", \"5\"],\r\n 'bool': [True, \"False\", \"True\", False, False],\r\n 'datetime': [\r\n 
\"2024-03-20\",\r\n \"2024-03-21\",\r\n \"2024-03-21\",\r\n \"2024-03-21\",\r\n \"2024-03-21\"\r\n ],\r\n 'list': [[1, 2], \"[3, 4]\", [5, 6], [7, 8], [9, 10]],\r\n 'tuple': [(1, 2), \"(3, 4)\", (5, 6), (7, 8), (9, 10)],\r\n 'dictionary': [\r\n {'a': 1, 'b': 2.1},\r\n \"{'e': 3, 'f': 4.0}\",\r\n {'a': 1, 'b': 2.1},\r\n {'a': 1, 'b': 2.1},\r\n {'a': 1, 'b': 2.1}\r\n ],\r\n 'set': [{1, 2}, \"{1, 2}\", {1, 2}, {1, 2}, {1, 2}],\r\n 'string': [\"str1\", \"str2\", \"\", \"12345\", \"I\"],\r\n 'object': [1, 2.0, False, np.nan, None]\r\n }\r\n )\r\n\r\n m = Model(df)\r\n df = m.parse_and_validate()\r\n",
"bugtrack_url": null,
"license": null,
"summary": "Efficiently parse and validate all columns in pandas DataFrame.",
"version": "1.0.3",
"project_urls": {
"Homepage": "https://github.com/yuenshingyan/Vrame",
"Issues": "https://github.com/yuenshingyan/Vrame/issues"
},
"split_keywords": [
"pandas",
" pd",
" dataframe",
" df",
" validation",
" validate",
" parsing",
" parse",
" pydantic",
" pydantic",
" effcient",
" effciently"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "08ae8e98f28f35762d0f66b94b13afabc0f8dc6b7d557217cdc64565337c4829",
"md5": "5c46456946c77d0194055d8728e47363",
"sha256": "fea500594250dde9fd438cdd419c4417234d19a9b8cc662cd6f38d3229c2b125"
},
"downloads": -1,
"filename": "Vrame-1.0.3-py3-none-any.whl",
"has_sig": false,
"md5_digest": "5c46456946c77d0194055d8728e47363",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 11496,
"upload_time": "2024-03-29T12:22:51",
"upload_time_iso_8601": "2024-03-29T12:22:51.366364Z",
"url": "https://files.pythonhosted.org/packages/08/ae/8e98f28f35762d0f66b94b13afabc0f8dc6b7d557217cdc64565337c4829/Vrame-1.0.3-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "56c428144a7cb9e0c51a1c7365b106885ad3b4ca6bf35ffaefa302072595a946",
"md5": "7b54f4a65489b8c332c9b9f33ade43f1",
"sha256": "442fa0c253cce08a3adbe87349739873f8e69381debd8009f9bd2e743302a0d3"
},
"downloads": -1,
"filename": "Vrame-1.0.3.tar.gz",
"has_sig": false,
"md5_digest": "7b54f4a65489b8c332c9b9f33ade43f1",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.10",
"size": 9899,
"upload_time": "2024-03-29T12:22:53",
"upload_time_iso_8601": "2024-03-29T12:22:53.268679Z",
"url": "https://files.pythonhosted.org/packages/56/c4/28144a7cb9e0c51a1c7365b106885ad3b4ca6bf35ffaefa302072595a946/Vrame-1.0.3.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-03-29 12:22:53",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "yuenshingyan",
"github_project": "Vrame",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "vrame"
}