# aws_feature_store
It is a simplified implementation of the SageMaker Feature Store approach.
## Installation
---------------
Use the package manager [pip](https://pip.pypa.io/en/stable/) to install aws_feature_store.
```bash
pip install aws_feature_store
```
## Initialize feature group
---------------------------
```python
from aws_feature_store import FeatureGroup,FeatureDefinition,FeatureTypeEnum
bucket_name = '{bucket_for_feature_store}'
s3_folder = '{folder_for_feature_store}'
my_feature_name = '{your_feature_name}'
feature_group_name = f'{my_feature_name}/commit_id={my_feature_name}_{commit_id}'
feature_group = FeatureGroup(
name=feature_group_name,
boto3_session = boto3_session,
s3_uri=f"s3://{bucket_name}/{s3_folder}"
)
```
## Create feature group
--------------------
```python
def create_feature_group(feature_group):
description="What is my feature group about"
feature_script_repo="{repo_link_to_script}"
data_source="{what data are used}"
record_identifier_feature_name = "column name to store id"
event_time_feature_name = "{column name to store timestamp}"
partition_columns=['biz_id','customer_id']
feature_definitions=[
FeatureDefinition(feature_name="column_name1", feature_type=FeatureTypeEnum.INTEGRAL),
FeatureDefinition(feature_name="column_name2", feature_type=FeatureTypeEnum.STRING),
]
feature_group.create(
record_identifier_name=record_identifier_feature_name,
event_time_feature_name=event_time_feature_name,
feature_script_repo=feature_script_repo,
partition_columns=partition_columns,
data_source=data_source,
description=description,
file_format='parquet/json',
feature_definitions=feature_definitions
)
return feature_group
if feature_group.exists() is None:
feature_group = create_feature_group(feature_group)
```
## Ingest data
--------------
```python
import pandas as pd
data = pd.read_json('data.json')
feature_group.ingest_data_frame(data,f"mlfow_parent_run_id={parent_run_id}/{filename_without_extention}")
```
Raw data
{
"_id": null,
"home_page": "https://github.com/bilykigor/aws_feature_store",
"name": "aws-feature-store",
"maintainer": "",
"docs_url": null,
"requires_python": ">= 3.6",
"maintainer_email": "",
"keywords": "Simplified SageMaker Feature Store",
"author": "Ihor Bilyk",
"author_email": "",
"download_url": "https://files.pythonhosted.org/packages/17/aa/625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da/aws_feature_store-0.0.17.tar.gz",
"platform": null,
"description": "# aws_feature_store\n\nIt is a simplified implementation of SageMaker Feature Store approach.\n\n## Installation\n---------------\n\nUse the package manager [pip](https://pip.pypa.io/en/stable/) to install foobar.\n\n```bash\npip install aws_feature_store\n```\n\n\n## Initialize feature group\n---------------------------\n\n```python\nfrom aws_feature_store import FeatureGroup,FeatureDefinition,FeatureTypeEnum\n\n\nbucket_name = '{bucket_for_feature_store}'\ns3_folder = '{folder_for_feature_store}'\nmy_feature_name = '{your_feature_name}'\n\nfeature_group_name = f'{my_feature_name}/commit_id={my_feature_name}_{commit_id}'\nfeature_group = FeatureGroup(\n name=feature_group_name,\n boto3_session = boto3_session,\n s3_uri=f\"s3://{bucket_name}/{s3_folder}\"\n )\n```\n\n\n## Create feature group\n--------------------\n\n```python\n\ndef create_feature_group(feature_group):\n description=\"What is my feature group about\"\n feature_script_repo=\"{repo_link_to_script}\"\n data_source=\"{what data are used}\"\n\n record_identifier_feature_name = \"column name to store id\" \n event_time_feature_name = \"{column name to store timestamp}\"\n\n partition_columns=['biz_id','customer_id']\n \n feature_definitions=[\n FeatureDefinition(feature_name=\"column_name1\", feature_type=FeatureTypeEnum.INTEGRAL),\n FeatureDefinition(feature_name=\"column_name2\", feature_type=FeatureTypeEnum.STRING),\n ]\n \n feature_group.create(\n record_identifier_name=record_identifier_feature_name,\n event_time_feature_name=event_time_feature_name,\n feature_script_repo=feature_script_repo,\n partition_columns=partition_columns,\n data_source=data_source,\n description=description,\n file_format='parquet/json',\n feature_definitions=feature_definitions\n )\n \n return feature_group\n\nif feature_group.exists() is None:\n feature_group = create_feature_group(feature_group)\n\n```\n\n## Ingest data\n--------------\n\n```python\nimport pandas as pd\ndata = 
pd.read_json('data.json')\nfeature_group.ingest_data_frame(data,f\"mlfow_parent_run_id={parent_run_id}/{filename_without_extention}\")\n```\n",
"bugtrack_url": null,
"license": "Apache License 2.0",
"summary": "Simplified SageMaker Feature Store",
"version": "0.0.17",
"project_urls": {
"Homepage": "https://github.com/bilykigor/aws_feature_store"
},
"split_keywords": [
"simplified",
"sagemaker",
"feature",
"store"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "6a9edc130182c60b335547c6eaab1ee6fb6701e8a9b939198d8ce38e01945d25",
"md5": "eaf0763fb7d5675b1db8013549203bd7",
"sha256": "0396c8f4ab09fa28c9e287f99aff335cd89fe45a2d62344d7dead52dd0b548e2"
},
"downloads": -1,
"filename": "aws_feature_store-0.0.17-py3-none-any.whl",
"has_sig": false,
"md5_digest": "eaf0763fb7d5675b1db8013549203bd7",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">= 3.6",
"size": 11564,
"upload_time": "2023-08-02T15:46:37",
"upload_time_iso_8601": "2023-08-02T15:46:37.111247Z",
"url": "https://files.pythonhosted.org/packages/6a/9e/dc130182c60b335547c6eaab1ee6fb6701e8a9b939198d8ce38e01945d25/aws_feature_store-0.0.17-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "17aa625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da",
"md5": "063203adf96bbfbe6f92de9319caa192",
"sha256": "c56bd83c7d38496dab17ee568f7a99428b468506efbd3c13b5ea704e5a818a70"
},
"downloads": -1,
"filename": "aws_feature_store-0.0.17.tar.gz",
"has_sig": false,
"md5_digest": "063203adf96bbfbe6f92de9319caa192",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">= 3.6",
"size": 11271,
"upload_time": "2023-08-02T15:46:38",
"upload_time_iso_8601": "2023-08-02T15:46:38.874856Z",
"url": "https://files.pythonhosted.org/packages/17/aa/625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da/aws_feature_store-0.0.17.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-08-02 15:46:38",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "bilykigor",
"github_project": "aws_feature_store",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "aws-feature-store"
}