aws-feature-store


Nameaws-feature-store JSON
Version 0.0.17 PyPI version JSON
download
home_pagehttps://github.com/bilykigor/aws_feature_store
SummarySimplified SageMaker Feature Store
upload_time2023-08-02 15:46:38
maintainer
docs_urlNone
authorIhor Bilyk
requires_python>= 3.6
licenseApache License 2.0
keywords simplified sagemaker feature store
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            # aws_feature_store

It is a simplified implementation of the SageMaker Feature Store approach.

## Installation
---------------

Use the package manager [pip](https://pip.pypa.io/en/stable/) to install aws_feature_store.

```bash
pip install aws_feature_store
```


## Initialize feature group
---------------------------

```python
from aws_feature_store import FeatureGroup,FeatureDefinition,FeatureTypeEnum


# Placeholder values: replace each brace-wrapped string with your own name.
bucket_name = '{bucket_for_feature_store}'
s3_folder = '{folder_for_feature_store}'
my_feature_name = '{your_feature_name}'

# NOTE: `commit_id` is not defined in this snippet; assign it before running.
feature_group_name = f'{my_feature_name}/commit_id={my_feature_name}_{commit_id}'
# NOTE: `boto3_session` is not defined in this snippet either
# (presumably a boto3 Session object; confirm against the package).
feature_group = FeatureGroup(
        name=feature_group_name,
        boto3_session = boto3_session,
        s3_uri=f"s3://{bucket_name}/{s3_folder}"
        )
```


## Create feature group
--------------------

```python

def create_feature_group(feature_group):
    """Call ``feature_group.create`` with example settings and return the group.

    Every string value passed below is a README placeholder; substitute the
    column names, repository link and descriptions that match your own data.
    """
    # Columns stored in the group, each paired with its feature type.
    column_schema = [
        FeatureDefinition(feature_name="column_name1", feature_type=FeatureTypeEnum.INTEGRAL),
        FeatureDefinition(feature_name="column_name2", feature_type=FeatureTypeEnum.STRING),
    ]

    feature_group.create(
        # Name of the column that holds the record id.
        record_identifier_name="column name to store id",
        # Name of the column that holds the event timestamp.
        event_time_feature_name="{column name to store timestamp}",
        feature_script_repo="{repo_link_to_script}",
        partition_columns=['biz_id', 'customer_id'],
        data_source="{what data are used}",
        description="What is my feature group about",
        file_format='parquet/json',
        feature_definitions=column_schema,
    )

    return feature_group

# Create the feature group only when exists() returns None
# (presumably meaning it has not been created yet; confirm against the package).
if feature_group.exists() is None:
    feature_group = create_feature_group(feature_group)

```

## Ingest data
--------------

```python
import pandas as pd
# Load the records to ingest from a local JSON file into a DataFrame.
data = pd.read_json('data.json')
# NOTE: `parent_run_id` and `filename_without_extention` are not defined in
# this snippet; assign them before running. The second argument is a string
# built from those two values (presumably the storage sub-path; confirm).
feature_group.ingest_data_frame(data,f"mlfow_parent_run_id={parent_run_id}/{filename_without_extention}")
```

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/bilykigor/aws_feature_store",
    "name": "aws-feature-store",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">= 3.6",
    "maintainer_email": "",
    "keywords": "Simplified SageMaker Feature Store",
    "author": "Ihor Bilyk",
    "author_email": "",
    "download_url": "https://files.pythonhosted.org/packages/17/aa/625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da/aws_feature_store-0.0.17.tar.gz",
    "platform": null,
    "description": "# aws_feature_store\n\nIt is a simplified implementation of SageMaker Feature Store approach.\n\n## Installation\n---------------\n\nUse the package manager [pip](https://pip.pypa.io/en/stable/) to install foobar.\n\n```bash\npip install aws_feature_store\n```\n\n\n## Initialize feature group\n---------------------------\n\n```python\nfrom aws_feature_store import FeatureGroup,FeatureDefinition,FeatureTypeEnum\n\n\nbucket_name = '{bucket_for_feature_store}'\ns3_folder = '{folder_for_feature_store}'\nmy_feature_name = '{your_feature_name}'\n\nfeature_group_name = f'{my_feature_name}/commit_id={my_feature_name}_{commit_id}'\nfeature_group = FeatureGroup(\n        name=feature_group_name,\n        boto3_session = boto3_session,\n        s3_uri=f\"s3://{bucket_name}/{s3_folder}\"\n        )\n```\n\n\n## Create feature group\n--------------------\n\n```python\n\ndef create_feature_group(feature_group):\n    description=\"What is my feature group about\"\n    feature_script_repo=\"{repo_link_to_script}\"\n    data_source=\"{what data are used}\"\n\n    record_identifier_feature_name = \"column name to store id\" \n    event_time_feature_name = \"{column name to store timestamp}\"\n\n    partition_columns=['biz_id','customer_id']\n    \n    feature_definitions=[\n        FeatureDefinition(feature_name=\"column_name1\", feature_type=FeatureTypeEnum.INTEGRAL),\n        FeatureDefinition(feature_name=\"column_name2\", feature_type=FeatureTypeEnum.STRING),\n        ]\n    \n    feature_group.create(\n        record_identifier_name=record_identifier_feature_name,\n        event_time_feature_name=event_time_feature_name,\n        feature_script_repo=feature_script_repo,\n        partition_columns=partition_columns,\n        data_source=data_source,\n        description=description,\n        file_format='parquet/json',\n        feature_definitions=feature_definitions\n    )\n    \n    return feature_group\n\nif feature_group.exists() is None:\n    
feature_group = create_feature_group(feature_group)\n\n```\n\n## Ingest data\n--------------\n\n```python\nimport pandas as pd\ndata = pd.read_json('data.json')\nfeature_group.ingest_data_frame(data,f\"mlfow_parent_run_id={parent_run_id}/{filename_without_extention}\")\n```\n",
    "bugtrack_url": null,
    "license": "Apache License 2.0",
    "summary": "Simplified SageMaker Feature Store",
    "version": "0.0.17",
    "project_urls": {
        "Homepage": "https://github.com/bilykigor/aws_feature_store"
    },
    "split_keywords": [
        "simplified",
        "sagemaker",
        "feature",
        "store"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "6a9edc130182c60b335547c6eaab1ee6fb6701e8a9b939198d8ce38e01945d25",
                "md5": "eaf0763fb7d5675b1db8013549203bd7",
                "sha256": "0396c8f4ab09fa28c9e287f99aff335cd89fe45a2d62344d7dead52dd0b548e2"
            },
            "downloads": -1,
            "filename": "aws_feature_store-0.0.17-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "eaf0763fb7d5675b1db8013549203bd7",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">= 3.6",
            "size": 11564,
            "upload_time": "2023-08-02T15:46:37",
            "upload_time_iso_8601": "2023-08-02T15:46:37.111247Z",
            "url": "https://files.pythonhosted.org/packages/6a/9e/dc130182c60b335547c6eaab1ee6fb6701e8a9b939198d8ce38e01945d25/aws_feature_store-0.0.17-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "17aa625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da",
                "md5": "063203adf96bbfbe6f92de9319caa192",
                "sha256": "c56bd83c7d38496dab17ee568f7a99428b468506efbd3c13b5ea704e5a818a70"
            },
            "downloads": -1,
            "filename": "aws_feature_store-0.0.17.tar.gz",
            "has_sig": false,
            "md5_digest": "063203adf96bbfbe6f92de9319caa192",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">= 3.6",
            "size": 11271,
            "upload_time": "2023-08-02T15:46:38",
            "upload_time_iso_8601": "2023-08-02T15:46:38.874856Z",
            "url": "https://files.pythonhosted.org/packages/17/aa/625c36b445e1720416166fc26f474921115a2060777b90c489731debb5da/aws_feature_store-0.0.17.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-08-02 15:46:38",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "bilykigor",
    "github_project": "aws_feature_store",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "lcname": "aws-feature-store"
}
        
Elapsed time: 0.10720s