# ⚡️ sema4ai-data
Python library for developing Sema4.ai data packages: build data-driven actions that query databases and other data sources.
This library is designed to work with the `Sema4.ai Data Server`, which is included in the [Sema4.ai Data Access](https://marketplace.visualstudio.com/items?itemName=sema4ai.sema4ai-data-access) VSCode extension.
## Installation
```bash
pip install sema4ai-data
```
## Quick Start
```python
from typing import Annotated
from sema4ai.data import query, DataSource, DataSourceSpec
from sema4ai.actions import Response, Table
# Define a data source
PostgresDataSource = Annotated[DataSource, DataSourceSpec(
name="my_postgres_db",
engine="postgres",
description="Main PostgreSQL database"
)]
# Create a data query
@query
def get_users(datasource: PostgresDataSource, limit: int = 10) -> Response[Table]:
"""Get users from the database."""
result = datasource.query("SELECT * FROM `my_postgres_db`.users LIMIT 5", [limit])
return Response(result=result.to_table())
```
## Core Concepts
### DataSource
The `DataSource` class is the main interface for executing queries against configured data sources. It's automatically injected by the framework when you use the `@query` decorator.
**Key Methods** (sketched in the example after this list):
- `query(sql, params=None)` - Execute SQL queries with optional parameters
- `native_query(sql, params=None)` - Execute engine-specific queries
- `connection()` - Get the underlying data server connection
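A minimal sketch of these methods inside a `@query` function; the `my_database` source and `orders` table below are illustrative placeholders, not part of the library:

```python
from typing import Annotated

from sema4ai.data import DataSource, DataSourceSpec, query
from sema4ai.actions import Response, Table

# Hypothetical data source used only for this sketch.
MyDatabaseSource = Annotated[DataSource, DataSourceSpec(
    name="my_database",
    engine="postgres",
    description="Illustrative database",
)]

@query
def count_large_orders(min_total: float, datasource: MyDatabaseSource) -> Response[Table]:
    """Illustrative only: count orders at or above a given total."""
    # query(): data-server SQL with optional positional parameters.
    result = datasource.query(
        "SELECT COUNT(*) AS order_count FROM my_database.orders WHERE total >= ?",
        [min_total],
    )

    # native_query(): the data source's own SQL dialect, wrapped by the library.
    # result = datasource.native_query(
    #     "SELECT COUNT(*) AS order_count FROM orders WHERE total >= $total",
    #     {"total": min_total},
    # )

    # connection(): the underlying data server connection, for lower-level access.
    # server = datasource.connection()

    return Response(result=result.to_table())
```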
### DataSourceSpec
Used to specify the configuration of a data source through type annotations:
```python
from typing import Annotated
from sema4ai.data import DataSource, DataSourceSpec
# Database data source
DatabaseSource = Annotated[DataSource, DataSourceSpec(
name="my_database",
engine="postgres", # or "mysql", "sqlite", etc.
description="Production database"
)]
# File-based data source
FileSource = Annotated[DataSource, DataSourceSpec(
engine="files",
file="data/customers.csv",
created_table="customers",
description="Customer data from CSV"
)]
# Knowledge base for semantic search
KnowledgeBaseSource = Annotated[DataSource, DataSourceSpec(
name="company_kb",
engine="sema4_knowledge_base",
description="Company knowledge base for semantic search"
)]
```
**Parameters:**
- `engine` (required) - The data source engine type
- `name` - Name of the data source
- `description` - Human-readable description
- `file` - File path for file-based sources
- `created_table` - Table name created from files
- `setup_sql` - SQL commands to run on setup (see the sketch after this list)
- `setup_sql_files` - SQL files to execute on setup
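For example, `setup_sql` can prepare a source when it is created. The snippet below is only a sketch: it assumes `setup_sql` accepts a single SQL string that is executed against the data server, and the file and view names are made up.

```python
from typing import Annotated

from sema4ai.data import DataSource, DataSourceSpec

# Hypothetical: load a CSV and create a cleaned-up view over it at setup time.
CleanSalesSource = Annotated[DataSource, DataSourceSpec(
    engine="files",
    file="data/sales_raw.csv",
    created_table="sales_raw",
    description="Raw sales data with a cleanup view",
    setup_sql=(
        "CREATE VIEW files.sales_clean AS "
        "SELECT * FROM files.sales_raw WHERE amount IS NOT NULL"
    ),
)]
```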
## Decorators
### @query
The main decorator for defining data queries; decorated functions are exposed as Sema4.ai actions:
```python
from sema4ai.data import query
from sema4ai.actions import Response, Table
@query
def get_countries(datasource: PostgresCustomersDataSource) -> str:
sql = """
SELECT distinct(country)
FROM public_demo.demo_customers
LIMIT 100;
"""
result = datasource.query(sql)
return result.to_markdown()
```
**Parameters** (illustrated in the sketch below):
- `is_consequential` - Whether the action has side effects or updates a resource (default: False)
- `display_name` - Custom display name for the action
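Both are keyword arguments to the decorator itself. A short sketch, where the data source, the table, and the assumption that the data server accepts DML through `query()` are all illustrative:

```python
from typing import Annotated

from sema4ai.data import DataSource, DataSourceSpec, query
from sema4ai.actions import Response, Table

UsersDB = Annotated[DataSource, DataSourceSpec(
    name="users_db",
    engine="postgres",
    description="Illustrative users database",
)]

# display_name controls how the action is presented; is_consequential=True marks
# it as having side effects. Assumes the data server accepts UPDATE via query().
@query(is_consequential=True, display_name="Deactivate user")
def deactivate_user(user_id: int, datasource: UsersDB) -> Response[Table]:
    """Illustrative only: deactivate a single user."""
    result = datasource.query(
        "UPDATE users_db.users SET active = false WHERE id = ?",
        [user_id],
    )
    return Response(result=result.to_table())
```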
### @predict ⚠️ **DEPRECATED**
**Note**: The `@predict` decorator is deprecated as of version 1.0.3. Use `@query` instead for all operations including predictions.
```python
# OLD (deprecated):
@predict
def predict_something(datasource: SomeDataSource):
pass
# NEW (recommended):
@query
def predict_something(datasource: SomeDataSource):
pass
```
## ResultSet
The `ResultSet` class represents query results and provides various methods to work with the data:
```python
# Convert to different formats
result = datasource.query("SELECT * FROM `my_database`.users")
# As a table for actions
table = result.to_table()
# As a list of dictionaries
dicts = result.to_dict_list()
# As structured objects
from pydantic import BaseModel
class User(BaseModel):
id: int
name: str
email: str
users = result.build_list(User)
# Iterate over results
for row_dict in result.iter_as_dicts():
print(row_dict)
for row_tuple in result.iter_as_tuples():
print(row_tuple)
```
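Results can also be handed to pandas or rendered as markdown using the methods listed in the API reference below (continuing with the `result` from the snippet above):

```python
# As a pandas DataFrame for further analysis
df = result.to_pandas_df()
print(df.describe())

# As a markdown table, e.g. for chat responses
print(result.to_markdown_table())
```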
## Examples

### Basic Database Query
```python
from typing import Annotated
from pydantic import BaseModel
from sema4ai.data import query, DataSource, DataSourceSpec
from sema4ai.actions import Response
class Product(BaseModel):
id: int
name: str
price: float
category: str
ProductDB = Annotated[DataSource, DataSourceSpec(
name="products",
engine="postgres",
description="Product catalog database"
)]
@query
def search_products(
category: str,
max_price: float,
datasource: ProductDB
) -> Response[list[Product]]:
"""Search products by category and price."""
result = datasource.query(
"""
SELECT id, name, price, category
FROM products.products
WHERE category = ? AND price <= ?
ORDER BY price ASC
""",
[category, max_price]
)
return Response(result=result.build_list(Product))
```
### File-based Data Source
```python
SalesData = Annotated[DataSource, DataSourceSpec(
engine="files",
file="data/sales_2024.csv",
created_table="sales",
description="Sales data for 2024"
)]
@query
def monthly_sales_report(
month: int,
datasource: SalesData
) -> Response[Table]:
"""Generate monthly sales report."""
result = datasource.query(
"""
SELECT
product_category,
SUM(amount) as total_sales,
COUNT(*) as transaction_count
FROM files.sales
WHERE MONTH(sale_date) = ?
GROUP BY product_category
ORDER BY total_sales DESC
""",
[month]
)
return Response(result=result.to_table())
```
### Knowledge Base Search
```python
KnowledgeBase = Annotated[DataSource, DataSourceSpec(
name="company_kb",
engine="sema4_knowledge_base",
description="Company knowledge base for semantic search"
)]
@query
def search_knowledge(
query_text: str,
    datasource: KnowledgeBase,
    relevance_threshold: float = 0.7,
) -> Response[Table]:
"""Search company knowledge base."""
result = datasource.query(
"""
SELECT chunk_content, relevance_score, document_name
FROM company_kb
WHERE content = ? AND relevance_threshold = ?
ORDER BY relevance_score DESC
LIMIT 5
""",
[query_text, relevance_threshold]
)
return Response(result=result.to_table())
```
### Using native_query for Engine-Specific Syntax
```python
@query
def get_user_by_id(
user_id: int,
datasource: MyDataSource
) -> Response[Table]:
"""Get user using native SQL syntax."""
    # Engine-native SQL; the library wraps it as SELECT * FROM <datasource_name> (<query>)
result = datasource.native_query(
"SELECT * FROM user_info WHERE id = $id",
{"id": user_id}
)
return Response(result=result.to_table())
```
## API Reference
### Functions
#### `query(func=None, *, is_consequential=None, display_name=None)`
Decorator for creating query actions.
#### `predict(func=None, *, is_consequential=None, display_name=None)` ⚠️ **DEPRECATED**
**Deprecated**: Use `@query` instead. This decorator is deprecated as of version 1.0.3.
#### `get_connection() -> DataServerConnection`
Get a connection to the data server.
#### `metadata(package_root: Path) -> dict`
Get metadata about data sources in a package.
#### `get_snowflake_connection_details()`
Get Snowflake-specific connection configuration.
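A small usage sketch of these helpers; the return values are treated as opaque here, and `get_snowflake_connection_details()` is assumed to read the local Snowflake auth configuration described in the changelog:

```python
from pathlib import Path

from sema4ai.data import get_connection, get_snowflake_connection_details, metadata

# Connection to the configured data server (requires a running data server).
connection = get_connection()

# Metadata about the data sources defined in the current package.
package_metadata = metadata(Path("."))
print(package_metadata)

# Snowflake connection configuration (read from the local auth file, if present).
details = get_snowflake_connection_details()
print(details)
```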
### Classes
#### `DataSource`
Main interface for executing queries against data sources.
**Methods:**
- `query(sql: str, params: list = None) -> ResultSet`
- `native_query(sql: str, params: dict = None) -> ResultSet`
- `connection() -> DataServerConnection`
**Properties:**
- `datasource_name: str` - Name of the data source
#### `DataSourceSpec`
Configuration specification for data sources.
#### `ResultSet`
Container for query results with conversion methods.
**Methods:**
- `to_table() -> Table` - Convert to sema4ai Table
- `to_dict_list() -> list[dict]` - Convert to list of dictionaries
- `build_list(item_class: type[T]) -> list[T]` - Build typed object list
- `iter_as_dicts() -> Iterator[dict]` - Iterate as dictionaries
- `iter_as_tuples() -> Iterator[tuple]` - Iterate as tuples
- `to_pandas_df() -> pd.DataFrame` - Convert to pandas DataFrame
- `to_markdown_table() -> str` - Convert to markdown table
### Data Models
#### `SourceInfo`
Information about a data source configuration.
#### `TableInfo`
Metadata about database tables.
#### `ColumnInfo`
Information about table columns.
#### `KnowledgeBaseInfo`
Metadata about knowledge base configurations.
## Changelog
## Unreleased
## 1.0.7 - 2025-07-28
- Improve README and add changelog when publishing to PyPI
## 1.0.6 - 2025-06-18
- Simplify error message on `run_sql` function call.
## 1.0.5 - 2025-05-20
- Allow extra fields in `sf-auth.json` without changing behaviour of `get_snowflake_connection_details`.
## 1.0.4 - 2025-05-13
- Add `sema4_knowledge_base` engine to support knowledge base as a data source
## 1.0.3 - 2025-04-24
- Add deprecation warning for `@predict` decorator and `DataServerConnection.predict` method as Lightwood is being
phased out for data server predictions. Use `@query` or `connection.query()` instead.
- Update to latest `sema4ai-actions` version
## 1.0.2 - 2025-03-06
- Fix Snowflake local auth file path for Windows
## 1.0.1 - 2025-02-28
- Fix private key passphrase handling
## 1.0.0 - 2025-02-25
- Add `private_key_file_pwd` to snowflake connection details when it exists in auth config file
- `SnowflakeAuthenticationError` now inherits from `ActionError`.
## 0.1.0 - 2025-02-18
- Added `native_query()` method which will automatically wrap the query in a `SELECT * FROM <datasource_name> (<query>)` clause
so that the query can be executed in the native SQL syntax of the data source instead of the syntax required by
the data server.
- If no parameters are provided, the query is returned as is (even if parameters are detected in the query -- added so that
the user can do the escaping themselves if needed if the SQL syntax accepts the parameters in a different way).
## 0.0.9 - 2025-02-14
- Correct the local authentication JSON file path for Snowflake in `get_snowflake_connection_details`
## 0.0.8 - 2025-02-14
- Add `get_snowflake_connection_details` helper function to get the connection details for Snowflake.
## 0.0.7 - 2025-02-06
- Corrected typo in `ColumInfo`.
- Updated `list_knowledge_bases` method to return `KnowledgeBaseInfo`.
## 0.0.6 - 2025-01-31
- Add data utility methods to `DataServerConnection`
## 0.0.5 - 2024-12-20
- Added `execute_sql()` to the `DataSource` class.
## 0.0.4 - 2024-12-19
- New utility methods for the `ResultSet` class:
- `to_dataframe()` (alias for `as_dataframe`)
- `to_table()` (creates a `Table` object that can be used to build a structured response)
- `to_dict_list()` (returns a list of dictionaries)
- `__iter__()` (same as `iter_as_dicts`)
- `__len__()`
- Retry login if the server returns a 401 error.
- Retry SQL requests (once) if the server returns an unexpected error (as it may be a transient error).
- Added `sema4ai.data.get_connection()` to get the configured connection to the data server.
- **Backward incompatible change**: The queries/predictions must always use the full data source name to access a table and not just the table name
regardless of the data source name configured in the `DataSourceSpec`.
i.e.: SQL like `SELECT * FROM my_datasource.my_table` is required instead of `SELECT * FROM my_table`.
## 0.0.3 - 2024-11-27
- Using REST API instead of PyMySQL.
- ResultSet APIs (provisional):
- `iter_as_dicts()` (new in 0.0.3)
- `iter_as_tuples()` (new in 0.0.3)
- `as_dataframe()` (new in 0.0.1)
- `build_list(item_class)` (new in 0.0.1)
- `to_markdown()` (new in 0.0.1)
## 0.0.2 - 2024-11-25
- Changed metadata format to have `_` instead of `-` in names.
- Made `defined_at/file` in metadata relative.
- Added support for `setup_sql_files` in `DataSourceSpec`.
- Default datasource named `models` is used for custom and prediction engines.
## 0.0.1 - 2024-11-18
- Initial release
- Added API:
- `from sema4ai.data import query` to mark function as `@query`
- `from sema4ai.data import predict` to mark function as `@predict`
- `from sema4ai.data import DataSource` to define a data source
- `from sema4ai.data import DataSourceSpec` to define a data source specification using an `Annotated` type
## License
See [LICENSE](LICENSE) - Sema4.ai End User License Agreement