# Apache Airflow Provider for Onehouse
This package is the Apache Airflow provider for Onehouse. It supplies operators and sensors for managing Onehouse clusters and jobs from your Airflow DAGs.
## Requirements
- Apache Airflow >= 2.9.2
- Python >= 3.10
## Installation
You can install this provider package via pip:
```bash
pip install apache-airflow-providers-onehouse
```
## Configuration
1. Set up an Airflow connection with the following details:
   - Connection Id: `onehouse_default` (or your custom connection id)
   - Connection Type: `Generic`
   - Host: `https://api.onehouse.ai`
   - Extra: Configure the following JSON:

     ```json
     {
       "project_uid": "your-project-uid",
       "user_id": "your-user-id",
       "api_key": "your-api-key",
       "api_secret": "your-api-secret",
       "link_uid": "your-link-uid",
       "region": "your-region"
     }
     ```
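If you manage connections in code rather than through the Airflow UI, the same connection can be created programmatically. The snippet below is a minimal sketch using Airflow's `Connection` model; the credential values are placeholders you must replace, and you should adapt the approach to however you handle secrets in your deployment.

```python
import json

from airflow.models import Connection
from airflow.utils.session import create_session

# Placeholder values -- replace with your Onehouse project credentials.
onehouse_extra = {
    "project_uid": "your-project-uid",
    "user_id": "your-user-id",
    "api_key": "your-api-key",
    "api_secret": "your-api-secret",
    "link_uid": "your-link-uid",
    "region": "your-region",
}

# Persist the connection to the Airflow metadata database.
with create_session() as session:
    session.add(
        Connection(
            conn_id="onehouse_default",
            conn_type="generic",
            host="https://api.onehouse.ai",
            extra=json.dumps(onehouse_extra),
        )
    )
```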
## Usage
### Basic Example DAG
```python
from datetime import datetime, timedelta

from airflow import DAG

from airflow_providers_onehouse.operators.clusters import (
    OnehouseCreateClusterOperator,
    OnehouseDeleteClusterOperator,
)
from airflow_providers_onehouse.operators.jobs import (
    OnehouseCreateJobOperator,
    OnehouseRunJobOperator,
    OnehouseDeleteJobOperator,
)
from airflow_providers_onehouse.sensors.onehouse import (
    OnehouseCreateClusterSensor,
    OnehouseJobRunSensor,
)

default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

cluster_name = "cluster_1"
job_name = "job_1"

bucket_name = "bucket-name"
dag_path = f"s3a://{bucket_name}/path/to/hello_world_job.py"
venv_path = f"s3a://{bucket_name}/path/to/venv.tar.gz"

with DAG(
    dag_id="example_dag",
    default_args=default_args,
    description="Example DAG",
    schedule=None,
    start_date=datetime(2025, 4, 28),
    catchup=False,
    tags=["onehouse", "example", "dag"],
) as dag:

    # Provision a Spark cluster sized between min_ocu and max_ocu.
    create_cluster = OnehouseCreateClusterOperator(
        task_id="create_onehouse_cluster",
        cluster_name=cluster_name,
        cluster_type="Spark",
        max_ocu=1,
        min_ocu=1,
        conn_id="onehouse_default",
    )

    # Block until the newly created cluster reports ready.
    wait_for_cluster_ready = OnehouseCreateClusterSensor(
        task_id="wait_for_cluster_ready",
        cluster_name="{{ ti.xcom_pull(task_ids='create_onehouse_cluster') }}",
        conn_id="onehouse_default",
        poke_interval=30,
        timeout=60 * 30,
    )

    # Register a PYTHON job that runs the script at dag_path,
    # shipping a packaged virtualenv alongside it via spark.archives.
    create_onehouse_job = OnehouseCreateJobOperator(
        task_id="create_onehouse_job",
        job_name=job_name,
        job_type="PYTHON",
        parameters=[
            "--conf", f"spark.archives={venv_path}#environment",
            "--conf", "spark.pyspark.python=./environment/bin/python",
            dag_path,
        ],
        cluster_name=cluster_name,
        conn_id="onehouse_default",
    )

    # Trigger a run of the job; the run id is pushed to XCom.
    run_onehouse_job = OnehouseRunJobOperator(
        task_id="run_onehouse_job",
        job_name=job_name,
        conn_id="onehouse_default",
    )

    # Poll until the job run completes.
    wait_for_job = OnehouseJobRunSensor(
        task_id="wait_for_job_completion",
        job_name=job_name,
        job_run_id="{{ ti.xcom_pull(task_ids='run_onehouse_job') }}",
        conn_id="onehouse_default",
        poke_interval=30,
        timeout=60 * 60,
    )

    # Clean up the job definition and the cluster.
    delete_onehouse_job = OnehouseDeleteJobOperator(
        task_id="delete_onehouse_job",
        job_name=job_name,
        conn_id="onehouse_default",
    )

    delete_onehouse_cluster = OnehouseDeleteClusterOperator(
        task_id="delete_onehouse_cluster",
        cluster_name=cluster_name,
        conn_id="onehouse_default",
    )

    (
        create_cluster
        >> wait_for_cluster_ready
        >> create_onehouse_job
        >> run_onehouse_job
        >> wait_for_job
        >> delete_onehouse_job
        >> delete_onehouse_cluster
    )
```
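The DAG above assumes a PySpark script uploaded to `dag_path` and a packaged virtualenv at `venv_path`. The script below is an illustrative sketch of what `hello_world_job.py` might contain; it is not part of the provider, just a minimal job that proves the cluster can run Python work.

```python
# hello_world_job.py -- illustrative PySpark script referenced by dag_path above.
from pyspark.sql import SparkSession


def main() -> None:
    spark = SparkSession.builder.appName("hello_world_job").getOrCreate()

    # Build a tiny DataFrame and print it, just to confirm the job ran.
    df = spark.createDataFrame([("hello", 1), ("world", 2)], ["word", "count"])
    df.show()

    spark.stop()


if __name__ == "__main__":
    main()
```

The `venv.tar.gz` archive passed through `spark.archives` is typically produced with a tool such as `venv-pack` or `conda-pack` and uploaded to the same bucket before the DAG runs.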