<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<p align="center">
<a href="https://hudi.apache.org/">
<img src="https://hudi.apache.org/assets/images/hudi_logo_transparent_1400x600.png" alt="Hudi logo" height="120px">
</a>
</p>
<p align="center">
A native Rust library for Apache Hudi, with bindings to Python
<br>
<br>
<a href="https://github.com/apache/hudi-rs/actions/workflows/ci.yml">
<img alt="hudi-rs ci" src="https://github.com/apache/hudi-rs/actions/workflows/ci.yml/badge.svg">
</a>
<a href="https://codecov.io/github/apache/hudi-rs">
<img alt="hudi-rs codecov" src="https://codecov.io/github/apache/hudi-rs/graph/badge.svg">
</a>
<a href="https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g">
<img alt="join hudi slack" src="https://img.shields.io/badge/slack-%23hudi-72eff8?logo=slack&color=48c628">
</a>
<a href="https://x.com/apachehudi">
<img alt="follow hudi x/twitter" src="https://img.shields.io/twitter/follow/apachehudi?label=apachehudi">
</a>
<a href="https://www.linkedin.com/company/apache-hudi">
<img alt="follow hudi linkedin" src="https://img.shields.io/badge/apache%E2%80%93hudi-0077B5?logo=linkedin">
</a>
</p>
The `hudi-rs` project aims to broaden the use of [Apache Hudi](https://github.com/apache/hudi) for a diverse range of
users and projects.
| Source | Installation Command |
|---------------|----------------------|
| **PyPI** | `pip install hudi` |
| **Crates.io** | `cargo add hudi` |
## Example usage
> [!NOTE]
> These examples expect a Hudi table to exist at `/tmp/trips_table`, created using
> the [quick start guide](https://hudi.apache.org/docs/quick-start-guide).
### Python
Read a Hudi table into a PyArrow table.
```python
from hudi import HudiTableBuilder
import pyarrow as pa
hudi_table = (
HudiTableBuilder
.from_base_uri("/tmp/trips_table")
.with_option("hoodie.read.as.of.timestamp", "20241122010827898")
.build()
)
records = hudi_table.read_snapshot(filters=[("city", "=", "san_francisco")])
arrow_table = pa.Table.from_batches(records)
result = arrow_table.select(["rider", "city", "ts", "fare"])
print(result)
```
### Rust (DataFusion)
<details>
<summary>Add the hudi crate with the datafusion feature to your application to query a Hudi table.</summary>
```shell
cargo new my_project --bin && cd my_project
cargo add tokio@1 datafusion@42
cargo add hudi --features datafusion
```
Update `src/main.rs` with the code snippet below, then run `cargo run`.
</details>
```rust
use std::sync::Arc;
use datafusion::error::Result;
use datafusion::prelude::{DataFrame, SessionContext};
use hudi::HudiDataSource;
#[tokio::main]
async fn main() -> Result<()> {
let ctx = SessionContext::new();
let hudi = HudiDataSource::new_with_options(
"/tmp/trips_table",
[("hoodie.read.as.of.timestamp", "20241122010827898")]).await?;
ctx.register_table("trips_table", Arc::new(hudi))?;
let df: DataFrame = ctx.sql("SELECT * from trips_table where city = 'san_francisco'").await?;
df.show().await?;
Ok(())
}
```
### Work with cloud storage
Ensure cloud storage credentials are set properly as environment variables, e.g., `AWS_*`, `AZURE_*`, or `GOOGLE_*`.
Relevant storage environment variables will then be picked up. The target table's base URI, using a scheme such
as `s3://`, `az://`, or `gs://`, will be processed accordingly.
Alternatively, you can pass the storage configuration as options to the `HudiTableBuilder` or `HudiDataSource`.
### Python
```python
from hudi import HudiTableBuilder
hudi_table = (
HudiTableBuilder
.from_base_uri("s3://bucket/trips_table")
.with_option("aws_region", "us-west-2")
.build()
)
```
### Rust (DataFusion)
```rust
use hudi::HudiDataSource;
async fn main() -> Result<()> {
let hudi = HudiDataSource::new_with_options(
"s3://bucket/trips_table",
[("aws_region", "us-west-2")]
).await?;
}
```
## Contributing
Check out the [contributing guide](./CONTRIBUTING.md) for all the details about making contributions to the project.
Raw data
{
"_id": null,
"home_page": "https://github.com/apache/hudi-rs",
"name": "hudi",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.9",
"maintainer_email": null,
"keywords": "apachehudi, hudi, datalake, arrow",
"author": null,
"author_email": null,
"download_url": "https://files.pythonhosted.org/packages/71/97/19f622a7f056b68e07698602762c7468ad6f957d938e6d890f82af3dd9a3/hudi-0.2.0.tar.gz",
"platform": null,
"description": "<!--\n ~ Licensed to the Apache Software Foundation (ASF) under one\n ~ or more contributor license agreements. See the NOTICE file\n ~ distributed with this work for additional information\n ~ regarding copyright ownership. The ASF licenses this file\n ~ to you under the Apache License, Version 2.0 (the\n ~ \"License\"); you may not use this file except in compliance\n ~ with the License. You may obtain a copy of the License at\n ~\n ~ http://www.apache.org/licenses/LICENSE-2.0\n ~\n ~ Unless required by applicable law or agreed to in writing,\n ~ software distributed under the License is distributed on an\n ~ \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n ~ KIND, either express or implied. See the License for the\n ~ specific language governing permissions and limitations\n ~ under the License.\n-->\n\n<p align=\"center\">\n <a href=\"https://hudi.apache.org/\">\n <img src=\"https://hudi.apache.org/assets/images/hudi_logo_transparent_1400x600.png\" alt=\"Hudi logo\" height=\"120px\">\n </a>\n</p>\n<p align=\"center\">\n A native Rust library for Apache Hudi, with bindings to Python\n <br>\n <br>\n <a href=\"https://github.com/apache/hudi-rs/actions/workflows/ci.yml\">\n <img alt=\"hudi-rs ci\" src=\"https://github.com/apache/hudi-rs/actions/workflows/ci.yml/badge.svg\">\n </a>\n <a href=\"https://codecov.io/github/apache/hudi-rs\">\n <img alt=\"hudi-rs codecov\" src=\"https://codecov.io/github/apache/hudi-rs/graph/badge.svg\">\n </a>\n <a href=\"https://join.slack.com/t/apache-hudi/shared_invite/zt-2ggm1fub8-_yt4Reu9djwqqVRFC7X49g\">\n <img alt=\"join hudi slack\" src=\"https://img.shields.io/badge/slack-%23hudi-72eff8?logo=slack&color=48c628\">\n </a>\n <a href=\"https://x.com/apachehudi\">\n <img alt=\"follow hudi x/twitter\" src=\"https://img.shields.io/twitter/follow/apachehudi?label=apachehudi\">\n </a>\n <a href=\"https://www.linkedin.com/company/apache-hudi\">\n <img alt=\"follow hudi linkedin\" 
src=\"https://img.shields.io/badge/apache%E2%80%93hudi-0077B5?logo=linkedin\">\n </a>\n</p>\n\nThe `hudi-rs` project aims to broaden the use of [Apache Hudi](https://github.com/apache/hudi) for a diverse range of\nusers and projects.\n\n| Source | Installation Command |\n|---------------|----------------------|\n| **PyPi** | `pip install hudi` |\n| **Crates.io** | `cargo add hudi` |\n\n## Example usage\n\n> [!NOTE]\n> These examples expect a Hudi table exists at `/tmp/trips_table`, created using\n> the [quick start guide](https://hudi.apache.org/docs/quick-start-guide).\n\n### Python\n\nRead a Hudi table into a PyArrow table.\n\n```python\nfrom hudi import HudiTableBuilder\nimport pyarrow as pa\n\nhudi_table = (\n HudiTableBuilder\n .from_base_uri(\"/tmp/trips_table\")\n .with_option(\"hoodie.read.as.of.timestamp\", \"20241122010827898\")\n .build()\n)\nrecords = hudi_table.read_snapshot(filters=[(\"city\", \"=\", \"san_francisco\")])\n\narrow_table = pa.Table.from_batches(records)\nresult = arrow_table.select([\"rider\", \"city\", \"ts\", \"fare\"])\nprint(result)\n```\n\n### Rust (DataFusion)\n\n<details>\n<summary>Add crate hudi with datafusion feature to your application to query a Hudi table.</summary>\n\n```shell\ncargo new my_project --bin && cd my_project\ncargo add tokio@1 datafusion@42\ncargo add hudi --features datafusion\n```\n\nUpdate `src/main.rs` with the code snippet below then `cargo run`.\n\n</details>\n\n```rust\nuse std::sync::Arc;\n\nuse datafusion::error::Result;\nuse datafusion::prelude::{DataFrame, SessionContext};\nuse hudi::HudiDataSource;\n\n#[tokio::main]\nasync fn main() -> Result<()> {\n let ctx = SessionContext::new();\n let hudi = HudiDataSource::new_with_options(\n \"/tmp/trips_table\",\n [(\"hoodie.read.as.of.timestamp\", \"20241122010827898\")]).await?;\n ctx.register_table(\"trips_table\", Arc::new(hudi))?;\n let df: DataFrame = ctx.sql(\"SELECT * from trips_table where city = 'san_francisco'\").await?;\n df.show().await?;\n 
Ok(())\n}\n```\n\n### Work with cloud storage\n\nEnsure cloud storage credentials are set properly as environment variables, e.g., `AWS_*`, `AZURE_*`, or `GOOGLE_*`.\nRelevant storage environment variables will then be picked up. The target table's base uri with schemes such\nas `s3://`, `az://`, or `gs://` will be processed accordingly.\n\nAlternatively, you can pass the storage configuration as options to the `HudiTableBuilder` or `HudiDataSource`.\n\n### Python\n\n```python\nfrom hudi import HudiTableBuilder\n\nhudi_table = (\n HudiTableBuilder\n .from_base_uri(\"s3://bucket/trips_table\")\n .with_option(\"aws_region\", \"us-west-2\")\n .build()\n)\n```\n\n### Rust (DataFusion)\n\n```rust\nuse hudi::HudiDataSource;\n\nasync fn main() -> Result<()> {\n let hudi = HudiDataSource::new_with_options(\n \"s3://bucket/trips_table\",\n [(\"aws_region\", \"us-west-2\")]\n ).await?;\n}\n\n```\n\n## Contributing\n\nCheck out the [contributing guide](./CONTRIBUTING.md) for all the details about making contributions to the project.\n\n",
"bugtrack_url": null,
"license": "Apache License 2.0",
"summary": "Native Python binding for Apache Hudi, based on hudi-rs.",
"version": "0.2.0",
"project_urls": {
"Homepage": "https://github.com/apache/hudi-rs",
"repository": "https://github.com/apache/hudi-rs/tree/main/python/"
},
"split_keywords": [
"apachehudi",
" hudi",
" datalake",
" arrow"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "01404c9f770e46eee717e18b817ae41c6d165fc12c4907c8234ab605f7c463a7",
"md5": "1e679138e908d79b481ce0626987793c",
"sha256": "a1a03c27798b25a183856927260e876d25798b3cc4d952e575cf5b893838f2ae"
},
"downloads": -1,
"filename": "hudi-0.2.0-cp39-abi3-macosx_10_12_x86_64.whl",
"has_sig": false,
"md5_digest": "1e679138e908d79b481ce0626987793c",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.9",
"size": 5794308,
"upload_time": "2024-11-26T00:25:05",
"upload_time_iso_8601": "2024-11-26T00:25:05.658649Z",
"url": "https://files.pythonhosted.org/packages/01/40/4c9f770e46eee717e18b817ae41c6d165fc12c4907c8234ab605f7c463a7/hudi-0.2.0-cp39-abi3-macosx_10_12_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "4c758f23247ff79ca23785e5793fc4d59589fa51723b3cf053cdd13e60a6b93d",
"md5": "12f68d7f6a6911bf9be44b06315c4b57",
"sha256": "c031d535150bd8b0c0681f515070c1fc1eae97a81ac95469932d249f67c134cd"
},
"downloads": -1,
"filename": "hudi-0.2.0-cp39-abi3-macosx_11_0_arm64.whl",
"has_sig": false,
"md5_digest": "12f68d7f6a6911bf9be44b06315c4b57",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.9",
"size": 5358147,
"upload_time": "2024-11-26T00:24:42",
"upload_time_iso_8601": "2024-11-26T00:24:42.810190Z",
"url": "https://files.pythonhosted.org/packages/4c/75/8f23247ff79ca23785e5793fc4d59589fa51723b3cf053cdd13e60a6b93d/hudi-0.2.0-cp39-abi3-macosx_11_0_arm64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "ec1cf3dbdfed2aa4fca333bb99e205ae96a6d3ea6ca0d1d97e32c02acd3a7e9e",
"md5": "a203746192ffbcf5c1037fee41662b61",
"sha256": "f9525742705f77d8d9ea318299c6d4ded533738399b050eef2a21acada265ec8"
},
"downloads": -1,
"filename": "hudi-0.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"has_sig": false,
"md5_digest": "a203746192ffbcf5c1037fee41662b61",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.9",
"size": 5699378,
"upload_time": "2024-11-26T00:29:46",
"upload_time_iso_8601": "2024-11-26T00:29:46.452044Z",
"url": "https://files.pythonhosted.org/packages/ec/1c/f3dbdfed2aa4fca333bb99e205ae96a6d3ea6ca0d1d97e32c02acd3a7e9e/hudi-0.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "d3ab1936df5fc561053242314aebea722bc7d5707408d4e357627a3b060977a9",
"md5": "b5a60b4c5a37fc646875af1a63162a30",
"sha256": "dc9b9b7c564a4298f3942a61b6918069853a12d8cc2e45261dcd343b75abdf86"
},
"downloads": -1,
"filename": "hudi-0.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"has_sig": false,
"md5_digest": "b5a60b4c5a37fc646875af1a63162a30",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.9",
"size": 6152139,
"upload_time": "2024-11-26T00:25:18",
"upload_time_iso_8601": "2024-11-26T00:25:18.935750Z",
"url": "https://files.pythonhosted.org/packages/d3/ab/1936df5fc561053242314aebea722bc7d5707408d4e357627a3b060977a9/hudi-0.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "306d956b24caebbb3246be6de1d49a03aaab4907904ec4edc36e425220b8251b",
"md5": "0b0fb3f2e43f06e44b4d9607cbb34ffc",
"sha256": "0a7b8641af8987993d667b10007cc4c1e0ace36936e257cd94022e21648c6151"
},
"downloads": -1,
"filename": "hudi-0.2.0-cp39-abi3-win_amd64.whl",
"has_sig": false,
"md5_digest": "0b0fb3f2e43f06e44b4d9607cbb34ffc",
"packagetype": "bdist_wheel",
"python_version": "cp39",
"requires_python": ">=3.9",
"size": 5945434,
"upload_time": "2024-11-26T00:28:29",
"upload_time_iso_8601": "2024-11-26T00:28:29.481796Z",
"url": "https://files.pythonhosted.org/packages/30/6d/956b24caebbb3246be6de1d49a03aaab4907904ec4edc36e425220b8251b/hudi-0.2.0-cp39-abi3-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "719719f622a7f056b68e07698602762c7468ad6f957d938e6d890f82af3dd9a3",
"md5": "2db9e0684776d5e3108738f2ddd7d1ea",
"sha256": "32577eb6362451fcb88ff7f308f5ab25a1ced9705f92a73d4bbfe73df81701a8"
},
"downloads": -1,
"filename": "hudi-0.2.0.tar.gz",
"has_sig": false,
"md5_digest": "2db9e0684776d5e3108738f2ddd7d1ea",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.9",
"size": 249233,
"upload_time": "2024-11-26T00:25:21",
"upload_time_iso_8601": "2024-11-26T00:25:21.177803Z",
"url": "https://files.pythonhosted.org/packages/71/97/19f622a7f056b68e07698602762c7468ad6f957d938e6d890f82af3dd9a3/hudi-0.2.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-11-26 00:25:21",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "apache",
"github_project": "hudi-rs",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "hudi"
}