# IPFS Huggingface Datasets
This is a model manager and wrapper for Hugging Face. It looks up an index of models from a collection of models and downloads a model from HTTPS, S3, or IPFS, depending on which source is the fastest.
# How to use
~~~shell
pip install .
~~~
Run ``python3 example.py`` for examples of usage.
This is designed to be a drop-in replacement, which requires only 2 lines to be changed.
In your Python script:
~~~python
from datasets import load_dataset
from ipfs_datasets import load_dataset
dataset = load_dataset.from_auto_download("bge-small-en-v1.5")
~~~
or
~~~python
from datasets import load_dataset
from ipfs_datasets import load_dataset
dataset = load_dataset.from_ipfs("QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1")
~~~
or to use with S3 caching
~~~python
from datasets import load_dataset
from ipfs_datasets import load_dataset
dataset = load_dataset.from_auto_download(
dataset_name="common-crawl",
s3cfg={
"bucket": "cloud",
"endpoint": "https://storage.googleapis.com",
"secret_key": "",
"access_key": ""
}
)
~~~
# IPFS Huggingface Bridge:
for transformers python library visit:
https://github.com/endomorphosis/ipfs_transformers/
for transformers js client visit:
https://github.com/endomorphosis/ipfs_transformers_js/
for orbitdb_kit nodejs library visit:
https://github.com/endomorphosis/orbitdb_kit/
for fireproof_kit nodejs library visit:
https://github.com/endomorphosis/fireproof_kit
for Faiss KNN index python library visit:
https://github.com/endomorphosis/ipfs_faiss/
for python model manager library visit:
https://github.com/endomorphosis/ipfs_model_manager/
for nodejs model manager library visit:
https://github.com/endomorphosis/ipfs_model_manager_js/
for nodejs ipfs huggingface scraper with pinning services visit:
https://github.com/endomorphosis/ipfs_huggingface_scraper/
Author - Benjamin Barber
QA - Kevin De Haan
Raw data
{
"_id": null,
"home_page": null,
"name": "ipfs-embeddings-py",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": null,
"keywords": null,
"author": null,
"author_email": "Benjamin Barber <starworks5@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/01/9b/74749b3ab050b90f7a28b3588023c55f72600f5d49bace66da3c12d62ef0/ipfs_embeddings_py-0.0.28.tar.gz",
"platform": null,
"description": "# IPFS Huggingface Datasets\n\nThis is a model manager and wrapper for huggingface, looks up a index of models from an collection of models, and will download a model from either https/s3/ipfs, depending on which source is the fastest.\n\n# How to use\n~~~shell\npip install .\n~~~\n\nlook run ``python3 example.py`` for examples of usage.\n\nthis is designed to be a drop in replacement, which requires only 2 lines to be changed\n\nIn your python script\n~~~shell\nfrom datasets import load_dataset\nfrom ipfs_datasets import load_dataset\ndataset = load_dataset.from_auto_download(\"bge-small-en-v1.5\") \n~~~\n\nor \n\n~~~shell\nfrom datasets import load_dataset\nfrom ipfs_datasets import load_dataset\ndataset = load_dataset.from_ipfs(\"QmccfbkWLYs9K3yucc6b3eSt8s8fKcyRRt24e3CDaeRhM1\")\n~~~\n\nor to use with with s3 caching \n~~~shell\nfrom datasets import load_dataset\nfrom ipfs_datasets import load_dataset\ndataset = load_dataset.from_auto_download(\n dataset_name=\"common-crawl\",\n s3cfg={\n \"bucket\": \"cloud\",\n \"endpoint\": \"https://storage.googleapis.com\",\n \"secret_key\": \"\",\n \"access_key\": \"\"\n }\n)\n~~~\n\n# IPFS Huggingface Bridge:\n\nfor transformers python library visit:\nhttps://github.com/endomorphosis/ipfs_transformers/\n\nfor transformers js client visit: \nhttps://github.com/endomorphosis/ipfs_transformers_js/\n\nfor orbitdb_kit nodejs library visit:\nhttps://github.com/endomorphosis/orbitdb_kit/\n\nfor fireproof_kit nodejs library visit:\nhttps://github.com/endomorphosis/fireproof_kit\n\nfor Faiss KNN index python library visit:\nhttps://github.com/endomorphosis/ipfs_faiss/\n\nfor python model manager library visit: \nhttps://github.com/endomorphosis/ipfs_model_manager/\n\nfor nodejs model manager library visit: \nhttps://github.com/endomorphosis/ipfs_model_manager_js/\n\nfor nodejs ipfs huggingface scraper with pinning services visit:\nhttps://github.com/endomorphosis/ipfs_huggingface_scraper/\n\n\nAuthor - Benjamin 
Barber\nQA - Kevin De Haan\n",
"bugtrack_url": null,
"license": null,
"summary": "A wrapper around huggingface datasets, invoking an IPFS model manager.",
"version": "0.0.28",
"project_urls": {
"Homepage": "https://github.com/endomorphosis/ipfs_datasets_py",
"Issues": "https://github.com/endomorphosis/ipfs_datasets_py/issues"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "d1e5743dd2374efe3f35a4bff9fe745991906f5fabee198b8634f0af3922d6a3",
"md5": "a8a25b4b968cca00009db92f2f8a47a1",
"sha256": "e1da0bc40923a004a09c8b57e737ff329fcd2f26aae8bc12f8f26315388b8937"
},
"downloads": -1,
"filename": "ipfs_embeddings_py-0.0.28-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a8a25b4b968cca00009db92f2f8a47a1",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.8",
"size": 117419,
"upload_time": "2024-12-20T05:07:16",
"upload_time_iso_8601": "2024-12-20T05:07:16.106861Z",
"url": "https://files.pythonhosted.org/packages/d1/e5/743dd2374efe3f35a4bff9fe745991906f5fabee198b8634f0af3922d6a3/ipfs_embeddings_py-0.0.28-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "019b74749b3ab050b90f7a28b3588023c55f72600f5d49bace66da3c12d62ef0",
"md5": "d62b535f0289d570d40e7eeed1b2640e",
"sha256": "2b4ba323c1ac6a595392993dc299cf22031b52236502105a15a05085fcc46e6a"
},
"downloads": -1,
"filename": "ipfs_embeddings_py-0.0.28.tar.gz",
"has_sig": false,
"md5_digest": "d62b535f0289d570d40e7eeed1b2640e",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 92557,
"upload_time": "2024-12-20T05:07:18",
"upload_time_iso_8601": "2024-12-20T05:07:18.894540Z",
"url": "https://files.pythonhosted.org/packages/01/9b/74749b3ab050b90f7a28b3588023c55f72600f5d49bace66da3c12d62ef0/ipfs_embeddings_py-0.0.28.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-12-20 05:07:18",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "endomorphosis",
"github_project": "ipfs_datasets_py",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"requirements": [
{
"name": "orbitdb_kit_py",
"specs": []
},
{
"name": "ipfs_kit_py",
"specs": []
},
{
"name": "ipfs_model_manager_py",
"specs": []
},
{
"name": "ipfs_faiss_py",
"specs": []
},
{
"name": "transformers",
"specs": []
},
{
"name": "numpy",
"specs": []
},
{
"name": "urllib3",
"specs": []
},
{
"name": "requests",
"specs": []
},
{
"name": "boto3",
"specs": []
},
{
"name": "ipfsspec",
"specs": []
},
{
"name": "duckdb",
"specs": []
},
{
"name": "datasets",
"specs": []
},
{
"name": "pyarrow",
"specs": []
},
{
"name": "fsspec",
"specs": []
}
],
"lcname": "ipfs-embeddings-py"
}