# Oracle Cloud Infrastructure Object Storage fsspec Implementation
[![PyPI](https://img.shields.io/pypi/v/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/) [![Python](https://img.shields.io/pypi/pyversions/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/)
The [Oracle Cloud Infrastructure Object Storage](https://docs.oracle.com/en-us/iaas/Content/Object/Concepts/objectstorageoverview.htm) service is an internet-scale, high-performance storage platform that offers reliable and cost-efficient data durability. With Object Storage, you can safely and securely store or retrieve data directly from the internet or from within the cloud platform.
`ocifs` is part of the `fsspec` [intake/filesystem_spec ecosystem](https://github.com/intake/filesystem_spec)
> a template or specification for a file-system interface, that specific implementations should follow, so that applications making use of them can rely on a common interface and not have to worry about the specific internal implementation decisions with any given backend.
`ocifs` joins the list of file systems supported with this package.
The `intake/filesystem_spec` project is used by [Pandas](https://pandas.pydata.org/), [Dask](https://dask.org/) and other data libraries in Python; this package adds Oracle OCI Object Storage capabilities to these libraries.
## OCIFS file system style operations Example:
```python
from ocifs import OCIFileSystem
fs = OCIFileSystem("~/.oci/config")
# 1.Create empty file or truncate in OCI objectstorage bucket
fs.touch("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", truncate=True, data=b"Writing to Object Storage!")
# 2.Fetch (potentially multiple) paths' contents
fs.cat("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 3.Get metadata about a file from a head or list call
fs.info("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 4.Get directory listing page
fs.ls("oci://<my_bucket>@<my_namespace>/<my_prefix>/", detail=True)
# 5.Is this entry directory-like?
fs.isdir("oci://<my_bucket>@<my_namespace>")
# 6.Is this entry file-like?
fs.isfile("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 7.If there is a file at the given path (including broken links)
fs.lexists("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 8.List of files for the given path
fs.listdir("oci://<my_bucket>@<my_namespace>/<my_prefix>", detail=True)
# 9.Get the first ``size`` bytes from file
fs.head("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", size=1024)
# 10.Get the last ``size`` bytes from file
fs.tail("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", size=1024)
# 11.Hash of file properties, to tell if it has changed
fs.ukey("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 12.Size in bytes of file
fs.size("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 13.Size in bytes of each file in a list of paths
paths = ["oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt"]
fs.sizes(paths)
# 14.Normalise OCI path string into bucket and key.
fs.split_path("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 15.Delete a file from the bucket
fs.rm("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt")
# 16.Get the contents of the file as bytes
fs.read_bytes("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", start=0, end=13)
# 17.Get the contents of the file as a string
fs.read_text("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", encoding=None, errors=None, newline=None)
# 18.Read a block of bytes from the file
fs.read_block("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", 0, 13)
# 19.Open a file for writing/flushing into file in OCI objectstorage bucket
# Ocifs sets the best-guessed content-type for hello.txt i.e "text/plain"
with fs.open("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", 'w', autocommit=True) as f:
    f.write("Writing data to buffer, before manually flushing and closing.") # data is flushed and file closed
    f.flush()
# Ocifs uses the specified content-type passed in the open while writing to OCI objectstorage bucket
with fs.open("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", 'w',content_type='text/plain') as f:
    f.write("Writing data to buffer, before manually flushing and closing.") # data is flushed and file closed
    f.flush()
# 20.Open a file for reading a file from OCI objectstorage bucket
with fs.open("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt") as f:
    print(f.read())
# 21.Space used by files and optionally directories within a path
fs.du("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello10.csv")
# 22.Find files by glob-matching.
fs.glob("oci://<my_bucket>@<my_namespace>/<my_prefix>/*.txt")
# 23.Renames an object in a particular bucket in tenancy namespace on OCI
fs.rename("oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt", "oci://<my_bucket>@<my_namespace>/<my_prefix>/hello2.txt")
# 24.Delete multiple files from the same bucket
pathlist = ["oci://<my_bucket>@<my_namespace>/<my_prefix>/hello2.txt"]
fs.bulk_delete(pathlist)
```
### Or Use With Pandas
```python
import pandas as pd
import ocifs
df = pd.read_csv(
"oci://my_bucket@my_namespace/my_object.csv",
storage_options={"config": "~/.oci/config"},
)
```
### Or Use With PyArrow
```python
import pandas as pd
import ocifs
df = pd.read_csv(
"oci://my_bucket@my_namespace/my_object.csv",storage_options={"config": "~/.oci/config"})
```
### Or Use With ADSDataset
```python
import ads
import pandas as pd
from ads.common.auth import default_signer
from ads.dataset.dataset import ADSDataset
ads.set_auth(auth="api_key", oci_config_location="~/.oci/config", profile="<profile_name>")
ds = ADSDataset(
df=pd.read_csv(f"oci://my_bucket@my_namespace/my_object.csv", storage_options=default_signer()),
type_discovery=False
)
print(ds.df)
```
## Getting Started
```bash
python3 -m pip install ocifs
```
## Software Prerequisites
Python >= 3.6
## Environment Variables for Authentication:
```bash
export OCIFS_IAM_TYPE=api_key
export OCIFS_CONFIG_LOCATION=~/.oci/config
export OCIFS_CONFIG_PROFILE=DEFAULT
```
Note, if you are operating on OCI with an alternative valid signer, such as resource principal, instead set the following:
```bash
export OCIFS_IAM_TYPE=resource_principal
```
## Environment Variables for enabling Logging:
To quickly see all messages, you can set the environment variable OCIFS_LOGGING_LEVEL=DEBUG.
```bash
export OCIFS_LOGGING_LEVEL=DEBUG
```
## Documentation
* [![PyPI](https://img.shields.io/pypi/v/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/) [![Python](https://img.shields.io/pypi/pyversions/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/)
* [ocifs Documentation](https://ocifs.readthedocs.io/en/latest/index.html)
* [ocifs GitHub](https://github.com/oracle/ocifs)
## Support
[The built-in filesystems in `fsspec`](https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations) are maintained by the `intake` project team, whereas `ocifs` is an external implementation (similar to `s3fs`, `gcsfs`, `adl/abfs`, and so on), which is maintained by Oracle.
## Contributing
This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md).
## Security
Please consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process.
## License
Copyright (c) 2021, 2023 Oracle and/or its affiliates.
Released under the Universal Permissive License v1.0 as shown at
<https://oss.oracle.com/licenses/upl/>.
Raw data
{
"_id": null,
"home_page": "",
"name": "ocifs",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.6",
"maintainer_email": "Allen Hosler <allen.hosler@oracle.com>",
"keywords": "Oracle Cloud Infrastructure,OCI,Object Storage",
"author": "Oracle Cloud Infrastructure Data Science",
"author_email": "",
"download_url": "https://files.pythonhosted.org/packages/dc/01/9742b6280e40f061e74cfe4cdfc064ea424208e33e7b011824bd0fdfe7a8/ocifs-1.3.1.tar.gz",
"platform": null,
"description": "\n# Oracle Cloud Infrastructure Object Storage fsspec Implementation\n\n\n[![PyPI](https://img.shields.io/pypi/v/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/) [![Python](https://img.shields.io/pypi/pyversions/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/)\n\n\n\u200b\nThe [Oracle Cloud Infrastructure Object Storage](https://docs.oracle.com/en-us/iaas/Content/Object/Concepts/objectstorageoverview.htm) service is an internet-scale, high-performance storage platform that offers reliable and cost-efficient data durability. With Object Storage, you can safely and securely store or retrieve data directly from the internet or from within the cloud platform.\n\u200b\n`ocifs` is part of the `fsspec` [intake/filesystem_spec ecosystem](https://github.com/intake/filesystem_spec)\n\u200b\n> a template or specification for a file-system interface, that specific implementations should follow, so that applications making use of them can rely on a common interface and not have to worry about the specific internal implementation decisions with any given backend.\n\u200b\n`ocifs` joins the list of file systems supported with this package.\n\u200b\nThe `intake/filesystem_spec` project is used by [Pandas](https://pandas.pydata.org/), [Dask](https://dask.org/) and other data libraries in python, this package adds Oracle OCI Object Storage capabilties to these libraries.\n\u200b\n## OCIFS file system style operations Example:\n```python\nfrom ocifs import OCIFileSystem\n\nfs = OCIFilesystem(\"~/.oci/config\")\n# 1.Create empty file or truncate in OCI objectstorage bucket\n fs.touch(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", truncate=True, data=b\"Writing to Object Storage!\")\n # 2.Fetch(potentially multiple paths' contents\n fs.cat(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 3.Get metadata about a file from a head or list call\n 
fs.info(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 4.Get directory listing page\n fs.ls(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/\", detail=True)\n # 5.Is this entry directory-like?\n fs.isdir(\"oci://<my_bucket>@<my_namespace>\")\n # 6.Is this entry file-like?\n fs.isfile(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 7.If there is a file at the given path (including broken links)\n fs.lexists(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 8.List of files for the given path\n fs.listdir(\"oci://<my_bucket>@<my_namespace>/<my_prefix>\", detail=True)\n # 9.Get the first ``size`` bytes from file\n fs.head(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", size=1024)\n # 10.Get the last ``size`` bytes from file\n fs.tail(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", size=1024)\n # 11.Hash of file properties, to tell if it has changed\n fs.ukey(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 12.Size in bytes of file\n fs.size(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 13.Size in bytes of each file in a list of paths\n paths = [\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\"]\n fs.sizes(paths)\n # 14.Normalise OCI path string into bucket and key.\n fs.split_path(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 15.Delete a file from the bucket\n fs.rm(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\")\n # 16.Get the contents of the file as a byte\n fs.read_bytes(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", start=0, end=13)\n # 17.Get the contents of the file as a string\n fs.read_text(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", encoding=None, errors=None, newline=None)\n # 18.Get the contents of the file as a byte\n fs.read_block(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", 0, 13)\n # 19.Open a file for writing/flushing into file in OCI objectstorage bucket\n # Ocifs sets 
the best-guessed content-type for hello.txt i.e \"text/plain\"\n with fs.open(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", 'w', autocommit=True) as f:\n f.write(\"Writing data to buffer, before manually flushing and closing.\") # data is flushed and file closed\n f.flush()\n# Ocifs uses the specified content-type passed in the open while writing to OCI objectstorage bucket\n with fs.open(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", 'w',content_type='text/plain') as f:\n f.write(\"Writing data to buffer, before manually flushing and closing.\") # data is flushed and file closed\n f.flush()\n # 20.Open a file for reading a file from OCI objectstorage bucket\n with fs.open(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\") as f:\n print(f.read())\n # 21.Space used by files and optionally directories within a path\n fs.du(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello10.csv\")\n # 22.Find files by glob-matching.\n fs.glob(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/*.txt\")\n # 23.Renames an object in a particular bucket in tenancy namespace on OCI\n fs.rename(\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello.txt\", \"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello2.txt\")\n # 24.Delete multiple files from the same bucket\n pathlist = [\"oci://<my_bucket>@<my_namespace>/<my_prefix>/hello2.txt\"]\n fs.bulk_delete(pathlist)\n\n```\n\n\n\n### Or Use With Pandas\n\u200b\n```python\nimport pandas as pd\nimport ocifs\n\u200b\ndf = pd.read_csv(\n \"oci://my_bucket@my_namespace/my_object.csv\",\n storage_options={\"config\": \"~/.oci/config\"},\n)\n```\n\n### Or Use With PyArrow\n\u200b\n```python\nimport pandas as pd\nimport ocifs\n\u200b\ndf = pd.read_csv(\n \"oci://my_bucket@my_namespace/my_object.csv\",storage_options={\"config\": \"~/.oci/config\"})\n```\n\n### Or Use With ADSDataset\n\u200b\n```python\nimport ads\nimport pandas as pd\nfrom ads.common.auth import default_signer\nfrom ads.dataset.dataset import 
ADSDataset\n\n\u200b\n ads.set_auth(auth=\"api_key\", oci_config_location=\"~/.oci/config\", profile=\"<profile_name>\")\n ds = ADSDataset(\n df=pd.read_csv(f\"oci://my_bucket@my_namespace/my_object.csv\", storage_options=default_signer()),\n type_discovery=False\n )\n print(ds.df)\n```\n\n\u200b\n## Getting Started\n```bash\npython3 -m pip install ocifs\n```\n\n## Software Prerequisites\nPython >= 3.6\n\n## Environment Variables for Authentication:\n```bash\nexport OCIFS_IAM_TYPE=api_key\nexport OCIFS_CONFIG_LOCATION=~/.oci/config\nexport OCIFS_CONFIG_PROFILE=DEFAULT\n```\n\nNote, if you are operating on OCI with an alternative valid signer, such as resource principal, instead set the following:\n```bash\nexport OCIFS_IAM_TYPE=resource_principal\n```\n\n## Environment Variables for enabling Logging:\nTo quickly see all messages, you can set the environment variable OCIFS_LOGGING_LEVEL=DEBUG.\n```bash\nexport OCIFS_LOGGING_LEVEL=DEBUG\n```\n\n## Documentation\n* [![PyPI](https://img.shields.io/pypi/v/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/) [![Python](https://img.shields.io/pypi/pyversions/ocifs.svg?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/ocifs/)\n* [ocifs Documentation](https://ocifs.readthedocs.io/en/latest/index.html)\n* [ocifs GitHub](https://github.com/oracle/ocifs)\n\n## Support\n[The built-in filesystems in `fsspec`](https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations) are maintained by the `intake` project team, where as `ocifs` is an external implementation (similar to `s3fs`, `gcsfs`, `adl/abfs`, and so on), which is maintained by Oracle.\n\n## Contributing\nThis project welcomes contributions from the community. 
Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md)\n\n## Security\nPlease consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process\n\n## License\nCopyright (c) 2021, 2023 Oracle and/or its affiliates.\n\nReleased under the Universal Permissive License v1.0 as shown at\n<https://oss.oracle.com/licenses/upl/>.\n\n",
"bugtrack_url": null,
"license": "",
"summary": "Convenient filesystem interface over Oracle Cloud's Object Storage",
"version": "1.3.1",
"project_urls": {
"Documentation": "https://ocifs.readthedocs.io/en/latest/index.html",
"Github": "https://github.com/oracle/ocifs"
},
"split_keywords": [
"oracle cloud infrastructure",
"oci",
"object storage"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "709e0c69ccfafd952d60aa95fb5e943708ece44cbcb6a9e493dfa738061455dc",
"md5": "1a22ddcd7bab57cbdbb98f7d355968b0",
"sha256": "55a96bfd4421f6bebadd11821a934bd5325d8fb51dc71ed56fd164b382c0af4c"
},
"downloads": -1,
"filename": "ocifs-1.3.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "1a22ddcd7bab57cbdbb98f7d355968b0",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.6",
"size": 67915,
"upload_time": "2023-12-14T21:47:19",
"upload_time_iso_8601": "2023-12-14T21:47:19.305745Z",
"url": "https://files.pythonhosted.org/packages/70/9e/0c69ccfafd952d60aa95fb5e943708ece44cbcb6a9e493dfa738061455dc/ocifs-1.3.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "dc019742b6280e40f061e74cfe4cdfc064ea424208e33e7b011824bd0fdfe7a8",
"md5": "9998076000c47a7cadd932ed24a54995",
"sha256": "a4e25ee1df75ec94d74cdb3b54f1629fc32d3cd0fb6c15fc89296550a9fc45f8"
},
"downloads": -1,
"filename": "ocifs-1.3.1.tar.gz",
"has_sig": false,
"md5_digest": "9998076000c47a7cadd932ed24a54995",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.6",
"size": 57274,
"upload_time": "2023-12-14T21:47:20",
"upload_time_iso_8601": "2023-12-14T21:47:20.608573Z",
"url": "https://files.pythonhosted.org/packages/dc/01/9742b6280e40f061e74cfe4cdfc064ea424208e33e7b011824bd0fdfe7a8/ocifs-1.3.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-12-14 21:47:20",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "oracle",
"github_project": "ocifs",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "ocifs"
}