# Simplest Possible Content-Addressable Blob Store
<p align="center">
<img src="grugstore.png" alt="GrugStore Logo" width="512" height="512">
</p>
[PyPI](https://pypi.org/project/grugstore/)
This is a simple content-addressable blob store. It stores blobs of data along
with associated metadata. Blobs live in a directory hierarchy derived from the
base58 encoding of their SHA-256 hash, and metadata is stored in sibling files
next to each blob.
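Under the hood, an address is just the base58-encoded SHA-256 digest of the bytes, and the first few characters of that address pick the subdirectories. Here is a minimal sketch of the idea (illustrative only, not GrugStore's internal code; it assumes the third-party `base58` package):
```python
import hashlib
from pathlib import Path

import base58  # third-party package, assumed here: pip install base58


def content_address(data: bytes, root: Path, depth: int = 3) -> tuple[str, Path]:
    """Derive a base58 SHA-256 address and the nested path it implies."""
    hash_str = base58.b58encode(hashlib.sha256(data).digest()).decode("ascii")
    # Each of the first `depth` characters becomes one directory level.
    return hash_str, root.joinpath(*hash_str[:depth], hash_str)


hash_str, path = content_address(b"Hello, World!", Path("some-dir"))
print(path)  # some-dir/<c1>/<c2>/<c3>/<full base58 hash>
```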
## Quick Start
```python
from grugstore import GrugStore
# Create a GrugStore instance
gs = GrugStore('some-dir', hierarchy_depth=3)
# Store a blob
hash_str, file_path = gs.store(b'Hello, World!')
# Check if a blob exists
if gs.exists(hash_str):
    # Load the blob
    blob = gs.load_bytes(hash_str)
```
## Core Methods
### Store Metadata
```python
# Set a README for the store
gs.set_readme("This store contains user avatars and profile images")
# Get the README content
readme_content = gs.get_readme()
```
### Storing and Loading Data
```python
# Store raw bytes - returns (hash_string, file_path)
hash_str, file_path = gs.store(b'Hello, World!')
# Stream from a file-like object (e.g., for large files)
with open('large_file.bin', 'rb') as f:
    hash_str = gs.stream(f)
# Load data back
data = gs.load_bytes(hash_str)
```
### Working with Sibling Files
```python
# Store metadata/sibling files
gs.store_sibling(hash_str, 'json', b'{"key": "value"}')
gs.store_sibling(hash_str, 'txt', b'Additional notes')
# Load sibling data
metadata = gs.load_sibling_bytes(hash_str, 'json')
notes = gs.load_sibling_bytes(hash_str, 'txt')
```
### Checking Existence
```python
# Check if main blob exists
if gs.exists(hash_str):
    print("Blob exists!")

# Check if sibling file exists
if gs.exists(hash_str, 'json'):
    metadata = gs.load_sibling_bytes(hash_str, 'json')
```
### Path Operations
```python
# Get path to a blob (without loading it)
blob_path = gs.path_to(hash_str)
# Get path to a sibling file
metadata_path = gs.path_to(hash_str, 'json')
```
### Copying and Moving Files
```python
# Copy an external file into the store
# Returns (hash_string, file_path) - original file remains unchanged
hash_str, store_path = gs.copy_file('/path/to/source/file.pdf')
# Move an external file into the store
# Returns (hash_string, file_path) - original file is deleted
hash_str, store_path = gs.move_file('/path/to/source/file.pdf')
# Both methods:
# - Calculate the file's SHA-256 hash efficiently
# - Create the appropriate directory structure
# - Handle duplicates (won't overwrite existing files; see the check below)
# - Support both string and Path objects as input
```
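Because the address depends only on content, importing the same bytes twice lands on the same blob. A quick way to see the duplicate handling (the paths here are illustrative):
```python
import shutil

# Make a second copy of the same file under a different name (illustrative paths)
shutil.copy('/path/to/source/file.pdf', '/tmp/duplicate.pdf')

h1, p1 = gs.copy_file('/path/to/source/file.pdf')
h2, p2 = gs.copy_file('/tmp/duplicate.pdf')

# Same bytes -> same hash -> same stored path; nothing is overwritten
assert h1 == h2 and p1 == p2
```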
### Iteration and Validation
```python
# Iterate over all blobs (excluding siblings)
for hash_str, file_path in gs.iter_files(no_sibling=True):
    print(f"Found blob: {hash_str}")

# Iterate with sibling information
for hash_str, file_path, sibling_extensions in gs.iter_files():
    print(f"Blob: {hash_str}")
    print(f"Siblings: {sibling_extensions}")  # e.g., {'json', 'txt'}

# Validate integrity of all blobs (the check itself is sketched below)
for invalid_path in gs.validate_tree():
    print(f"Corrupted file: {invalid_path}")

# Auto-delete corrupted files
for invalid_path in gs.validate_tree(auto_delete=True):
    print(f"Deleted corrupted file: {invalid_path}")

# Auto-delete corrupted files and their siblings
for invalid_path in gs.validate_tree(auto_delete=True, delete_siblings=True):
    print(f"Deleted corrupted file: {invalid_path}")
```
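Conceptually, validation re-hashes each blob and compares the result to the address encoded in its filename. A rough equivalent built from the documented iteration API (a sketch, not the library's implementation; it assumes the third-party `base58` package as in the earlier sketch):
```python
import hashlib

import base58  # third-party, as assumed above


def recheck(hash_str, file_path):
    """Re-hash a blob and compare against its content address."""
    digest = hashlib.sha256(file_path.read_bytes()).digest()
    return base58.b58encode(digest).decode("ascii") == hash_str


for hash_str, file_path, _siblings in gs.iter_files():
    if not recheck(hash_str, file_path):
        print(f"Corrupted: {file_path}")
```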
### Filtering and Copying
```python
# Create a filtered copy of the store
def size_filter(hash_str, file_path):
    # Only copy files smaller than 1 MB
    return file_path.stat().st_size < 1024 * 1024

# Create a new store with only small files
filtered_gs = gs.filtered_copy('filtered-dir', size_filter)

# The filtered store keeps the same hierarchy depth and README
print(f"Hierarchy depth: {filtered_gs.hierarchy_depth}")
print(f"README: {filtered_gs.get_readme()}")

# Example: copy only blobs that have a JSON sibling
def has_json_metadata(hash_str, file_path):
    # Check if this blob has a JSON sibling
    return gs.exists(hash_str, 'json')

json_only_gs = gs.filtered_copy('json-only-dir', has_json_metadata)

# Example: copy only files matching a hash pattern
def hash_prefix_filter(hash_str, file_path):
    # Only copy files whose hash starts with 'Q'
    return hash_str.startswith('Q')

q_files_gs = gs.filtered_copy('q-files-dir', hash_prefix_filter)
```
## File Layout
GrugStore organizes files in a hierarchical directory structure based on the base58-encoded SHA-256 hash of the content. Here's an example of what a GrugStore directory looks like with `hierarchy_depth=2`:
```
some-dir/
├── _meta/
│   └── README                 # Optional store-level documentation
├── _tmp/                      # Temporary directory for atomic file operations
├── 2/
│   └── X/
│       ├── 2XaBcD...xyz       # The actual blob file (no extension)
│       └── 2XaBcD...xyz.json  # Sibling metadata file
├── 5/
│   └── K/
│       ├── 5Kj9Yz...abc       # Another blob
│       ├── 5Kj9Yz...abc.json  # JSON sibling
│       └── 5Kj9Yz...abc.txt   # Text sibling
└── 8/
    └── R/
        └── 8Rm4Qp...def       # Blob without any sibling files
```
### Directory Structure Details
- **Hash-based hierarchy**: Files are organized using prefixes of their base58-encoded hash. With `hierarchy_depth=2`, the first character of the hash becomes the first directory level and the second character becomes the second level.
- **Blob files**: The main content files have no extension and are named with their full hash.
- **Sibling files**: Related metadata or additional content files share the same hash name but include an extension (e.g., `.json`, `.txt`).
- **`_meta/` directory**: Contains store-level metadata like README files.
- **`_tmp/` directory**: Used internally for atomic file operations. Files are first written here and then moved to their final location to ensure write atomicity and prevent partial file corruption; the sketch below shows the general pattern.
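The `_tmp/` trick is the classic write-then-rename pattern: as long as the temp file lives on the same filesystem as its destination, the final rename is atomic, so readers never observe a half-written blob. A generic sketch of the pattern (not GrugStore's actual code; it assumes `os.replace` is atomic within one filesystem, as on POSIX):
```python
import os
import tempfile
from pathlib import Path


def atomic_write(root: Path, final_path: Path, data: bytes) -> None:
    """Sketch of write-then-rename: never expose a partially written file."""
    tmp_dir = root / "_tmp"
    tmp_dir.mkdir(parents=True, exist_ok=True)
    # Write to a temp file on the same filesystem as the destination...
    fd, tmp_name = tempfile.mkstemp(dir=tmp_dir)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
        final_path.parent.mkdir(parents=True, exist_ok=True)
        # ...then atomically move it into place.
        os.replace(tmp_name, final_path)
    except BaseException:
        os.unlink(tmp_name)
        raise
```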