lmdbsystem


Namelmdbsystem JSON
Version 0.2.2 PyPI version JSON
download
home_pageNone
SummaryAbstraction for read and write to the lmdb file
upload_time2024-06-03 04:12:02
maintainerNone
docs_urlNone
authorNone
requires_python>=3.10
licenseThe MIT License
keywords lmdbsystem lmdb
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
# Python Lmdbsystem

[![Testing](https://github.com/rabiloo/python-lmdbsystem/actions/workflows/test.yml/badge.svg)](https://github.com/rabiloo/python-lmdbsystem/actions/workflows/test.yml)
[![Latest Version](https://img.shields.io/pypi/v/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)
[![Downloads](https://img.shields.io/pypi/dm/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)
[![Pypi Status](https://img.shields.io/pypi/status/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)
[![Python Versions](https://img.shields.io/pypi/pyversions/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)

## About Lmdbsystem

[LMDB Convertor](https://github.com/rabiloo/python-lmdbsystem) is an LMDB file-handling library for Python. It provides methods to read from and write to LMDB files.

## Install

```
$ pip install lmdbsystem
```

## Usage

```python
from dataset_loaders import ImageLoader

from lmdbsystem.lmdb import Lmdb
from lmdbsystem.write_adapters.text import TextWriteAdapter
from lmdbsystem.write_adapters.image import ImageWriteAdapter
from lmdbsystem.read_adapters.cv2_image import Cv2ImageReadAdapter
from lmdbsystem.read_adapters.pil_image import PilImageReadAdapter
from lmdbsystem.read_adapters.bytes_image import BytesImageReadAdapter
from lmdbsystem.read_adapters.text import TextReadAdapter
from lmdbsystem.dataloader import DataLoader


class ImageLoader(DataLoader):
    """Loader that yields one ``(key, value)`` pair per file found under a directory.

    Files are discovered recursively with the glob pattern
    ``{directory}/**/*{suffix}`` and visited in sorted path order.

    The ``fn_md5_mode`` argument selects how the key is obtained:

    * ``"r"`` -- the key is looked up in the mapping returned by
      ``json_reader(fn_md5_path)``, indexed by the file's path relative to
      ``directory`` with ``suffix`` stripped.  The value is
      ``dump_pickle((str2bytes(get_md5_file(file_path)), raw_contents))``.
    * ``"w"`` -- the key is ``get_md5_file(file_path)`` and is also recorded
      in the in-memory ``dict_filename_md5`` mapping.  The value is whatever
      ``raw_reader(file_path)`` returns.  This class itself never writes
      the mapping to ``fn_md5_path``.

    NOTE(review): ``json_reader``, ``raw_reader``, ``get_md5_file``,
    ``str2bytes``, ``dump_pickle`` and ``get_relative_path`` are defined
    outside this snippet; their exact semantics should be confirmed there.

    Raises:
        ValueError: if ``fn_md5_mode`` is neither ``"r"`` nor ``"w"``.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_mode: str,
        fn_md5_path: str,
    ):
        self.directory = directory
        self.suffix = suffix
        self.fn_md5_mode = fn_md5_mode
        self.fn_md5_path = fn_md5_path

        # Reject unknown modes up front; the two valid modes differ only in
        # whether the filename -> md5 mapping starts loaded or empty.
        if fn_md5_mode not in ("r", "w"):
            raise ValueError(f"Don't support fn_md5_mode: {fn_md5_mode}")
        self.dict_filename_md5 = json_reader(fn_md5_path) if fn_md5_mode == "r" else {}

        search_pattern = f"{directory}/**/*{suffix}"
        self.file_paths = sorted(glob(search_pattern, recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Yield the ``(key, value)`` pair of every discovered file, in order."""
        yield from (self[path] for path in self.file_paths)

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Return the ``(key, value)`` pair for a single file path.

        Raises:
            KeyError: in ``"r"`` mode, when the file's relative name is not
                present in the loaded mapping.
        """
        relative_name = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
        payload = raw_reader(file_path)

        if self.fn_md5_mode != "r":
            # Write mode: derive the key from the file and remember it.
            computed_digest = get_md5_file(file_path)
            self.dict_filename_md5[relative_name] = computed_digest
            return str2bytes(computed_digest), payload

        # Read mode: the key comes from the mapping; the file's own digest
        # is stored alongside the raw contents inside the pickled value.
        stored_digest = self.dict_filename_md5[relative_name]
        packed = dump_pickle((str2bytes(get_md5_file(file_path)), payload))
        return str2bytes(stored_digest), packed


class LabelInFilenameLoader(DataLoader):
    """Loader that derives label text from each file's name.

    Files are discovered recursively with the glob pattern
    ``{directory}/**/*{suffix}`` and visited in sorted path order.  For each
    file the base name (with ``suffix`` removed) is split on ``delimiter``
    and the fields whose positions appear in ``values_index`` are kept,
    stripped of surrounding whitespace, as labels.

    ``values_map`` controls post-processing of the labels:

    * empty / falsy -- labels are used as-is;
    * contains the key ``"type"`` -- its value is evaluated with ``eval`` to
      obtain a callable, and each label becomes ``str(callable(label))``;
    * otherwise -- each label is replaced by ``values_map[label]`` when
      present and left unchanged when absent.

    The yielded key is ``str2bytes`` of the entry found in the mapping
    returned by ``json_reader(fn_md5_path)`` for the file's relative name;
    the value is ``dump_pickle((sub_key, label_bytes))`` where ``sub_key`` is
    ``str2bytes(get_md5_file(file_path))`` and ``label_bytes`` is the
    space-joined labels passed through ``str2bytes``.

    NOTE(review): ``json_reader``, ``get_md5_file``, ``str2bytes``,
    ``dump_pickle`` and ``get_relative_path`` are defined outside this
    snippet; confirm their exact semantics there.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_path: str,
        values_index: List[int],
        values_map: Dict[str, str],
        delimiter: str,
    ):
        self.directory = directory
        self.suffix = suffix
        self.values_map = values_map
        self.delimiter = delimiter
        self.values_index = values_index
        self.dict_filename_md5 = json_reader(fn_md5_path)
        self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Yield the ``(key, value)`` pair of every discovered file, in order."""
        for file_path in self.file_paths:
            yield self[file_path]

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Return the ``(key, value)`` pair for a single file path.

        Raises:
            KeyError: when the file's relative name is not present in the
                mapping loaded from ``fn_md5_path``.
        """
        relative_name = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
        md5_file = self.dict_filename_md5[relative_name]
        key = str2bytes(md5_file)
        sub_key = str2bytes(get_md5_file(file_path))

        line_values = os.path.basename(file_path).removesuffix(self.suffix).split(self.delimiter)
        labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]

        if self.values_map:
            if "type" in self.values_map:
                # Only resolve the converter when there is something to
                # convert, so an empty label list never triggers evaluation.
                if labels:
                    # SECURITY: eval() executes arbitrary Python.  The "type"
                    # entry must come from trusted configuration only (e.g.
                    # "int" or "float"); never pass user-controlled text here.
                    # Resolved once per call instead of once per label.
                    converter = eval(self.values_map["type"])
                    labels = [str(converter(item)) for item in labels]
            else:
                labels = [self.values_map.get(item, item) for item in labels]

        value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
        return key, value


# Write an lmdb file whose values are labels parsed from the file names.
# NOTE(review): `args`, `values_map` and `values_index` are not defined in this
# snippet; presumably they come from the caller's own argument parsing -- confirm.
lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
lmdb_obj.write_loader(
    LabelInFilenameLoader(
        directory=args.folder,
        suffix=args.suffix,
        fn_md5_path=args.fn_md5_path,
        values_map=values_map,
        delimiter=args.delimiter,
        values_index=values_index,
    ),
)

# Write an lmdb file from a directory of images.
# `lmdb_obj` is rebound here; the text writer created above is no longer referenced.
lmdb_obj = Lmdb(ImageWriteAdapter(path=args.lmdb_file, map_size=args.lmdb_map_size))
lmdb_obj.write_loader(
    ImageLoader(
        directory=args.folder,
        suffix=args.suffix,
        fn_md5_mode=args.fn_md5_mode,
        fn_md5_path=args.fn_md5_path,
    ),
)


# Read an image entry by index using one of the three image read adapters.
# NOTE(review): `path` and `index` must be supplied by the caller; each adapter
# presumably returns the image in a different form (OpenCV / PIL / raw bytes) -- confirm.
value = Lmdb(Cv2ImageReadAdapter(path=path)).read_index(index)
value = Lmdb(PilImageReadAdapter(path=path)).read_index(index)
value = Lmdb(BytesImageReadAdapter(path=path)).read_index(index)

# Read a text entry by index.
value = Lmdb(TextReadAdapter(path=path)).read_index(index)
```

## Changelog

Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently.

## Contributing

Please see [CONTRIBUTING](.github/CONTRIBUTING.md) for details.

## Security Vulnerabilities

Please review [our security policy](../../security/policy) on how to report security vulnerabilities.

## Credits

- [Dao Quang Duy](https://github.com/duydq12)
- [All Contributors](../../contributors)

## License

The MIT License (MIT). Please see [License File](LICENSE) for more information.

            

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "lmdbsystem",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.10",
    "maintainer_email": "Rabiloo Developers <oss@rabiloo.com>",
    "keywords": "lmdbsystem, lmdb",
    "author": null,
    "author_email": "Dao Quang Duy <duydaoquang12@gmail.com>",
    "download_url": "https://files.pythonhosted.org/packages/6a/d1/7f723cd7b54b1570a397fa20b966862e17b35f280ec78a14e376811e26f9/lmdbsystem-0.2.2.tar.gz",
    "platform": null,
    "description": "# Python Lmdbsystem\n\n[![Testing](https://github.com/rabiloo/python-lmdbsystem/actions/workflows/test.yml/badge.svg)](https://github.com/rabiloo/python-lmdbsystem/actions/workflows/test.yml)\n[![Latest Version](https://img.shields.io/pypi/v/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)\n[![Downloads](https://img.shields.io/pypi/dm/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)\n[![Pypi Status](https://img.shields.io/pypi/status/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)\n[![Python Versions](https://img.shields.io/pypi/pyversions/lmdbsystem.svg)](https://pypi.org/project/lmdbsystem)\n\n## About Lmdbsystem\n\n[LMDB Convertor](https://github.com/rabiloo/python-lmdbsystem) is a lmdb file handle library for python. It provides method to read, write to the lmdb file\n\n## Install\n\n```\n$ pip install lmdbsystem\n```\n\n## Usage\n\n```\nfrom dataset_loaders import ImageLoader\n\nfrom lmdbsystem.lmdb import Lmdb\nfrom lmdbsystem.write_adapters.text import TextWriteAdapter\nfrom lmdbsystem.write_adapters.image import ImageWriteAdapter\nfrom lmdbsystem.read_adapters.cv2_image import Cv2ImageReadAdapter\nfrom lmdbsystem.read_adapters.pil_image import PilImageReadAdapter\nfrom lmdbsystem.read_adapters.bytes_image import BytesImageReadAdapter\nfrom lmdbsystem.read_adapters.text import TextReadAdapter\nfrom lmdbsystem.dataloader import DataLoader\n\n\nclass ImageLoader(DataLoader):\n    def __init__(\n        self,\n        directory: str,\n        suffix: str,\n        fn_md5_mode: str,\n        fn_md5_path: str,\n    ):\n        self.directory = directory\n        self.suffix = suffix\n        self.fn_md5_mode = fn_md5_mode\n        self.fn_md5_path = fn_md5_path\n        if fn_md5_mode == \"r\":\n            self.dict_filename_md5 = json_reader(fn_md5_path)\n        elif fn_md5_mode == \"w\":\n            self.dict_filename_md5 = {}\n        else:\n            raise ValueError(f\"Don't support fn_md5_mode: {fn_md5_mode}\")\n     
   self.file_paths = sorted(glob(f\"{directory}/**/*{suffix}\", recursive=True))\n\n    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:\n        for file_path in self.file_paths:\n            yield self[file_path]\n\n    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:\n        filename = get_relative_path(self.directory, file_path).removesuffix(self.suffix)\n        value = raw_reader(file_path)\n        if self.fn_md5_mode == \"r\":\n            md5_file = self.dict_filename_md5[filename]\n            value = dump_pickle((str2bytes(get_md5_file(file_path)), value))\n        else:\n            md5_file = get_md5_file(file_path)\n            self.dict_filename_md5[filename] = md5_file\n        key = str2bytes(md5_file)\n\n        return key, value\n\n\nclass LabelInFilenameLoader(DataLoader):\n    def __init__(\n        self,\n        directory: str,\n        suffix: str,\n        fn_md5_path: str,\n        values_index: List[int],\n        values_map: Dict[str, str],\n        delimiter: str,\n    ):\n        self.directory = directory\n        self.suffix = suffix\n        self.values_map = values_map\n        self.delimiter = delimiter\n        self.values_index = values_index\n        self.dict_filename_md5 = json_reader(fn_md5_path)\n        self.file_paths = sorted(glob(f\"{directory}/**/*{suffix}\", recursive=True))\n\n    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:\n        for file_path in self.file_paths:\n            yield self[file_path]\n\n    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:\n        md5_file = self.dict_filename_md5[get_relative_path(self.directory, file_path).removesuffix(self.suffix)]\n        key = str2bytes(md5_file)\n        sub_key = str2bytes(get_md5_file(file_path))\n\n        line_values = os.path.basename(file_path).removesuffix(self.suffix).split(self.delimiter)\n        labels = 
[value.strip() for index, value in enumerate(line_values) if index in self.values_index]\n\n        if self.values_map:\n            if \"type\" in self.values_map:\n                value_type = self.values_map[\"type\"]\n                labels = [str(eval(value_type)(item)) for item in labels]\n            else:\n                labels = [self.values_map.get(item, item) for item in labels]\n\n        value = dump_pickle((sub_key, str2bytes(\" \".join(labels))))\n        return key, value\n\n\n# Write lmdb file with label in filename       \nlmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))\nlmdb_obj.write_loader(\n    LabelInFilenameLoader(\n        directory=args.folder,\n        suffix=args.suffix,\n        fn_md5_path=args.fn_md5_path,\n        values_map=values_map,\n        delimiter=args.delimiter,\n        values_index=values_index,\n    ),\n)\n\n# Write lmdb file with image directory \nlmdb_obj = Lmdb(ImageWriteAdapter(path=args.lmdb_file, map_size=args.lmdb_map_size))\nlmdb_obj.write_loader(\n    ImageLoader(\n        directory=args.folder,\n        suffix=args.suffix,\n        fn_md5_mode=args.fn_md5_mode,\n        fn_md5_path=args.fn_md5_path,\n    ),\n)\n\n\n# Read image\nvalue = Lmdb(Cv2ImageReadAdapter(path=path)).read_index(index)\nvalue = Lmdb(PilImageReadAdapter(path=path)).read_index(index)\nvalue = Lmdb(BytesImageReadAdapter(path=path)).read_index(index)\n\n# Read text\nvalue = Lmdb(TextReadAdapter(path=path)).read_index(index)\n```\n\n## Changelog\n\nPlease see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently.\n\n## Contributing\n\nPlease see [CONTRIBUTING](.github/CONTRIBUTING.md) for details.\n\n## Security Vulnerabilities\n\nPlease review [our security policy](../../security/policy) on how to report security vulnerabilities.\n\n## Credits\n\n- [Dao Quang Duy](https://github.com/duydq12)\n- [All Contributors](../../contributors)\n\n## License\n\nThe MIT License (MIT). 
Please see [License File](LICENSE) for more information.\n",
    "bugtrack_url": null,
    "license": "The MIT License",
    "summary": "Abstraction for read and write to the lmdb file",
    "version": "0.2.2",
    "project_urls": {
        "Bug Tracker": "https://github.com/rabiloo/python-lmdbsystem/issues",
        "Homepage": "https://github.com/rabiloo/python-lmdbsystem",
        "Repository": "https://github.com/rabiloo/python-lmdbsystem"
    },
    "split_keywords": [
        "lmdbsystem",
        " lmdb"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "0734d404a0df8220fa52108474b49c3a64e5390770f5a8ea93931955ce851012",
                "md5": "9eaf7f24aac07e88d3453b6906161918",
                "sha256": "1615a149016c4e3f1398a194afb51fa64f3ec0023aa833d9885992540927a5ea"
            },
            "downloads": -1,
            "filename": "lmdbsystem-0.2.2-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "9eaf7f24aac07e88d3453b6906161918",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.10",
            "size": 13342,
            "upload_time": "2024-06-03T04:12:00",
            "upload_time_iso_8601": "2024-06-03T04:12:00.727460Z",
            "url": "https://files.pythonhosted.org/packages/07/34/d404a0df8220fa52108474b49c3a64e5390770f5a8ea93931955ce851012/lmdbsystem-0.2.2-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "6ad17f723cd7b54b1570a397fa20b966862e17b35f280ec78a14e376811e26f9",
                "md5": "990fca4be34cf8360d9178ea31192bfd",
                "sha256": "c680d2bb4f538aa5a47339d327de7f4c18413aea9f526418b2f22a6770eeb53f"
            },
            "downloads": -1,
            "filename": "lmdbsystem-0.2.2.tar.gz",
            "has_sig": false,
            "md5_digest": "990fca4be34cf8360d9178ea31192bfd",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.10",
            "size": 9875,
            "upload_time": "2024-06-03T04:12:02",
            "upload_time_iso_8601": "2024-06-03T04:12:02.595461Z",
            "url": "https://files.pythonhosted.org/packages/6a/d1/7f723cd7b54b1570a397fa20b966862e17b35f280ec78a14e376811e26f9/lmdbsystem-0.2.2.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-06-03 04:12:02",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "rabiloo",
    "github_project": "python-lmdbsystem",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": true,
    "lcname": "lmdbsystem"
}
        
Elapsed time: 0.25955s