streamlit-sparrow-labeling


Namestreamlit-sparrow-labeling JSON
Version 0.1.1 PyPI version JSON
download
home_pagehttps://github.com/katanaml/streamlit-sparrow-labeling-comp
SummaryStreamlit component for invoice document labeling
upload_time2022-12-15 08:46:23
maintainer
docs_urlNone
authorAndrej Baranovskij
requires_python>=3.8
license
keywords
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            # Streamlit - Labeling Component

Streamlit component which provides labeling functionality for documents and invoices. Based on [streamlit-drawable-canvas](https://github.com/andfanilo/streamlit-drawable-canvas).

![](./img/demo.png)

## Features

- Option to switch between rectangle selection/resize and new rectangle creation
- Returns info about the currently selected rectangle, which lets you assign a label to that rectangle
- Labeling data is stored in simple/readable JSON format. This data is auto converted to Fabric.js format by the component
- Canvas resizing and rectangle data resizing. Coordinates are saved in the original document dimensions

## Installation

```shell script
pip install streamlit-sparrow-labeling
```

## Example Usage

Copy this code snippet:

```python
from PIL import Image
import streamlit as st
import streamlit_nested_layout
import streamlit_javascript as st_js
from streamlit_sparrow_labeling import st_sparrow_labeling
from streamlit_sparrow_labeling import DataProcessor
import json
import math

# Configure the page title and use Streamlit's wide layout for the demo app.
st.set_page_config(
    page_title="Sparrow Labeling",
    layout="wide"
)

def run(img_file, rects_file, labels):
    """Render the labeling page: annotation canvas on the left, label form on the right.

    Args:
        img_file: Image source handed to ``Image.open`` and shown behind the canvas.
        rects_file: Path of the JSON file with the rectangle data. It is read on
            the first run of a session and overwritten when the form is saved.
        labels: Label choices forwarded to the form renderers.
    """
    # Browser viewport width; drives both the canvas width and the form layout.
    ui_width = st_js.st_javascript("window.innerWidth")

    docImg = Image.open(img_file)

    # Read the annotations from disk only once per session; later reruns reuse
    # the copy kept in st.session_state.
    if 'saved_state' not in st.session_state:
        with open(rects_file, "r") as f:
            saved_state = json.load(f)
            st.session_state['saved_state'] = saved_state
    else:
        saved_state = st.session_state['saved_state']

    # Checked -> "transform" (select/move/resize existing rectangles);
    # unchecked -> "rect" (draw new rectangles).
    assign_labels = st.checkbox("Assign Labels", True)
    mode = "transform" if assign_labels else "rect"

    data_processor = DataProcessor()

    # Two page columns with a 4:6 width ratio: canvas, then form.
    col1, col2 = st.columns([4, 6])

    with col1:
        # Canvas height/width passed to the component.
        height = 1296
        width = 864

        # Original document size as recorded in the annotation file's metadata.
        doc_height = saved_state['meta']['image_size']['height']
        doc_width = saved_state['meta']['image_size']['width']

        canvas_width = canvas_available_width(ui_width)

        result_rects = st_sparrow_labeling(
            fill_color="rgba(0, 151, 255, 0.3)",
            stroke_width=2,
            stroke_color="rgba(0, 50, 255, 0.7)",
            background_image=docImg,
            initial_rects=saved_state,
            height=height,
            width=width,
            drawing_mode=mode,
            display_toolbar=True,
            update_streamlit=True,
            canvas_width=canvas_width,
            doc_height=doc_height,
            doc_width=doc_width,
            image_rescale=True,
            key="doc_annotation"
        )

        st.caption("Check 'Assign Labels' to enable editing of labels and values, move and resize the boxes to "
                   "annotate the document.")
        st.caption("Add annotations by clicking and dragging on the document, when 'Assign Labels' is unchecked.")

    with col2:
        # The form is only rendered once the component has returned a result.
        if result_rects is not None:
            with st.form(key="fields_form"):
                # Show the value of the selected rectangle; None and -1 are
                # skipped (presumably both mean "nothing selected" -- confirm
                # against the component).
                if result_rects.current_rect_index is not None and result_rects.current_rect_index != -1:
                    st.write("Selected Field: ",
                             result_rects.rects_data['words'][result_rects.current_rect_index]['value'])
                    st.markdown("---")

                # Pick the form layout from the viewport width:
                # >1500 wide, >1000 average, >500 narrow, otherwise mobile.
                if ui_width > 1500:
                    render_form_wide(result_rects.rects_data['words'], labels, result_rects, data_processor)
                elif ui_width > 1000:
                    render_form_avg(result_rects.rects_data['words'], labels, result_rects, data_processor)
                elif ui_width > 500:
                    render_form_narrow(result_rects.rects_data['words'], labels, result_rects, data_processor)
                else:
                    render_form_mobile(result_rects.rects_data['words'], labels, result_rects, data_processor)

                submit = st.form_submit_button("Save", type="primary")
                if submit:
                    # Persist the edited rectangles, then reload them from disk
                    # so the session state matches what was written.
                    with open(rects_file, "w") as f:
                        json.dump(result_rects.rects_data, f, indent=2)
                    with open(rects_file, "r") as f:
                        saved_state = json.load(f)
                        st.session_state['saved_state'] = saved_state
                    st.write("Saved!")


def render_form_wide(words, labels, result_rects, data_processor):
    """Spread one form element per word over four equal-width columns.

    Columns are filled in order: the first ``ceil(len(words) / 4)`` words go
    into the first column, the next batch into the second, and so on, with
    any remainder landing in the last column.
    """
    columns = st.columns([1, 1, 1, 1])
    per_column = math.ceil(len(words) / 4)
    last = len(columns) - 1

    for idx, word in enumerate(words):
        # per_column is zero only when words is empty, and then this loop
        # body never runs, so the division is safe.
        with columns[min(idx // per_column, last)]:
            render_form_element(word, labels, idx, result_rects, data_processor)


def render_form_avg(words, labels, result_rects, data_processor):
    """Spread one form element per word over three equal-width columns.

    Columns are filled in order, ``ceil(len(words) / 3)`` words per column,
    with any remainder landing in the last column.
    """
    columns = st.columns([1, 1, 1])
    per_column = math.ceil(len(words) / 3)
    last = len(columns) - 1

    for idx, word in enumerate(words):
        # per_column is zero only when words is empty, and then this loop
        # body never runs, so the division is safe.
        with columns[min(idx // per_column, last)]:
            render_form_element(word, labels, idx, result_rects, data_processor)


def render_form_narrow(words, labels, result_rects, data_processor):
    """Spread one form element per word over two equal-width columns.

    The first ``ceil(len(words) / 2)`` words go into the left column and the
    rest into the right column.
    """
    left, right = st.columns([1, 1])
    split_at = math.ceil(len(words) / 2)

    for idx, word in enumerate(words):
        target = left if idx < split_at else right
        with target:
            render_form_element(word, labels, idx, result_rects, data_processor)


def render_form_mobile(words, labels, result_rects, data_processor):
    """Render every word's form element in order, without splitting into columns."""
    for position, word in enumerate(words):
        render_form_element(word, labels, position, result_rects, data_processor)


def render_form_element(rect, labels, i, result_rects, data_processor):
    """Render the value/label widgets for one rectangle and write edits back.

    Args:
        rect: Mapping for one rectangle; its ``'value'`` and ``'label'``
            entries seed the widgets.
        labels: Label choices shown in the select box.
        i: Index of the rectangle; used for the widget keys and for the
            update call.
        result_rects: Component result; ``current_rect_index`` decides which
            element is editable and ``rects_data`` receives the update.
        data_processor: Object whose ``update_rect_data`` stores the widget
            values for rectangle ``i``.
    """
    stored_label = rect['label']

    # A stored label that is not among ``labels`` (for example because the
    # label list changed after the file was saved) used to raise ValueError
    # from list.index and break the whole form. Offer it as an extra option
    # instead, so it is neither lost nor fatal.
    options = labels
    if stored_label and stored_label not in labels:
        options = [*labels, stored_label]
    default_index = options.index(stored_label) if stored_label else 0

    # Only the rectangle currently selected on the canvas is editable.
    locked = i != result_rects.current_rect_index

    value = st.text_input("Value", rect['value'], key=f"field_value_{i}",
                          disabled=locked)
    label = st.selectbox("Label", options, key=f"label_{i}", index=default_index,
                         disabled=locked)
    st.markdown("---")

    data_processor.update_rect_data(result_rects.rects_data, i, value, label)


def canvas_available_width(ui_width):
    """Return the canvas width for a viewport that is ``ui_width`` wide.

    Viewports wider than 500px give the canvas 38% of the width, rounded
    down; anything else gets the full viewport width unchanged.
    """
    wide_enough = ui_width > 500
    return math.floor(ui_width * 38 / 100) if wide_enough else ui_width


if __name__ == "__main__":
    # Choices for each "Label" select box. The leading empty string sits at
    # index 0, which is the default selection for rectangles without a label.
    custom_labels = ["", "item", "item_price", "subtotal", "tax", "total"]
    # Sample document image and its JSON annotation file.
    run("docs/image/receipt_00001.png", "docs/json/receipt_00001.json", custom_labels)
```

## API

```
result_rects = st_sparrow_labeling(
    fill_color="rgba(0, 151, 255, 0.3)",
    stroke_width=2,
    stroke_color="rgba(0, 50, 255, 0.7)",
    background_image=docImg,
    initial_rects=saved_state,
    height=height,
    width=width,
    drawing_mode=mode,
    display_toolbar=True,
    update_streamlit=True,
    canvas_width=canvas_width,
    doc_height=doc_height,
    doc_width=doc_width,
    image_rescale=True,
    key="doc_annotation"
)
```

- **fill_color** : Color of fill for Rect in CSS color property. Defaults to "#eee".
- **stroke_width** : Width of the drawing brush. Defaults to 20.
- **stroke_color** : Color of the drawing brush as a CSS color value. Defaults to "black".
- **background_image** : Pillow Image to display behind canvas. Automatically resized to canvas dimensions. Being behind the canvas, it is not sent back to Streamlit on mouse event. Overrides background_color. Changes to this will reset canvas contents.
- **initial_rects** : Initial Rects to display on canvas. Defaults to empty list.
- **update_streamlit** : When `True`, send canvas data to Streamlit whenever an object/selection is updated or on mouse up.
- **height** : Height of canvas in pixels. Defaults to 400.
- **width** : Width of canvas in pixels. Defaults to 600.
- **drawing_mode** : Enable free drawing when "freedraw", object manipulation when "transform", otherwise create new objects with "line", "rect", "circle" and "polygon". Defaults to "freedraw".
  - On "polygon" mode, double-clicking will remove the latest point and right-clicking will close the polygon.
- **initial_drawing** : Initialize canvas with drawings from here. Should be the `json_data` output from other canvas. Beware: if you try to import a drawing from a bigger/smaller canvas, no rescaling is done in the canvas and the import could fail.
- **display_toolbar** : If `False`, don't display the undo/redo/delete toolbar.
- **canvas_width** : Width of the canvas in pixels.
- **doc_height** : Original height of the document in pixels.
- **doc_width** : Original width of the document in pixels.
- **image_rescale** : If `True`, rescale the image to fit the canvas. Defaults to `False`.

## Development

### Install

- JS side

```shell script
cd frontend
npm install
```

- Python side

```shell script
conda create -n streamlit-sparrow-labeling python=3.11
conda activate streamlit-sparrow-labeling
pip install -e .
```

### Run

Both webpack dev server and Streamlit should run at the same time.

- JS side

```shell script
cd frontend
npm run start
```

- Python side

```shell script
streamlit run app.py
```


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/katanaml/streamlit-sparrow-labeling-comp",
    "name": "streamlit-sparrow-labeling",
    "maintainer": "",
    "docs_url": null,
    "requires_python": ">=3.8",
    "maintainer_email": "",
    "keywords": "",
    "author": "Andrej Baranovskij",
    "author_email": "andrejus.baranovskis@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/9c/55/184913b5babff7784a7dc67202b8ecf8d04fb89f6b920412c550ced6dd54/streamlit-sparrow-labeling-0.1.1.tar.gz",
    "platform": null,
    "description": "# Streamlit - Labeling Component\n\nStreamlit component which provides labeling functionality for documents and invoices. Based on [streamlit-drawable-canvas](https://github.com/andfanilo/streamlit-drawable-canvas).\n\n![](./img/demo.png)\n\n## Features\n\n- Option to switch between rectangle selection/resize and new rectangle creation\n- Returning info about currently selected rectangle, this allows to provide label for this rectangle\n- Labeling data is stored in simple/readable JSON format. This data is auto converted to Fabric.js format by the component\n- Canvas resizing and rectangle data resizing. Coordinates are saved in original dimension\n\n## Installation\n\n```shell script\npip install streamlit-sparrow-labeling\n```\n\n## Example Usage\n\nCopy this code snippet:\n\n```python\nfrom PIL import Image\nimport streamlit as st\nimport streamlit_nested_layout\nimport streamlit_javascript as st_js\nfrom streamlit_sparrow_labeling import st_sparrow_labeling\nfrom streamlit_sparrow_labeling import DataProcessor\nimport json\nimport math\n\nst.set_page_config(\n    page_title=\"Sparrow Labeling\",\n    layout=\"wide\"\n)\n\ndef run(img_file, rects_file, labels):\n    ui_width = st_js.st_javascript(\"window.innerWidth\")\n\n    docImg = Image.open(img_file)\n\n    if 'saved_state' not in st.session_state:\n        with open(rects_file, \"r\") as f:\n            saved_state = json.load(f)\n            st.session_state['saved_state'] = saved_state\n    else:\n        saved_state = st.session_state['saved_state']\n\n    assign_labels = st.checkbox(\"Assign Labels\", True)\n    mode = \"transform\" if assign_labels else \"rect\"\n\n    data_processor = DataProcessor()\n\n    col1, col2 = st.columns([4, 6])\n\n    with col1:\n        height = 1296\n        width = 864\n\n        doc_height = saved_state['meta']['image_size']['height']\n        doc_width = saved_state['meta']['image_size']['width']\n\n        canvas_width = 
canvas_available_width(ui_width)\n\n        result_rects = st_sparrow_labeling(\n            fill_color=\"rgba(0, 151, 255, 0.3)\",\n            stroke_width=2,\n            stroke_color=\"rgba(0, 50, 255, 0.7)\",\n            background_image=docImg,\n            initial_rects=saved_state,\n            height=height,\n            width=width,\n            drawing_mode=mode,\n            display_toolbar=True,\n            update_streamlit=True,\n            canvas_width=canvas_width,\n            doc_height=doc_height,\n            doc_width=doc_width,\n            image_rescale=True,\n            key=\"doc_annotation\"\n        )\n\n        st.caption(\"Check 'Assign Labels' to enable editing of labels and values, move and resize the boxes to \"\n                   \"annotate the document.\")\n        st.caption(\"Add annotations by clicking and dragging on the document, when 'Assign Labels' is unchecked.\")\n\n    with col2:\n        if result_rects is not None:\n            with st.form(key=\"fields_form\"):\n                if result_rects.current_rect_index is not None and result_rects.current_rect_index != -1:\n                    st.write(\"Selected Field: \",\n                             result_rects.rects_data['words'][result_rects.current_rect_index]['value'])\n                    st.markdown(\"---\")\n\n                if ui_width > 1500:\n                    render_form_wide(result_rects.rects_data['words'], labels, result_rects, data_processor)\n                elif ui_width > 1000:\n                    render_form_avg(result_rects.rects_data['words'], labels, result_rects, data_processor)\n                elif ui_width > 500:\n                    render_form_narrow(result_rects.rects_data['words'], labels, result_rects, data_processor)\n                else:\n                    render_form_mobile(result_rects.rects_data['words'], labels, result_rects, data_processor)\n\n                submit = st.form_submit_button(\"Save\", type=\"primary\")\n     
           if submit:\n                    with open(rects_file, \"w\") as f:\n                        json.dump(result_rects.rects_data, f, indent=2)\n                    with open(rects_file, \"r\") as f:\n                        saved_state = json.load(f)\n                        st.session_state['saved_state'] = saved_state\n                    st.write(\"Saved!\")\n\n\ndef render_form_wide(words, labels, result_rects, data_processor):\n    col1_form, col2_form, col3_form, col4_form = st.columns([1, 1, 1, 1])\n    num_rows = math.ceil(len(words) / 4)\n\n    for i, rect in enumerate(words):\n        if i < num_rows:\n            with col1_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        elif i < num_rows * 2:\n            with col2_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        elif i < num_rows * 3:\n            with col3_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        else:\n            with col4_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n\n\ndef render_form_avg(words, labels, result_rects, data_processor):\n    col1_form, col2_form, col3_form = st.columns([1, 1, 1])\n    num_rows = math.ceil(len(words) / 3)\n\n    for i, rect in enumerate(words):\n        if i < num_rows:\n            with col1_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        elif i < num_rows * 2:\n            with col2_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        else:\n            with col3_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n\n\ndef render_form_narrow(words, labels, result_rects, data_processor):\n    col1_form, col2_form = st.columns([1, 1])\n    num_rows = math.ceil(len(words) / 2)\n\n    for i, rect in enumerate(words):\n        if 
i < num_rows:\n            with col1_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n        else:\n            with col2_form:\n                render_form_element(rect, labels, i, result_rects, data_processor)\n\n\ndef render_form_mobile(words, labels, result_rects, data_processor):\n    for i, rect in enumerate(words):\n        render_form_element(rect, labels, i, result_rects, data_processor)\n\n\ndef render_form_element(rect, labels, i, result_rects, data_processor):\n    default_index = 0\n    if rect['label']:\n        default_index = labels.index(rect['label'])\n\n    value = st.text_input(\"Value\", rect['value'], key=f\"field_value_{i}\",\n                          disabled=False if i == result_rects.current_rect_index else True)\n    label = st.selectbox(\"Label\", labels, key=f\"label_{i}\", index=default_index,\n                         disabled=False if i == result_rects.current_rect_index else True)\n    st.markdown(\"---\")\n\n    data_processor.update_rect_data(result_rects.rects_data, i, value, label)\n\n\ndef canvas_available_width(ui_width):\n    # Get ~40% of the available width, if the UI is wider than 500px\n    if ui_width > 500:\n        return math.floor(38 * ui_width / 100)\n    else:\n        return ui_width\n\n\nif __name__ == \"__main__\":\n    custom_labels = [\"\", \"item\", \"item_price\", \"subtotal\", \"tax\", \"total\"]\n    run(\"docs/image/receipt_00001.png\", \"docs/json/receipt_00001.json\", custom_labels)\n```\n\n## API\n\n```\nresult_rects = st_sparrow_labeling(\n    fill_color=\"rgba(0, 151, 255, 0.3)\",\n    stroke_width=2,\n    stroke_color=\"rgba(0, 50, 255, 0.7)\",\n    background_image=docImg,\n    initial_rects=saved_state,\n    height=height,\n    width=width,\n    drawing_mode=mode,\n    display_toolbar=True,\n    update_streamlit=True,\n    canvas_width=canvas_width,\n    doc_height=doc_height,\n    doc_width=doc_width,\n    image_rescale=True,\n    
key=\"doc_annotation\"\n)\n```\n\n- **fill_color** : Color of fill for Rect in CSS color property. Defaults to \"#eee\".\n- **stroke_width** : Width of drawing brush in CSS color property. Defaults to 20.\n- **stroke_color** : Color of drawing brush in hex. Defaults to \"black\".\n- **background_image** : Pillow Image to display behind canvas. Automatically resized to canvas dimensions. Being behind the canvas, it is not sent back to Streamlit on mouse event. Overrides background_color. Changes to this will reset canvas contents.\n- **initial_rects** : Initial Rects to display on canvas. Defaults to empty list.\n- **update_streamlit** : Whenever True, send canvas data to Streamlit when object/selection is updated or mouse up.\n- **height** : Height of canvas in pixels. Defaults to 400.\n- **width** : Width of canvas in pixels. Defaults to 600.\n- **drawing_mode** : Enable free drawing when \"freedraw\", object manipulation when \"transform\", otherwise create new objects with \"line\", \"rect\", \"circle\" and \"polygon\". Defaults to \"freedraw\".\n  - On \"polygon\" mode, double-clicking will remove the latest point and right-clicking will close the polygon.\n- **initial_drawing** : Initialize canvas with drawings from here. Should be the `json_data` output from other canvas. Beware: if you try to import a drawing from a bigger/smaller canvas, no rescaling is done in the canvas and the import could fail.\n- **display_toolbar** : If `False`, don't display the undo/redo/delete toolbar.\n- **canvas_width** : Width of the canvas in pixels.\n- **doc_height** : Original height of the document in pixels.\n- **doc_width** : Original width of the document in pixels.\n- **image_rescale** : If `True`, rescale the image to fit the canvas. 
Defaults to `False`.\n\n## Development\n\n### Install\n\n- JS side\n\n```shell script\ncd frontend\nnpm install\n```\n\n- Python side\n\n```shell script\nconda create -n streamlit-sparrow-labeling python=3.11\nconda activate streamlit-sparrow-labeling\npip install -e .\n```\n\n### Run\n\nBoth webpack dev server and Streamlit should run at the same time.\n\n- JS side\n\n```shell script\ncd frontend\nnpm run start\n```\n\n- Python side\n\n```shell script\nstreamlit run app.py\n```\n\n",
    "bugtrack_url": null,
    "license": "",
    "summary": "Streamlit component for invoice document labeling",
    "version": "0.1.1",
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "471862d72a79e91a8bf0c0714e841591",
                "sha256": "894c4e8ecc6824b140795f677780d6a453d595d118d6805484f0f22d84bb4a0a"
            },
            "downloads": -1,
            "filename": "streamlit_sparrow_labeling-0.1.1-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "471862d72a79e91a8bf0c0714e841591",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.8",
            "size": 1141788,
            "upload_time": "2022-12-15T08:46:20",
            "upload_time_iso_8601": "2022-12-15T08:46:20.533849Z",
            "url": "https://files.pythonhosted.org/packages/b4/a3/5caf1621efd0a72d7d30386ea365c3649388c050e229fc58f49d99b45daf/streamlit_sparrow_labeling-0.1.1-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "md5": "a6e8ea56afaf1ad292967c4fae4bceea",
                "sha256": "92c470f63e6b80aea59aee228357337ea841c5252783830e6e492678820338f8"
            },
            "downloads": -1,
            "filename": "streamlit-sparrow-labeling-0.1.1.tar.gz",
            "has_sig": false,
            "md5_digest": "a6e8ea56afaf1ad292967c4fae4bceea",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.8",
            "size": 1130829,
            "upload_time": "2022-12-15T08:46:23",
            "upload_time_iso_8601": "2022-12-15T08:46:23.283969Z",
            "url": "https://files.pythonhosted.org/packages/9c/55/184913b5babff7784a7dc67202b8ecf8d04fb89f6b920412c550ced6dd54/streamlit-sparrow-labeling-0.1.1.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2022-12-15 08:46:23",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "github_user": "katanaml",
    "github_project": "streamlit-sparrow-labeling-comp",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [],
    "lcname": "streamlit-sparrow-labeling"
}
        
Elapsed time: 0.07878s