bareunpy


Namebareunpy JSON
Version 1.6.6 PyPI version JSON
download
home_pagehttps://bareun.ai/
SummaryThe bareun python library using grpc
upload_time2024-12-27 06:16:58
maintainerNone
docs_urlNone
authorGihyun YUN
requires_python<4.0,>=3.6
licenseBSD-3-Clause
keywords nlp korean deep learning pos tagger bareun
VCS
bugtrack_url
requirements grpcio googleapis-common-protos protobuf bareun-apis setuptools pytest
Travis-CI No Travis.
coveralls test coverage No coveralls.
# What is this?

`bareunpy` is the python 3 library for bareun.

Bareun is a Korean NLP engine
that provides tokenization and POS tagging for Korean.

## How to install

```shell
pip3 install bareunpy
```

## How to get bareun
- Go to https://bareun.ai/.
  - When you register for the first time, you get an API-KEY that lets you use it freely.
  - With the API-KEY, you can install the `bareun1` server.
  - Or you can use this `bareunpy` library to call any running bareun server.
- Or use the Docker image. See https://hub.docker.com/r/bareunai/bareun
```shell
docker pull bareunai/bareun:latest
```

## How to use, tagger

```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tagger

# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# Enter below the API KEY ("koba-...") issued at https://bareun.ai/ after
# verifying your email. You can look it up under "Login - My Info".
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- Replace this with your own API KEY

# Create a single tagger; every call below goes through this instance.
# If you have your own localhost bareun.
tagger = Tagger(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656,
# use the following line instead of the one above.
# tagger = Tagger(API_KEY, '10.8.3.211', 15656)


# Tag a list of sentences and print the results.
res = tagger.tags(["안녕하세요.", "반가워요!"])

# get protobuf message.
m = res.msg()
# Two equivalent ways of dumping the message as UTF-8 text.
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')
print(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')
print(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')
print(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')

## Advanced usage.
# Walk sentences -> tokens -> morphemes. The loop variable is named `morph`
# so that the protobuf message `m` is not rebound while it is being iterated.
for sent in m.sentences:
    for token in sent.tokens:
        for morph in token.morphemes:
            print(f'{morph.text.content}/{morph.tag}:{morph.probability}:{morph.out_of_vocab}')

# get json object
jo = res.as_json()
print(jo)

# get tuple of pos tagging.
pa = res.pos()
print(pa)
# other accessors on the tagged result
ma = res.morphs()
print(ma)
na = res.nouns()
print(na)
va = res.verbs()
print(va)

# custom dictionary
# NOTE(review): the copy_*_set helpers presumably fill the proper-noun,
# compound-noun and caret-separated compound-noun sets - confirm against the
# bareunpy custom dictionary API.
cust_dic = tagger.custom_dict("my")
cust_dic.copy_np_set({'내고유명사', '우리집고유명사'})
cust_dic.copy_cp_set({'코로나19'})
cust_dic.copy_cp_caret_set({'코로나^백신', '독감^백신'})
cust_dic.update()

# load the previously saved custom dict
cust_dict2 = tagger.custom_dict("my")
cust_dict2.load()

# Select the custom dictionary named 'my' before tagging.
tagger.set_domain('my')
tagger.pos('코로나19는 언제 끝날까요?')
```


## How to use, tokenizer

```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tokenizer

# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# Enter below the API KEY ("koba-...") issued at https://bareun.ai/ after
# verifying your email. You can look it up under "Login - My Info".
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- Replace this with your own API KEY

# Create a single tokenizer; every call below goes through this instance.
# If you have your own localhost bareun.
tokenizer = Tokenizer(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656,
# use the following line instead of the one above.
# tokenizer = Tokenizer(API_KEY, '10.8.3.211', 15656)


# Tokenize a list of sentences and print the results.
tokenized = tokenizer.tokenize_list(["안녕하세요.", "반가워요!"])

# get protobuf message.
m = tokenized.msg()
# Two equivalent ways of dumping the message as UTF-8 text.
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')
print(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')
print(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')
print(f'hint of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')

## Advanced usage.
# Walk sentences -> tokens -> segments. The loop variable is named `seg`
# so that the protobuf message `m` is not rebound while it is being iterated.
for sent in m.sentences:
    for token in sent.tokens:
        for seg in token.segments:
            print(f'{seg.text.content}/{seg.hint}')

# get json object
jo = tokenized.as_json()
print(jo)

# get tuple of segments
ss = tokenized.segments()
print(ss)
ns = tokenized.nouns()
print(ns)
vs = tokenized.verbs()
print(vs)
# postpositions (Korean particles)
ps = tokenized.postpositions()
print(ps)
# adverbs
advs = tokenized.adverbs()
print(advs)
# symbols; kept in its own name so the segments in `ss` are not overwritten
syms = tokenized.symbols()
print(syms)

```

## How to use, spelling corrector
```python
# Example: correct Korean spelling/spacing errors with bareunpy's Corrector.
from bareunpy import Corrector

# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# Enter below the API KEY ("koba-...") issued at https://bareun.ai/ after
# verifying your email. You can look it up under "Login - My Info".
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321"  # <- Replace this with your own API KEY

# Initialize Corrector
# Only the API key is passed here; NOTE(review): the server address is
# presumably a library default - confirm against the Corrector constructor.
corrector = Corrector(API_KEY)

# Single sentence correction
# The response exposes the input text as `.origin` and the corrected text as
# `.revised`, as read by the two prints below.
response = corrector.correct_error("영수 도 줄기가 얇어서 시들을 것 같은 꽃에물을 주었다.")
print(f"Original: {response.origin}")
print(f"Corrected: {response.revised}")
corrector.print_results(response)

# Multiple sentences correction
# correct_error_list takes a list of sentences; the result is iterated below,
# one response per loop step, each with the same `.origin` / `.revised` fields.
responses = corrector.correct_error_list([
    "어머니 께서 만들어주신김치찌게가너무맵다며동생이울어버렸다.",
    "영수 도 줄기가 얇어서 시들을 것 같은 꽃에물을 주었다."
])
for res in responses:
    print(f"Original: {res.origin}")
    print(f"Corrected: {res.revised}")

# print_results is called here with the whole list of responses, whereas the
# single-sentence section above passes one response object.
corrector.print_results(responses)

# JSON output
# Prints the single-sentence response from above (not the list).
corrector.print_as_json(response)

```
            

Raw data

            {
    "_id": null,
    "home_page": "https://bareun.ai/",
    "name": "bareunpy",
    "maintainer": null,
    "docs_url": null,
    "requires_python": "<4.0,>=3.6",
    "maintainer_email": null,
    "keywords": "NLP, Korean, Deep Learning, POS tagger, bareun",
    "author": "Gihyun YUN",
    "author_email": "gih2yun@baikal.ai",
    "download_url": "https://files.pythonhosted.org/packages/81/93/22e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748/bareunpy-1.6.6.tar.gz",
    "platform": null,
    "description": "# What is this?\n\n`bareunpy` is the python 3 library for bareun.\n\nBareun is a Korean NLP,\nwhich provides tokenizing, POS tagging for Korean.\n\n## How to install\n\n```shell\npip3 install bareunpy\n```\n\n## How to get bareun\n- Go to https://bareun.ai/.\n  - With registration, for the first time, you can get a API-KEY to use it freely.\n  - With API-KEY, you can install the `bareun1` server.\n  - Or you can make a call to use this `bareunpy` library to any servers.\n- Or use docker image. See https://hub.docker.com/r/bareunai/bareun\n```shell\ndocker pull bareunai/bareun:latest\n```\n\n## How to use, tagger\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom bareunpy import Tagger\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\nmy_tagger = Tagger(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\nmy_tagger = Tagger(API_KEY, '10.8.3.211', 15656)\n\n\n# print results. 
\nres = tagger.tags([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = res.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')\nprint(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')\nprint(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')\nprint(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')\n\n## Advanced usage.\nfor sent in m.sentences:\n    for token in sent.tokens:\n        for m in token.morphemes:\n            print(f'{m.text.content}/{m.tag}:{m.probability}:{m.out_of_vocab})\n\n# get json object\njo = res.as_json()\nprint(jo)\n\n# get tuple of pos tagging.\npa = res.pos()\nprint(pa)\n# another methods\nma = res.morphs()\nprint(ma)\nna = res.nouns()\nprint(na)\nva = res.verbs()\nprint(va)\n\n# custom dictionary\ncust_dic = tagger.custom_dict(\"my\")\ncust_dic.copy_np_set({'\ub0b4\uace0\uc720\uba85\uc0ac', '\uc6b0\ub9ac\uc9d1\uace0\uc720\uba85\uc0ac'})\ncust_dic.copy_cp_set({'\ucf54\ub85c\ub09819'})\ncust_dic.copy_cp_caret_set({'\ucf54\ub85c\ub098^\ubc31\uc2e0', '\"\ub3c5\uac10^\ubc31\uc2e0'})\ncust_dic.update()\n\n# laod prev custom dict\ncust_dict2 = tagger.custom_dict(\"my\")\ncust_dict2.load()\n\ntagger.set_domain('my')\ntagger.pos('\ucf54\ub85c\ub09819\ub294 \uc5b8\uc81c \ub05d\ub0a0\uae4c\uc694?')\n```\n\n\n## How to use, tokenizer\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom bareunpy import Tokenizer\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 
\"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\nmy_tokenizer = Tokenizer(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\nmy_tokenizer = Tagger(API_KEY, '10.8.3.211', 15656)\n\n\n# print results. \ntokenized = tokenizer.tokenize_list([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = tokenized.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')\nprint(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')\nprint(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')\nprint(f'hint of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')\n\n## Advanced usage.\nfor sent in m.sentences:\n    for token in sent.tokens:\n        for m in token.segments:\n            print(f'{m.text.content}/{m.hint})\n\n# get json object\njo = tokenized.as_json()\nprint(jo)\n\n# get tuple of segments\nss = tokenized.segments()\nprint(ss)\nns = tokenized.nouns()\nprint(ns)\nvs = tokenized.verbs()\nprint(vs)\n# postpositions: \uc870\uc0ac\nps = tokenized.postpositions()\nprint(ps)\n# Adverbs, \ubd80\uc0ac\nass = tokenized.adverbs()\nprint(ass)\nss = tokenized.symbols()\nprint(ss)\n\n```\n\n## How to use, spelling corrector\n```python\nfrom bareunpy import 
Corrector\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\"  # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# Initialize Corrector\ncorrector = Corrector(API_KEY)\n\n# Single sentence correction\nresponse = corrector.correct_error(\"\uc601\uc218 \ub3c4 \uc904\uae30\uac00 \uc587\uc5b4\uc11c \uc2dc\ub4e4\uc744 \uac83 \uac19\uc740 \uaf43\uc5d0\ubb3c\uc744 \uc8fc\uc5c8\ub2e4.\")\nprint(f\"Original: {response.origin}\")\nprint(f\"Corrected: {response.revised}\")\ncorrector.print_results(response)\n\n# Multiple sentences correction\nresponses = corrector.correct_error_list([\n    \"\uc5b4\uba38\ub2c8 \uaed8\uc11c \ub9cc\ub4e4\uc5b4\uc8fc\uc2e0\uae40\uce58\ucc0c\uac8c\uac00\ub108\ubb34\ub9f5\ub2e4\uba70\ub3d9\uc0dd\uc774\uc6b8\uc5b4\ubc84\ub838\ub2e4.\",\n    \"\uc601\uc218 \ub3c4 \uc904\uae30\uac00 \uc587\uc5b4\uc11c \uc2dc\ub4e4\uc744 \uac83 \uac19\uc740 \uaf43\uc5d0\ubb3c\uc744 \uc8fc\uc5c8\ub2e4.\"\n])\nfor res in responses:\n    print(f\"Original: {res.origin}\")\n    print(f\"Corrected: {res.revised}\")\n\ncorrector.print_results(responses)\n\n# JSON output\ncorrector.print_as_json(response)\n\n```",
    "bugtrack_url": null,
    "license": "BSD-3-Clause",
    "summary": "The bareun python library using grpc",
    "version": "1.6.6",
    "project_urls": {
        "Homepage": "https://bareun.ai/",
        "Repository": "https://github.com/bareun-nlp/bareunpy"
    },
    "split_keywords": [
        "nlp",
        " korean",
        " deep learning",
        " pos tagger",
        " bareun"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "459d41264a8371f6164f2246ae266295198891c5901b0f80af48864e3a4672f3",
                "md5": "e0b69c1c4268acb177be1abf761d5887",
                "sha256": "36a318e709f6781c345c2ff1f9fec7378d53e86ed18955819ecc86fee31dcabc"
            },
            "downloads": -1,
            "filename": "bareunpy-1.6.6-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "e0b69c1c4268acb177be1abf761d5887",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": "<4.0,>=3.6",
            "size": 20715,
            "upload_time": "2024-12-27T06:16:56",
            "upload_time_iso_8601": "2024-12-27T06:16:56.288114Z",
            "url": "https://files.pythonhosted.org/packages/45/9d/41264a8371f6164f2246ae266295198891c5901b0f80af48864e3a4672f3/bareunpy-1.6.6-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "819322e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748",
                "md5": "7ceb4a628d114b2197997446699b290c",
                "sha256": "7df1037dcc97302fccc351fcb925031b9935bf06b2b33eb2bbb8f942180ee536"
            },
            "downloads": -1,
            "filename": "bareunpy-1.6.6.tar.gz",
            "has_sig": false,
            "md5_digest": "7ceb4a628d114b2197997446699b290c",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": "<4.0,>=3.6",
            "size": 15605,
            "upload_time": "2024-12-27T06:16:58",
            "upload_time_iso_8601": "2024-12-27T06:16:58.926254Z",
            "url": "https://files.pythonhosted.org/packages/81/93/22e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748/bareunpy-1.6.6.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-12-27 06:16:58",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "bareun-nlp",
    "github_project": "bareunpy",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "grpcio",
            "specs": [
                [
                    "==",
                    "1.46.0"
                ]
            ]
        },
        {
            "name": "googleapis-common-protos",
            "specs": [
                [
                    "==",
                    "1.56.0"
                ]
            ]
        },
        {
            "name": "protobuf",
            "specs": [
                [
                    ">=",
                    "3.19.4"
                ]
            ]
        },
        {
            "name": "bareun-apis",
            "specs": [
                [
                    "==",
                    "0.13.0"
                ]
            ]
        },
        {
            "name": "setuptools",
            "specs": [
                [
                    "~=",
                    "60.5.0"
                ]
            ]
        },
        {
            "name": "pytest",
            "specs": [
                [
                    ">=",
                    "7.2.1"
                ]
            ]
        }
    ],
    "lcname": "bareunpy"
}
        
Elapsed time: 0.73151s