# What is this?
`bareunpy` is the python 3 library for bareun.
Bareun is a Korean NLP engine
that provides tokenizing and POS tagging for Korean.
## How to install
```shell
pip3 install bareunpy
```
## How to get bareun
- Go to https://bareun.ai/.
- With registration, for the first time, you can get an API-KEY to use it freely.
- With API-KEY, you can install the `bareun1` server.
- Or you can make a call to use this `bareunpy` library to any servers.
- Or use docker image. See https://hub.docker.com/r/bareunai/bareun
```shell
docker pull bareunai/bareun:latest
```
## How to use, tagger
```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tagger
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# If you have your own localhost bareun.
my_tagger = Tagger(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656.
my_tagger = Tagger(API_KEY, '10.8.3.211', 15656)
# print results.
res = my_tagger.tags(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = res.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')
print(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')
print(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')
print(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')
## Advanced usage.
for sent in m.sentences:
for token in sent.tokens:
for m in token.morphemes:
print(f'{m.text.content}/{m.tag}:{m.probability}:{m.out_of_vocab}')
# get json object
jo = res.as_json()
print(jo)
# get tuple of pos tagging.
pa = res.pos()
print(pa)
# another methods
ma = res.morphs()
print(ma)
na = res.nouns()
print(na)
va = res.verbs()
print(va)
# custom dictionary
cust_dic = my_tagger.custom_dict("my")
cust_dic.copy_np_set({'내고유명사', '우리집고유명사'})
cust_dic.copy_cp_set({'코로나19'})
cust_dic.copy_cp_caret_set({'코로나^백신', '독감^백신'})
cust_dic.update()
# load prev custom dict
cust_dict2 = tagger.custom_dict("my")
cust_dict2.load()
my_tagger.set_domain('my')
my_tagger.pos('코로나19는 언제 끝날까요?')
```
## How to use, tokenizer
```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tokenizer
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# If you have your own localhost bareun.
my_tokenizer = Tokenizer(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656.
my_tokenizer = Tokenizer(API_KEY, '10.8.3.211', 15656)
# print results.
tokenized = my_tokenizer.tokenize_list(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = tokenized.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')
print(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')
print(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')
print(f'hint of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')
## Advanced usage.
for sent in m.sentences:
for token in sent.tokens:
for m in token.segments:
print(f'{m.text.content}/{m.hint}')
# get json object
jo = tokenized.as_json()
print(jo)
# get tuple of segments
ss = tokenized.segments()
print(ss)
ns = tokenized.nouns()
print(ns)
vs = tokenized.verbs()
print(vs)
# postpositions: 조사
ps = tokenized.postpositions()
print(ps)
# Adverbs, 부사
ass = tokenized.adverbs()
print(ass)
ss = tokenized.symbols()
print(ss)
```
## How to use, spelling corrector
```python
from bareunpy import Corrector
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# Initialize Corrector
corrector = Corrector(API_KEY)
# Single sentence correction
response = corrector.correct_error("영수 도 줄기가 얇어서 시들을 것 같은 꽃에물을 주었다.")
print(f"Original: {response.origin}")
print(f"Corrected: {response.revised}")
corrector.print_results(response)
# Multiple sentences correction
responses = corrector.correct_error_list([
"어머니 께서 만들어주신김치찌게가너무맵다며동생이울어버렸다.",
"영수 도 줄기가 얇어서 시들을 것 같은 꽃에물을 주었다."
])
for res in responses:
print(f"Original: {res.origin}")
print(f"Corrected: {res.revised}")
corrector.print_results(responses)
# JSON output
corrector.print_as_json(response)
```
Raw data
{
"_id": null,
"home_page": "https://bareun.ai/",
"name": "bareunpy",
"maintainer": null,
"docs_url": null,
"requires_python": "<4.0,>=3.6",
"maintainer_email": null,
"keywords": "NLP, Korean, Deep Learning, POS tagger, bareun",
"author": "Gihyun YUN",
"author_email": "gih2yun@baikal.ai",
"download_url": "https://files.pythonhosted.org/packages/81/93/22e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748/bareunpy-1.6.6.tar.gz",
"platform": null,
"description": "# What is this?\n\n`bareunpy` is the python 3 library for bareun.\n\nBareun is a Korean NLP,\nwhich provides tokenizing, POS tagging for Korean.\n\n## How to install\n\n```shell\npip3 install bareunpy\n```\n\n## How to get bareun\n- Go to https://bareun.ai/.\n - With registration, for the first time, you can get a API-KEY to use it freely.\n - With API-KEY, you can install the `bareun1` server.\n - Or you can make a call to use this `bareunpy` library to any servers.\n- Or use docker image. See https://hub.docker.com/r/bareunai/bareun\n```shell\ndocker pull bareunai/bareun:latest\n```\n\n## How to use, tagger\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom bareunpy import Tagger\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\nmy_tagger = Tagger(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\nmy_tagger = Tagger(API_KEY, '10.8.3.211', 15656)\n\n\n# print results. 
\nres = tagger.tags([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = res.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')\nprint(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')\nprint(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')\nprint(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')\n\n## Advanced usage.\nfor sent in m.sentences:\n for token in sent.tokens:\n for m in token.morphemes:\n print(f'{m.text.content}/{m.tag}:{m.probability}:{m.out_of_vocab})\n\n# get json object\njo = res.as_json()\nprint(jo)\n\n# get tuple of pos tagging.\npa = res.pos()\nprint(pa)\n# another methods\nma = res.morphs()\nprint(ma)\nna = res.nouns()\nprint(na)\nva = res.verbs()\nprint(va)\n\n# custom dictionary\ncust_dic = tagger.custom_dict(\"my\")\ncust_dic.copy_np_set({'\ub0b4\uace0\uc720\uba85\uc0ac', '\uc6b0\ub9ac\uc9d1\uace0\uc720\uba85\uc0ac'})\ncust_dic.copy_cp_set({'\ucf54\ub85c\ub09819'})\ncust_dic.copy_cp_caret_set({'\ucf54\ub85c\ub098^\ubc31\uc2e0', '\"\ub3c5\uac10^\ubc31\uc2e0'})\ncust_dic.update()\n\n# laod prev custom dict\ncust_dict2 = tagger.custom_dict(\"my\")\ncust_dict2.load()\n\ntagger.set_domain('my')\ntagger.pos('\ucf54\ub85c\ub09819\ub294 \uc5b8\uc81c \ub05d\ub0a0\uae4c\uc694?')\n```\n\n\n## How to use, tokenizer\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom bareunpy import Tokenizer\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c 
\uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\nmy_tokenizer = Tokenizer(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\nmy_tokenizer = Tagger(API_KEY, '10.8.3.211', 15656)\n\n\n# print results. \ntokenized = tokenizer.tokenize_list([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = tokenized.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')\nprint(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')\nprint(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')\nprint(f'hint of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')\n\n## Advanced usage.\nfor sent in m.sentences:\n for token in sent.tokens:\n for m in token.segments:\n print(f'{m.text.content}/{m.hint})\n\n# get json object\njo = tokenized.as_json()\nprint(jo)\n\n# get tuple of segments\nss = tokenized.segments()\nprint(ss)\nns = tokenized.nouns()\nprint(ns)\nvs = tokenized.verbs()\nprint(vs)\n# postpositions: \uc870\uc0ac\nps = tokenized.postpositions()\nprint(ps)\n# Adverbs, \ubd80\uc0ac\nass = tokenized.adverbs()\nprint(ass)\nss = tokenized.symbols()\nprint(ss)\n\n```\n\n## How to use, spelling corrector\n```python\nfrom bareunpy import Corrector\n\n# You can get an API-KEY from 
https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# Initialize Corrector\ncorrector = Corrector(API_KEY)\n\n# Single sentence correction\nresponse = corrector.correct_error(\"\uc601\uc218 \ub3c4 \uc904\uae30\uac00 \uc587\uc5b4\uc11c \uc2dc\ub4e4\uc744 \uac83 \uac19\uc740 \uaf43\uc5d0\ubb3c\uc744 \uc8fc\uc5c8\ub2e4.\")\nprint(f\"Original: {response.origin}\")\nprint(f\"Corrected: {response.revised}\")\ncorrector.print_results(response)\n\n# Multiple sentences correction\nresponses = corrector.correct_error_list([\n \"\uc5b4\uba38\ub2c8 \uaed8\uc11c \ub9cc\ub4e4\uc5b4\uc8fc\uc2e0\uae40\uce58\ucc0c\uac8c\uac00\ub108\ubb34\ub9f5\ub2e4\uba70\ub3d9\uc0dd\uc774\uc6b8\uc5b4\ubc84\ub838\ub2e4.\",\n \"\uc601\uc218 \ub3c4 \uc904\uae30\uac00 \uc587\uc5b4\uc11c \uc2dc\ub4e4\uc744 \uac83 \uac19\uc740 \uaf43\uc5d0\ubb3c\uc744 \uc8fc\uc5c8\ub2e4.\"\n])\nfor res in responses:\n print(f\"Original: {res.origin}\")\n print(f\"Corrected: {res.revised}\")\n\ncorrector.print_results(responses)\n\n# JSON output\ncorrector.print_as_json(response)\n\n```",
"bugtrack_url": null,
"license": "BSD-3-Clause",
"summary": "The bareun python library using grpc",
"version": "1.6.6",
"project_urls": {
"Homepage": "https://bareun.ai/",
"Repository": "https://github.com/bareun-nlp/bareunpy"
},
"split_keywords": [
"nlp",
" korean",
" deep learning",
" pos tagger",
" bareun"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "459d41264a8371f6164f2246ae266295198891c5901b0f80af48864e3a4672f3",
"md5": "e0b69c1c4268acb177be1abf761d5887",
"sha256": "36a318e709f6781c345c2ff1f9fec7378d53e86ed18955819ecc86fee31dcabc"
},
"downloads": -1,
"filename": "bareunpy-1.6.6-py3-none-any.whl",
"has_sig": false,
"md5_digest": "e0b69c1c4268acb177be1abf761d5887",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<4.0,>=3.6",
"size": 20715,
"upload_time": "2024-12-27T06:16:56",
"upload_time_iso_8601": "2024-12-27T06:16:56.288114Z",
"url": "https://files.pythonhosted.org/packages/45/9d/41264a8371f6164f2246ae266295198891c5901b0f80af48864e3a4672f3/bareunpy-1.6.6-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "819322e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748",
"md5": "7ceb4a628d114b2197997446699b290c",
"sha256": "7df1037dcc97302fccc351fcb925031b9935bf06b2b33eb2bbb8f942180ee536"
},
"downloads": -1,
"filename": "bareunpy-1.6.6.tar.gz",
"has_sig": false,
"md5_digest": "7ceb4a628d114b2197997446699b290c",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<4.0,>=3.6",
"size": 15605,
"upload_time": "2024-12-27T06:16:58",
"upload_time_iso_8601": "2024-12-27T06:16:58.926254Z",
"url": "https://files.pythonhosted.org/packages/81/93/22e31bd1b95cf785a8ca9bdcba839b19ac3d1b5b149cf7155e407fa8a748/bareunpy-1.6.6.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-12-27 06:16:58",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "bareun-nlp",
"github_project": "bareunpy",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "grpcio",
"specs": [
[
"==",
"1.46.0"
]
]
},
{
"name": "googleapis-common-protos",
"specs": [
[
"==",
"1.56.0"
]
]
},
{
"name": "protobuf",
"specs": [
[
">=",
"3.19.4"
]
]
},
{
"name": "bareun-apis",
"specs": [
[
"==",
"0.13.0"
]
]
},
{
"name": "setuptools",
"specs": [
[
"~=",
"60.5.0"
]
]
},
{
"name": "pytest",
"specs": [
[
">=",
"7.2.1"
]
]
}
],
"lcname": "bareunpy"
}