# What is this?
`bareunpy` is the python 3 library for bareun.
Bareun is a Korean NLP engine
which provides tokenizing and POS tagging for Korean.
## How to install
```shell
pip3 install bareunpy
```
## How to get bareun
- Go to https://bareun.ai/.
- With registration, for the first time, you can get an API-KEY to use it freely.
- With API-KEY, you can install the `bareun1` server.
- Or you can make a call to use this `bareunpy` library to any servers.
- Or use docker image. See https://hub.docker.com/r/bareunai/bareun
```shell
docker pull bareunai/bareun:latest
```
## How to use, tagger
```python
import sys
import google.protobuf.text_format as tf
from google.protobuf.json_format import MessageToDict
from bareunpy import Tagger
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# If you have your own localhost bareun.
tagger = Tagger(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656.
tagger = Tagger(API_KEY, '10.8.3.211', 15656)
# If you don’t want to run your own Bareun server, you can use the official hosted endpoint.
# Just provide your API key and point the client to `api.bareun.ai` on port `443`.
tagger = Tagger(API_KEY, 'api.bareun.ai', 443)
# print results.
res = tagger.tags(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = res.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')
print(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')
print(f'first morph of first token in sentences[0] is {MessageToDict(m.sentences[0].tokens[0].morphemes[0])}')
print(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')
## Advanced usage.
for sent in m.sentences:
for token in sent.tokens:
for m in token.morphemes:
print(f'{m.text.content}/{m.tag}:{m.probability}:{m.out_of_vocab}')
# get json object
jo = res.as_json()
print(jo)
# get tuple of pos tagging.
pa = res.pos()
print(pa)
# another methods
ma = res.morphs()
print(ma)
na = res.nouns()
print(na)
va = res.verbs()
print(va)
# custom dictionary
cust_dic = tagger.custom_dict("my")
cust_dic.copy_np_set({'내고유명사', '우리집고유명사'})
cust_dic.copy_cp_set({'코로나19'})
cust_dic.copy_cp_caret_set({'코로나^백신', '독감^백신'})
cust_dic.update()
# load previously saved custom dictionary
cust_dict2 = tagger.custom_dict("my")
cust_dict2.load()
tagger.set_domain('my')
tagger.pos('코로나19는 언제 끝날까요?')
```
## How to use, tokenizer
```python
import sys
import google.protobuf.text_format as tf
from google.protobuf.json_format import MessageToDict
from bareunpy import Tokenizer
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# If you have your own localhost bareun.
tokenizer = Tokenizer(API_KEY, 'localhost')
# or if you have your own bareun which is running on 10.8.3.211:15656.
tokenizer = Tokenizer(API_KEY, '10.8.3.211', 15656)
# If you don’t want to run your own Bareun server, you can use the official hosted endpoint.
# Just provide your API key and point the client to `api.bareun.ai` on port `443`.
tokenizer = Tokenizer(API_KEY, 'api.bareun.ai', 443)
# print results.
tokenized = tokenizer.tokenize_list(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = tokenized.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')
print(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')
print(f'first segment of first token in sentences[0] is {MessageToDict(m.sentences[0].tokens[0].segments[0])}')
print(f'hint of first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')
## Advanced usage.
for sent in m.sentences:
for token in sent.tokens:
for m in token.segments:
print(f'{m.text.content}/{m.hint}')
# get json object
jo = tokenized.as_json()
print(jo)
# get tuple of segments
ss = tokenized.segments()
print(ss)
ns = tokenized.nouns()
print(ns)
vs = tokenized.verbs()
print(vs)
# postpositions: 조사
ps = tokenized.postpositions()
print(ps)
# Adverbs, 부사
ass = tokenized.adverbs()
print(ass)
ss = tokenized.symbols()
print(ss)
```
## How to use, spelling corrector
```python
from bareunpy import Corrector
# You can get an API-KEY from https://bareun.ai/
# Please note that you need to sign up and verify your email.
# 아래에 "https://bareun.ai/"에서 이메일 인증 후 발급받은 API KEY("koba-...")를 입력해주세요. "로그인-내정보 확인"
API_KEY = "koba-ABCDEFG-1234567-LMNOPQR-7654321" # <- 본인의 API KEY로 교체(Replace this with your own API KEY)
# Initialize Corrector
# The spelling corrector is available only via the hosted Bareun API.
# There is no local server for Corrector. To use it, supply your API key and connect to:
corrector = Corrector(API_KEY, 'api.bareun.ai', 443)
# sentence correction
response = corrector.correct_error("영수 도 줄기가 얇어서 시들을 것 같은 꽃에물을 주었다.")
print(f"Original: {response.origin}")
print(f"Corrected: {response.revised}")
corrector.print_results(response)
```
Raw data
{
"_id": null,
"home_page": "https://bareun.ai/",
"name": "bareunpy",
"maintainer": null,
"docs_url": null,
"requires_python": "<4.0,>=3.6",
"maintainer_email": null,
"keywords": "NLP, Korean, Deep Learning, POS tagger, bareun",
"author": "Gihyun YUN",
"author_email": "gih2yun@baikal.ai",
"download_url": "https://files.pythonhosted.org/packages/00/4a/a9b6bd80582d2e90bb60b653f29a0695a5e9a1f72b76f2c69d38293a94ce/bareunpy-1.7.2.tar.gz",
"platform": null,
"description": "# What is this?\n\n`bareunpy` is the python 3 library for bareun.\n\nBareun is a Korean NLP,\nwhich provides tokenizing, POS tagging for Korean.\n\n## How to install\n\n```shell\npip3 install bareunpy\n```\n\n## How to get bareun\n- Go to https://bareun.ai/.\n - With registration, for the first time, you can get a API-KEY to use it freely.\n - With API-KEY, you can install the `bareun1` server.\n - Or you can make a call to use this `bareunpy` library to any servers.\n- Or use docker image. See https://hub.docker.com/r/bareunai/bareun\n```shell\ndocker pull bareunai/bareun:latest\n```\n\n## How to use, tagger\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom google.protobuf.json_format import MessageToDict\nfrom bareunpy import Tagger\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\ntagger = Tagger(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\ntagger = Tagger(API_KEY, '10.8.3.211', 15656)\n# If you don\u2019t want to run your own Bareun server, you can use the official hosted endpoint.\n# Just provide your API key and point the client to `api.bareun.ai` on port `443`.\ntagger = Tagger(API_KEY, 'api.bareun.ai', 443)\n\n# print results. 
\nres = tagger.tags([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = res.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')\nprint(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')\nprint(f'first morph of first token in sentences[0] is {MessageToDict(m.sentences[0].tokens[0].morphemes[0])}')\nprint(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')\n\n## Advanced usage.\nfor sent in m.sentences:\n for token in sent.tokens:\n for m in token.morphemes:\n print(f'{m.text.content}/{m.tag}:{m.probability}:{m.out_of_vocab}')\n\n# get json object\njo = res.as_json()\nprint(jo)\n\n# get tuple of pos tagging.\npa = res.pos()\nprint(pa)\n# another methods\nma = res.morphs()\nprint(ma)\nna = res.nouns()\nprint(na)\nva = res.verbs()\nprint(va)\n\n# custom dictionary\ncust_dic = tagger.custom_dict(\"my\")\ncust_dic.copy_np_set({'\ub0b4\uace0\uc720\uba85\uc0ac', '\uc6b0\ub9ac\uc9d1\uace0\uc720\uba85\uc0ac'})\ncust_dic.copy_cp_set({'\ucf54\ub85c\ub09819'})\ncust_dic.copy_cp_caret_set({'\ucf54\ub85c\ub098^\ubc31\uc2e0', '\"\ub3c5\uac10^\ubc31\uc2e0'})\ncust_dic.update()\n\n# laod prev custom dict\ncust_dict2 = tagger.custom_dict(\"my\")\ncust_dict2.load()\n\ntagger.set_domain('my')\ntagger.pos('\ucf54\ub85c\ub09819\ub294 \uc5b8\uc81c \ub05d\ub0a0\uae4c\uc694?')\n```\n\n\n## How to use, tokenizer\n\n```python\nimport sys\nimport google.protobuf.text_format as tf\nfrom google.protobuf.json_format import MessageToDict\nfrom bareunpy import Tokenizer\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your 
email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# If you have your own localhost bareun.\ntokenizer = Tokenizer(API_KEY, 'localhost')\n# or if you have your own bareun which is running on 10.8.3.211:15656.\ntokenizer = Tagger(API_KEY, '10.8.3.211', 15656)\n# If you don\u2019t want to run your own Bareun server, you can use the official hosted endpoint.\n# Just provide your API key and point the client to `api.bareun.ai` on port `443`.\ntokenizer = Tagger(API_KEY, 'api.bareun.ai', 443)\n\n# print results. \ntokenized = tokenizer.tokenize_list([\"\uc548\ub155\ud558\uc138\uc694.\", \"\ubc18\uac00\uc6cc\uc694!\"])\n\n# get protobuf message.\nm = tokenized.msg()\ntf.PrintMessage(m, out=sys.stdout, as_utf8=True)\nprint(tf.MessageToString(m, as_utf8=True))\nprint(f'length of sentences is {len(m.sentences)}')\n## output : 2\nprint(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')\nprint(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')\nprint(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')\nprint(f'first segment of first token in sentences[0] is {MessageToDict(m.sentences[0].tokens[0].segments[0])}')\nprint(f'hint of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')\n\n## Advanced usage.\nfor sent in m.sentences:\n for token in sent.tokens:\n for m in token.segments:\n print(f'{m.text.content}/{m.hint}')\n\n# get json object\njo = tokenized.as_json()\nprint(jo)\n\n# get tuple of segments\nss = tokenized.segments()\nprint(ss)\nns = tokenized.nouns()\nprint(ns)\nvs = tokenized.verbs()\nprint(vs)\n# 
postpositions: \uc870\uc0ac\nps = tokenized.postpositions()\nprint(ps)\n# Adverbs, \ubd80\uc0ac\nass = tokenized.adverbs()\nprint(ass)\nss = tokenized.symbols()\nprint(ss)\n\n```\n\n## How to use, spelling corrector\n```python\nfrom bareunpy import Corrector\n\n# You can get an API-KEY from https://bareun.ai/\n# Please note that you need to sign up and verify your email.\n# \uc544\ub798\uc5d0 \"https://bareun.ai/\"\uc5d0\uc11c \uc774\uba54\uc77c \uc778\uc99d \ud6c4 \ubc1c\uae09\ubc1b\uc740 API KEY(\"koba-...\")\ub97c \uc785\ub825\ud574\uc8fc\uc138\uc694. \"\ub85c\uadf8\uc778-\ub0b4\uc815\ubcf4 \ud655\uc778\"\nAPI_KEY = \"koba-ABCDEFG-1234567-LMNOPQR-7654321\" # <- \ubcf8\uc778\uc758 API KEY\ub85c \uad50\uccb4(Replace this with your own API KEY)\n\n# Initialize Corrector\n# The spelling corrector is available only via the hosted Bareun API.\n# There is no local server for Corrector. To use it, supply your API key and connect to:\ncorrector = Corrector(API_KEY, 'api.bareun.ai', 443)\n\n# sentence correction\nresponse = corrector.correct_error(\"\uc601\uc218 \ub3c4 \uc904\uae30\uac00 \uc587\uc5b4\uc11c \uc2dc\ub4e4\uc744 \uac83 \uac19\uc740 \uaf43\uc5d0\ubb3c\uc744 \uc8fc\uc5c8\ub2e4.\")\nprint(f\"Original: {response.origin}\")\nprint(f\"Corrected: {response.revised}\")\ncorrector.print_results(response)\n\n```",
"bugtrack_url": null,
"license": "BSD-3-Clause",
"summary": "The bareun python library using grpc",
"version": "1.7.2",
"project_urls": {
"Homepage": "https://bareun.ai/",
"Repository": "https://github.com/bareun-nlp/bareunpy"
},
"split_keywords": [
"nlp",
" korean",
" deep learning",
" pos tagger",
" bareun"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "03a45819637695315863225743ea4c7adff0ab203f76921c4008ea8cfb1a5efb",
"md5": "22496280714bda7b088bdcfd97971c0e",
"sha256": "bc28bb00bee7fda674b6f3ca990f7214e1e3148a92a2999a1294bdf4c4a71d88"
},
"downloads": -1,
"filename": "bareunpy-1.7.2-py3-none-any.whl",
"has_sig": false,
"md5_digest": "22496280714bda7b088bdcfd97971c0e",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<4.0,>=3.6",
"size": 24017,
"upload_time": "2025-08-13T06:53:10",
"upload_time_iso_8601": "2025-08-13T06:53:10.739050Z",
"url": "https://files.pythonhosted.org/packages/03/a4/5819637695315863225743ea4c7adff0ab203f76921c4008ea8cfb1a5efb/bareunpy-1.7.2-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "004aa9b6bd80582d2e90bb60b653f29a0695a5e9a1f72b76f2c69d38293a94ce",
"md5": "01d3a718aae815f80ff050f421c76f22",
"sha256": "a71d858315a2d507c8730ccdf3948418c09d961b3a6fcc6f242b16e68ed8e561"
},
"downloads": -1,
"filename": "bareunpy-1.7.2.tar.gz",
"has_sig": false,
"md5_digest": "01d3a718aae815f80ff050f421c76f22",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<4.0,>=3.6",
"size": 18743,
"upload_time": "2025-08-13T06:53:12",
"upload_time_iso_8601": "2025-08-13T06:53:12.269281Z",
"url": "https://files.pythonhosted.org/packages/00/4a/a9b6bd80582d2e90bb60b653f29a0695a5e9a1f72b76f2c69d38293a94ce/bareunpy-1.7.2.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-13 06:53:12",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "bareun-nlp",
"github_project": "bareunpy",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "grpcio",
"specs": [
[
"==",
"1.53.2"
]
]
},
{
"name": "googleapis-common-protos",
"specs": [
[
"==",
"1.56.0"
]
]
},
{
"name": "protobuf",
"specs": [
[
">=",
"3.19.6"
]
]
},
{
"name": "bareun-apis",
"specs": [
[
"==",
"0.15.2"
]
]
},
{
"name": "setuptools",
"specs": [
[
"~=",
"60.5.0"
]
]
},
{
"name": "pytest",
"specs": [
[
">=",
"7.2.1"
]
]
}
],
"lcname": "bareunpy"
}