# What is this?
`bareunpy` is the Python 3 library for Bareun.
Bareun is a Korean NLP engine
that provides tokenization and POS tagging for Korean.
## How to install
```shell
pip3 install bareunpy
```
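To verify the installation, a minimal sanity check (uses `importlib.metadata`, available on Python 3.8+; the printed version is an example):
```python
# Minimal install check: import the package and print its version.
import importlib.metadata
import bareunpy
print(importlib.metadata.version("bareunpy"))  # e.g. "1.6.3"
```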
## How to get bareun
- Go to https://bareun.ai/.
- Register (for the first time) to get an API key, which lets you use Bareun for free.
- With the API key, you can install the `bareun1` server.
- Or you can use this `bareunpy` library to call any running Bareun server.
- Or use the docker image (see https://hub.docker.com/r/bareunai/bareun) and run it as sketched below.
```shell
docker pull bareunai/bareun:latest
```
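Once pulled, the container can be started. A minimal sketch, assuming the server's gRPC port is 5656; the port, container name, and any required options are assumptions, so check the Docker Hub page above for the actual run instructions:
```shell
# Hypothetical run command; port 5656 is an assumption, not confirmed here.
docker run -d --name bareun -p 5656:5656 bareunai/bareun:latest
```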
## How to use, tagger
```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tagger
#
# you can get an API key from https://bareun.ai/
#
API_KEY="koba-42CXULQ-SDPU6ZA-RQ6QPBQ-4BMZCOA"
# If you are running Bareun on localhost:
my_tagger = Tagger(API_KEY, 'localhost')
# or, if your own Bareun server is running on 10.8.3.211:15656:
my_tagger = Tagger(API_KEY, '10.8.3.211', 15656)

# print results.
res = my_tagger.tags(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = res.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')
print(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')
print(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')
print(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')
## Advanced usage.
for sent in m.sentences:
    for token in sent.tokens:
        for morph in token.morphemes:
            print(f'{morph.text.content}/{morph.tag}:{morph.probability}:{morph.out_of_vocab}')
# get json object
jo = res.as_json()
print(jo)
# get tuple of pos tagging.
pa = res.pos()
print(pa)
# other convenience methods
ma = res.morphs()   # morphemes
print(ma)
na = res.nouns()    # nouns only
print(na)
va = res.verbs()    # verbs only
print(va)
# custom dictionary
cust_dic = my_tagger.custom_dict("my")
cust_dic.copy_np_set({'내고유명사', '우리집고유명사'})
cust_dic.copy_cp_set({'코로나19'})
cust_dic.copy_cp_caret_set({'코로나^백신', '독감^백신'})
cust_dic.update()
# load the previously saved custom dictionary
cust_dict2 = my_tagger.custom_dict("my")
cust_dict2.load()
my_tagger.set_domain('my')
my_tagger.pos('코로나19는 언제 끝날까요?')
```
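Putting the calls above together, here is a minimal sketch of a batch helper built only from the `Tagger`, `tags()`, and `nouns()` calls shown in this README; the function name, host, and usage line are illustrative assumptions:
```python
from bareunpy import Tagger

def extract_nouns(api_key, texts):
    """Hypothetical helper: tag a batch of texts and return the nouns."""
    tagger = Tagger(api_key, 'localhost')  # host is an assumption
    res = tagger.tags(texts)
    return res.nouns()

# usage, with your own API key from https://bareun.ai/:
# print(extract_nouns(API_KEY, ['안녕하세요.', '반가워요!']))
```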
## How to use, tokenizer
```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tokenizer
# reuse the API key from the tagger example above.
API_KEY = "koba-42CXULQ-SDPU6ZA-RQ6QPBQ-4BMZCOA"

# If you are running Bareun on localhost:
my_tokenizer = Tokenizer(API_KEY, 'localhost')
# or, if your own Bareun server is running on 10.8.3.211:15656:
my_tokenizer = Tokenizer(API_KEY, '10.8.3.211', 15656)

# print results.
tokenized = my_tokenizer.tokenize_list(["안녕하세요.", "반가워요!"])
# get protobuf message.
m = tokenized.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')
print(f'tagged form of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')
print(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')
print(f'hint of first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')
## Advanced usage.
for sent in m.sentences:
    for token in sent.tokens:
        for seg in token.segments:
            print(f'{seg.text.content}/{seg.hint}')
# get json object
jo = tokenized.as_json()
print(jo)
# get tuple of segments
ss = tokenized.segments()
print(ss)
ns = tokenized.nouns()
print(ns)
vs = tokenized.verbs()
print(vs)
# postpositions: 조사
ps = tokenized.postpositions()
print(ps)
# adverbs: 부사
ass = tokenized.adverbs()
print(ass)
# symbols: 기호
ss = tokenized.symbols()
print(ss)
```
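As a closing sketch, the loop below rebuilds each sentence as a list of segment strings, using only the `msg()` accessor and the `sentences`/`tokens`/`segments`/`text.content` fields demonstrated above; variable names are illustrative:
```python
# Hypothetical traversal: collect segment strings per sentence.
msg = tokenized.msg()
for sent in msg.sentences:
    segs = [seg.text.content
            for token in sent.tokens
            for seg in token.segments]
    print(segs)
```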