bareunpy


Name: bareunpy
Version: 1.6.3
Home page: https://bareun.ai/
Repository: https://github.com/bareun-nlp/bareunpy
Summary: The bareun python library using grpc
Upload time: 2023-08-08 23:37:03
Author: Gihyun YUN
Requires Python: >=3.6,<4.0
License: BSD-3-Clause
Keywords: NLP, Korean, Deep Learning, POS tagger, bareun
# What is this?

`bareunpy` is the Python 3 library for Bareun.

Bareun is a Korean NLP engine that provides tokenization and
POS tagging for Korean.
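
A minimal end-to-end sketch (the API key and host here are placeholders; details follow in the sections below):

```python
from bareunpy import Tagger

# Placeholder key and host; see "How to get bareun" below.
tagger = Tagger("YOUR-API-KEY", "localhost")
print(tagger.pos("안녕하세요."))  # [(morpheme, tag), ...]
```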

## How to install

```shell
pip3 install bareunpy
```

## How to get bareun
- Go to https://bareun.ai/.
  - When you register for the first time, you get an API-KEY that you can use freely.
  - With the API-KEY, you can install the `bareun1` server.
  - You can also use this `bareunpy` library against any running bareun server.
- Or use the docker image (see https://hub.docker.com/r/bareunai/bareun and the run sketch after the pull command below).
```shell
docker pull bareunai/bareun:latest
```
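
After pulling, a container can be started along these lines. The published port and the environment variable for the API key are assumptions, not documented in this README, so check the Docker Hub page for the actual names:

```shell
# Hypothetical invocation: the port number and env var name may differ
# from what the image actually expects (see its Docker Hub docs).
docker run -d --name bareun \
  -p 5656:5656 \
  -e BAREUN_API_KEY="YOUR-API-KEY" \
  bareunai/bareun:latest
```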

## How to use the tagger

```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tagger

#
# You can get an API-KEY from https://bareun.ai/.
#
API_KEY="koba-42CXULQ-SDPU6ZA-RQ6QPBQ-4BMZCOA"

# If bareun is running on localhost, use the default port.
my_tagger = Tagger(API_KEY, 'localhost')
# Or point at a bareun server running on 10.8.3.211:15656.
my_tagger = Tagger(API_KEY, '10.8.3.211', 15656)


# Tag a couple of sentences.
res = my_tagger.tags(["안녕하세요.", "반가워요!"])

# get protobuf message.
m = res.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of morphemes of first token in sentences[0] is {len(m.sentences[0].tokens[0].morphemes)}')
print(f'lemma of first token in sentences[0] is {m.sentences[0].tokens[0].lemma}')
print(f'first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0]}')
print(f'tag of first morph of first token in sentences[0] is {m.sentences[0].tokens[0].morphemes[0].tag}')

## Advanced usage.
for sent in m.sentences:
    for token in sent.tokens:
        for morph in token.morphemes:
            print(f'{morph.text.content}/{morph.tag}:{morph.probability}:{morph.out_of_vocab}')

# get json object
jo = res.as_json()
print(jo)

# get tuple of pos tagging.
pa = res.pos()
print(pa)
# another methods
ma = res.morphs()
print(ma)
na = res.nouns()
print(na)
va = res.verbs()
print(va)

# Custom dictionary: np = proper-noun entries, cp = compound-noun entries,
# cp_caret = compounds with '^' marking internal morpheme boundaries.
cust_dic = my_tagger.custom_dict("my")
cust_dic.copy_np_set({'내고유명사', '우리집고유명사'})
cust_dic.copy_cp_set({'코로나19'})
cust_dic.copy_cp_caret_set({'코로나^백신', '독감^백신'})
cust_dic.update()

# Load the previously saved custom dict.
cust_dict2 = my_tagger.custom_dict("my")
cust_dict2.load()

# Switch the tagger to the custom-dictionary domain and tag with it.
my_tagger.set_domain('my')
my_tagger.pos('코로나19는 언제 끝날까요?')
```
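
Because `pos()` returns plain (morpheme, tag) tuples, the results compose directly with standard Python tooling. A small sketch, assuming the tuple shape shown above, that counts tag frequencies in the tagged result:

```python
from collections import Counter

# Count how often each POS tag occurs in the result above.
tag_counts = Counter(tag for _, tag in res.pos())
for tag, count in tag_counts.most_common():
    print(f'{tag}\t{count}')
```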


## How to use the tokenizer

```python
import sys
import google.protobuf.text_format as tf
from bareunpy import Tokenizer

# If bareun is running on localhost, use the default port (API_KEY as above).
my_tokenizer = Tokenizer(API_KEY, 'localhost')
# Or point at a bareun server running on 10.8.3.211:15656.
my_tokenizer = Tokenizer(API_KEY, '10.8.3.211', 15656)


# Tokenize a couple of sentences.
tokenized = my_tokenizer.tokenize_list(["안녕하세요.", "반가워요!"])

# get protobuf message.
m = tokenized.msg()
tf.PrintMessage(m, out=sys.stdout, as_utf8=True)
print(tf.MessageToString(m, as_utf8=True))
print(f'length of sentences is {len(m.sentences)}')
## output : 2
print(f'length of tokens in sentences[0] is {len(m.sentences[0].tokens)}')
print(f'length of segments of first token in sentences[0] is {len(m.sentences[0].tokens[0].segments)}')
print(f'tagged of first token in sentences[0] is {m.sentences[0].tokens[0].tagged}')
print(f'first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0]}')
print(f'hint of first segment of first token in sentences[0] is {m.sentences[0].tokens[0].segments[0].hint}')

## Advanced usage.
for sent in m.sentences:
    for token in sent.tokens:
        for seg in token.segments:
            print(f'{seg.text.content}/{seg.hint}')

# get json object
jo = tokenized.as_json()
print(jo)

# get tuple of segments
ss = tokenized.segments()
print(ss)
ns = tokenized.nouns()
print(ns)
vs = tokenized.verbs()
print(vs)
# postpositions: 조사
ps = tokenized.postpositions()
print(ps)
# adverbs: 부사
advs = tokenized.adverbs()
print(advs)
# symbols: 기호
syms = tokenized.symbols()
print(syms)

```
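
Tokenizer output is often a preprocessing step for downstream tools. A short sketch, assuming `segments()` returns a flat list of segment strings as in the example above:

```python
# Persist space-joined segment strings (e.g. for embedding training).
with open('segments.txt', 'w', encoding='utf-8') as f:
    f.write(' '.join(tokenized.segments()) + '\n')
```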

            
