# ToJyutping ![](https://github.com/CanCLID/ToJyutping/workflows/Python%20Package/badge.svg)
Install:
```sh
$ pip install ToJyutping
```
Usage:
```python
>>> import ToJyutping
>>> ToJyutping.get_jyutping_list('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
[('咁', 'gam3'), ('啱', 'ngaam1'), ('老', 'lou5'), ('世', 'sai3'), ('要', 'jiu1'), ('求', 'kau4'), ('佢', 'keoi5'), ('等', 'dang2'), ('陣', 'zan6'), ('要', 'jiu3'), ('開', 'hoi1'), ('會', 'wui2'), (',', None), ('剩', 'zing6'), ('低', 'dai1'), ('嘅', 'ge2'), ('嘢', 'je5'), ('我', 'ngo5'), ('會', 'wui5'), ('搞', 'gaau2'), ('掂', 'dim6'), ('㗎', 'ga3'), ('喇', 'laa3'), ('。', None)]
>>> ToJyutping.get_jyutping('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
'咁(gam3)啱(ngaam1)老(lou5)世(sai3)要(jiu1)求(kau4)佢(keoi5)等(dang2)陣(zan6)要(jiu3)開(hoi1)會(wui2),剩(zing6)低(dai1)嘅(ge2)嘢(je5)我(ngo5)會(wui5)搞(gaau2)掂(dim6)㗎(ga3)喇(laa3)。'
>>> ToJyutping.get_jyutping_text('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
'gam3 ngaam1 lou5 sai3 jiu1 kau4 keoi5 dang2 zan6 jiu3 hoi1 wui2, zing6 dai1 ge2 je5 ngo5 wui5 gaau2 dim6 ga3 laa3.'
>>> ToJyutping.get_ipa_list('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
[('咁', 'kɐm˧'), ('啱', 'ŋaːm˥'), ('老', 'lou̯˩˧'), ('世', 'sɐi̯˧'), ('要', 'jiːu̯˥'), ('求', 'kʰɐu̯˨˩'), ('佢', 'kʰɵy̑˩˧'), ('等', 'tɐŋ˧˥'), ('陣', 't͡sɐn˨'), ('要', 'jiːu̯˧'), ('開', 'hɔːi̯˥'), ('會', 'wuːi̯˧˥'), (',', None), ('剩', 't͡seŋ˨'), ('低', 'tɐi̯˥'), ('嘅', 'kɛː˧˥'), ('嘢', 'jɛː˩˧'), ('我', 'ŋɔː˩˧'), ('會', 'wuːi̯˩˧'), ('搞', 'kaːu̯˧˥'), ('掂', 'tiːm˨'), ('㗎', 'kɐ˧'), ('喇', 'laː˧'), ('。', None)]
>>> ToJyutping.get_ipa('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
'咁[kɐm˧]啱[ŋaːm˥]老[lou̯˩˧]世[sɐi̯˧]要[jiːu̯˥]求[kʰɐu̯˨˩]佢[kʰɵy̑˩˧]等[tɐŋ˧˥]陣[t͡sɐn˨]要[jiːu̯˧]開[hɔːi̯˥]會[wuːi̯˧˥],剩[t͡seŋ˨]低[tɐi̯˥]嘅[kɛː˧˥]嘢[jɛː˩˧]我[ŋɔː˩˧]會[wuːi̯˩˧]搞[kaːu̯˧˥]掂[tiːm˨]㗎[kɐ˧]喇[laː˧]。'
>>> ToJyutping.get_ipa_text('咁啱老世要求佢等陣要開會,剩低嘅嘢我會搞掂㗎喇。')
'kɐm˧.ŋaːm˥.lou̯˩˧.sɐi̯˧.jiːu̯˥.kʰɐu̯˨˩.kʰɵy̑˩˧.tɐŋ˧˥.t͡sɐn˨.jiːu̯˧.hɔːi̯˥.wuːi̯˧˥ | t͡seŋ˨.tɐi̯˥.kɛː˧˥.jɛː˩˧.ŋɔː˩˧.wuːi̯˩˧.kaːu̯˧˥.tiːm˨.kɐ˧.laː˧'
```
In rare cases, the pronunciation of a single character can contain more than one syllable:
```python
>>> ToJyutping.get_jyutping_list('一瓩')
[('一', 'jat1'), ('瓩', 'cin1 ngaa5')]
>>> ToJyutping.get_ipa_list('一瓩')
[('一', 'jɐt̚˥'), ('瓩', 't͡sʰiːn˥.ŋaː˩˧')]
```
Helper:
```python
>>> ToJyutping.jyutping2ipa('jat1')
'jɐt̚˥'
>>> ToJyutping.jyutping2ipa('cin1 ngaa5')
't͡sʰiːn˥.ŋaː˩˧'
```
Note that autocorrection is intentionally not included in this helper: an error is raised if a malformed string such as `jyt6` is passed to the function.
Punctuation in the input is ignored by the helper.
Raw data
{
"_id": null,
"home_page": "https://github.com/CanCLID/ToJyutping",
"name": "ToJyutping",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.5, <4",
"maintainer_email": "",
"keywords": "chinese cantonese nlp natural-language-processing",
"author": "Cantonese Computational Linguistics Infrastructure Development Workgroup",
"author_email": "support@jyutping.org",
"download_url": "https://files.pythonhosted.org/packages/12/6e/1a386587dd251e207bac4af2eb697f3c2e03130b0d8904e9fa77974b4f3e/ToJyutping-0.2.3.tar.gz",
"platform": null,
"description": "# ToJyutping ![](https://github.com/CanCLID/ToJyutping/workflows/Python%20Package/badge.svg)\n\nInstall:\n\n```sh\n$ pip install ToJyutping\n```\n\nUsage:\n\n```python\n>>> import ToJyutping\n>>> ToJyutping.get_jyutping_list('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n[('\u5481', 'gam3'), ('\u5571', 'ngaam1'), ('\u8001', 'lou5'), ('\u4e16', 'sai3'), ('\u8981', 'jiu1'), ('\u6c42', 'kau4'), ('\u4f62', 'keoi5'), ('\u7b49', 'dang2'), ('\u9663', 'zan6'), ('\u8981', 'jiu3'), ('\u958b', 'hoi1'), ('\u6703', 'wui2'), ('\uff0c', None), ('\u5269', 'zing6'), ('\u4f4e', 'dai1'), ('\u5605', 'ge2'), ('\u5622', 'je5'), ('\u6211', 'ngo5'), ('\u6703', 'wui5'), ('\u641e', 'gaau2'), ('\u6382', 'dim6'), ('\u35ce', 'ga3'), ('\u5587', 'laa3'), ('\u3002', None)]\n>>> ToJyutping.get_jyutping('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n'\u5481(gam3)\u5571(ngaam1)\u8001(lou5)\u4e16(sai3)\u8981(jiu1)\u6c42(kau4)\u4f62(keoi5)\u7b49(dang2)\u9663(zan6)\u8981(jiu3)\u958b(hoi1)\u6703(wui2)\uff0c\u5269(zing6)\u4f4e(dai1)\u5605(ge2)\u5622(je5)\u6211(ngo5)\u6703(wui5)\u641e(gaau2)\u6382(dim6)\u35ce(ga3)\u5587(laa3)\u3002'\n>>> ToJyutping.get_jyutping_text('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n'gam3 ngaam1 lou5 sai3 jiu1 kau4 keoi5 dang2 zan6 jiu3 hoi1 wui2, zing6 dai1 ge2 je5 ngo5 wui5 gaau2 dim6 ga3 laa3.'\n>>> ToJyutping.get_ipa_list('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n[('\u5481', 'k\u0250m\u02e7'), ('\u5571', '\u014ba\u02d0m\u02e5'), ('\u8001', 'lou\u032f\u02e9\u02e7'), ('\u4e16', 's\u0250i\u032f\u02e7'), ('\u8981', 'ji\u02d0u\u032f\u02e5'), ('\u6c42', 
'k\u02b0\u0250u\u032f\u02e8\u02e9'), ('\u4f62', 'k\u02b0\u0275y\u0311\u02e9\u02e7'), ('\u7b49', 't\u0250\u014b\u02e7\u02e5'), ('\u9663', 't\u0361s\u0250n\u02e8'), ('\u8981', 'ji\u02d0u\u032f\u02e7'), ('\u958b', 'h\u0254\u02d0i\u032f\u02e5'), ('\u6703', 'wu\u02d0i\u032f\u02e7\u02e5'), ('\uff0c', None), ('\u5269', 't\u0361se\u014b\u02e8'), ('\u4f4e', 't\u0250i\u032f\u02e5'), ('\u5605', 'k\u025b\u02d0\u02e7\u02e5'), ('\u5622', 'j\u025b\u02d0\u02e9\u02e7'), ('\u6211', '\u014b\u0254\u02d0\u02e9\u02e7'), ('\u6703', 'wu\u02d0i\u032f\u02e9\u02e7'), ('\u641e', 'ka\u02d0u\u032f\u02e7\u02e5'), ('\u6382', 'ti\u02d0m\u02e8'), ('\u35ce', 'k\u0250\u02e7'), ('\u5587', 'la\u02d0\u02e7'), ('\u3002', None)]\n>>> ToJyutping.get_ipa('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n'\u5481[k\u0250m\u02e7]\u5571[\u014ba\u02d0m\u02e5]\u8001[lou\u032f\u02e9\u02e7]\u4e16[s\u0250i\u032f\u02e7]\u8981[ji\u02d0u\u032f\u02e5]\u6c42[k\u02b0\u0250u\u032f\u02e8\u02e9]\u4f62[k\u02b0\u0275y\u0311\u02e9\u02e7]\u7b49[t\u0250\u014b\u02e7\u02e5]\u9663[t\u0361s\u0250n\u02e8]\u8981[ji\u02d0u\u032f\u02e7]\u958b[h\u0254\u02d0i\u032f\u02e5]\u6703[wu\u02d0i\u032f\u02e7\u02e5]\uff0c\u5269[t\u0361se\u014b\u02e8]\u4f4e[t\u0250i\u032f\u02e5]\u5605[k\u025b\u02d0\u02e7\u02e5]\u5622[j\u025b\u02d0\u02e9\u02e7]\u6211[\u014b\u0254\u02d0\u02e9\u02e7]\u6703[wu\u02d0i\u032f\u02e9\u02e7]\u641e[ka\u02d0u\u032f\u02e7\u02e5]\u6382[ti\u02d0m\u02e8]\u35ce[k\u0250\u02e7]\u5587[la\u02d0\u02e7]\u3002'\n>>> 
ToJyutping.get_ipa_text('\u5481\u5571\u8001\u4e16\u8981\u6c42\u4f62\u7b49\u9663\u8981\u958b\u6703\uff0c\u5269\u4f4e\u5605\u5622\u6211\u6703\u641e\u6382\u35ce\u5587\u3002')\n'k\u0250m\u02e7.\u014ba\u02d0m\u02e5.lou\u032f\u02e9\u02e7.s\u0250i\u032f\u02e7.ji\u02d0u\u032f\u02e5.k\u02b0\u0250u\u032f\u02e8\u02e9.k\u02b0\u0275y\u0311\u02e9\u02e7.t\u0250\u014b\u02e7\u02e5.t\u0361s\u0250n\u02e8.ji\u02d0u\u032f\u02e7.h\u0254\u02d0i\u032f\u02e5.wu\u02d0i\u032f\u02e7\u02e5 | t\u0361se\u014b\u02e8.t\u0250i\u032f\u02e5.k\u025b\u02d0\u02e7\u02e5.j\u025b\u02d0\u02e9\u02e7.\u014b\u0254\u02d0\u02e9\u02e7.wu\u02d0i\u032f\u02e9\u02e7.ka\u02d0u\u032f\u02e7\u02e5.ti\u02d0m\u02e8.k\u0250\u02e7.la\u02d0\u02e7'\n```\n\nIn rare cases, the pronunciation of a single character can contain more than one syllable:\n\n```python\n>>> ToJyutping.get_jyutping_list('\u4e00\u74e9')\n[('\u4e00', 'jat1'), ('\u74e9', 'cin1 ngaa5')]\n>>> ToJyutping.get_ipa_list('\u4e00\u74e9')\n[('\u4e00', 'j\u0250t\u031a\u02e5'), ('\u74e9', 't\u0361s\u02b0i\u02d0n\u02e5.\u014ba\u02d0\u02e9\u02e7')]\n```\n\nHelper:\n\n```python\n>>> ToJyutping.jyutping2ipa('jat1')\n'j\u0250t\u031a\u02e5'\n>>> ToJyutping.jyutping2ipa('cin1 ngaa5')\n't\u0361s\u02b0i\u02d0n\u02e5.\u014ba\u02d0\u02e9\u02e7'\n```\n\nNote that autocorrection is intentionally not included in this helper, and an error is thrown if strings like `jyt6` are passed into the function.\nPunctuation is ignored in the helper.\n",
"bugtrack_url": null,
"license": "",
"summary": "\u7cb5\u8a9e\u62fc\u97f3\u81ea\u52d5\u6a19\u8a3b\u5de5\u5177 Cantonese Pronunciation Automatic Labeling Tool",
"version": "0.2.3",
"split_keywords": [
"chinese",
"cantonese",
"nlp",
"natural-language-processing"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "5f0af2d9396fd32f73216d370e4d81140d7dad2ed9d43465d3cfb915a6fa3e6f",
"md5": "0cabe4230a35ad78df9795a9ef7071cb",
"sha256": "968ab13d5bb3622fa7793a0d1cf49e3be2cd61fb9d5fbbc37f4532ec8a3fd306"
},
"downloads": -1,
"filename": "ToJyutping-0.2.3-py3-none-any.whl",
"has_sig": false,
"md5_digest": "0cabe4230a35ad78df9795a9ef7071cb",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.5, <4",
"size": 2323478,
"upload_time": "2023-03-13T11:54:17",
"upload_time_iso_8601": "2023-03-13T11:54:17.948219Z",
"url": "https://files.pythonhosted.org/packages/5f/0a/f2d9396fd32f73216d370e4d81140d7dad2ed9d43465d3cfb915a6fa3e6f/ToJyutping-0.2.3-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "126e1a386587dd251e207bac4af2eb697f3c2e03130b0d8904e9fa77974b4f3e",
"md5": "5ed1c92231ca59383a1dbd85de26bfce",
"sha256": "13fee3c26f534a81c8d172fb13bd64e5dd745157d652002c556393a0e6b8be4e"
},
"downloads": -1,
"filename": "ToJyutping-0.2.3.tar.gz",
"has_sig": false,
"md5_digest": "5ed1c92231ca59383a1dbd85de26bfce",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.5, <4",
"size": 2326526,
"upload_time": "2023-03-13T11:54:19",
"upload_time_iso_8601": "2023-03-13T11:54:19.335975Z",
"url": "https://files.pythonhosted.org/packages/12/6e/1a386587dd251e207bac4af2eb697f3c2e03130b0d8904e9fa77974b4f3e/ToJyutping-0.2.3.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-03-13 11:54:19",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "CanCLID",
"github_project": "ToJyutping",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "tojyutping"
}