ipapy
=====
**ipapy** is a Python module to work with International Phonetic
Alphabet (IPA) strings.
- Version: 0.0.9
- Date: 2019-05-05
- Developer: `Alberto Pettarin <http://www.albertopettarin.it/>`__
- License: the MIT License (MIT)
- Contact: `click here <http://www.albertopettarin.it/contact.html>`__
- Links: `GitHub <https://github.com/pettarin/ipapy>`__ -
`PyPI <https://pypi.python.org/pypi/ipapy>`__
Installation
------------
.. code:: bash
$ pip install ipapy
or
.. code:: bash
$ git clone https://github.com/pettarin/ipapy.git
$ cd ipapy
Usage
-----
As A Python Module
~~~~~~~~~~~~~~~~~~
.. code:: python
###########
# IMPORTS #
###########
from ipapy import UNICODE_TO_IPA
from ipapy import is_valid_ipa
from ipapy.ipachar import IPAConsonant
from ipapy.ipachar import IPAVowel
from ipapy.ipastring import IPAString
###########
# IPAChar #
###########
# Def.: an IPAChar is an IPA letter or diacritic/suprasegmental/tone mark
# create IPAChar from its Unicode representation
c1 = UNICODE_TO_IPA[u"a"] # vowel open front unrounded
c2 = UNICODE_TO_IPA[u"e"] # vowel close-mid front unrounded
c3 = UNICODE_TO_IPA[u"\u03B2"] # consonant voiced bilabial non-sibilant-fricative
tS1 = UNICODE_TO_IPA[u"t͡ʃ"] # consonant voiceless palato-alveolar sibilant-affricate
tS2 = UNICODE_TO_IPA[u"t͜ʃ"] # consonant voiceless palato-alveolar sibilant-affricate
tS3 = UNICODE_TO_IPA[u"tʃ"] # consonant voiceless palato-alveolar sibilant-affricate
tS4 = UNICODE_TO_IPA[u"ʧ"] # consonant voiceless palato-alveolar sibilant-affricate
tS5 = UNICODE_TO_IPA[u"\u0074\u0361\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
tS6 = UNICODE_TO_IPA[u"\u0074\u035C\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
tS7 = UNICODE_TO_IPA[u"\u0074\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
tS8 = UNICODE_TO_IPA[u"\u02A7"] # consonant voiceless palato-alveolar sibilant-affricate
c1 == c2 # False
c1 == c3 # False
c1 == tS1 # False
tS1 == tS2 # True (they both point to the same IPAChar object)
tS1 == tS3 # True (idem)
tS1 == tS4 # True (idem)
tS1 == tS5 # True (idem)
tS1 == tS6 # True (idem)
tS1 == tS7 # True (idem)
tS1 == tS8 # True (idem)
# create custom IPAChars
my_a1 = IPAVowel(name="my_a_1", descriptors=u"open front unrounded", unicode_repr=u"a")
my_a2 = IPAVowel(name="my_a_2", descriptors=[u"open", "front", "unrounded"], unicode_repr=u"a")
my_a3 = IPAVowel(name="my_a_3", height=u"open", backness=u"front", roundness=u"unrounded", unicode_repr=u"a")
my_a4 = IPAVowel(name="my_a_4", descriptors=[u"low", u"fnt", "unr"], unicode_repr=u"a")
my_ee = IPAVowel(name="my_e_1", descriptors=u"close-mid front unrounded", unicode_repr=u"e")
my_b1 = IPAConsonant(name="bilabial fricative", descriptors=u"voiced bilabial non-sibilant-fricative", unicode_repr=u"\u03B2")
my_b2 = IPAConsonant(name="bf", voicing=u"voiced", place=u"bilabial", manner=u"non-sibilant-fricative", unicode_repr=u"\u03B2")
my_tS = IPAConsonant(name="tS", voicing=u"voiceless", place=u"palato-alveolar", manner=u"sibilant-affricate", unicode_repr=u"t͡ʃ")
my_a1 == my_a2 # False (two different objects)
my_a1 == c1 # False (two different objects)
my_a1 == UNICODE_TO_IPA["a"] # False (two different objects)
# associate non-standard Unicode representation
my_aa = IPAVowel(name="a special", descriptors=[u"low", u"fnt", u"unr"], unicode_repr=u"a{*}")
print(my_aa) # "a{*}"
# equality vs. equivalence
my_tS == tS1 # False (my_tS is a different object than tS1)
my_tS.is_equivalent(tS1) # True (my_tS is equivalent to tS1...)
tS1.is_equivalent(my_tS) # True (... and vice versa)
# compare IPAChar objects
my_a1.is_equivalent(my_a2) # True
my_a1.is_equivalent(my_a3) # True
my_a1.is_equivalent(my_a4) # True
my_a1.is_equivalent(my_ee) # False
my_a1.is_equivalent(my_b1) # False
my_b1.is_equivalent(my_b2) # True
my_b1.is_equivalent(my_tS) # False
# compare IPAChar and a Unicode string
my_b1.is_equivalent(u"\u03B2") # True
my_b1.is_equivalent(u"β") # True
my_b1.is_equivalent(u"b") # False
my_tS.is_equivalent(u"tS") # False
my_tS.is_equivalent(u"tʃ") # False (missing the combining diacritic)
my_tS.is_equivalent(u"t͡ʃ") # True (has combining diacritic)
# compare IPAChar and a string listing descriptors
my_a1.is_equivalent(u"open front unrounded") # False (missing 'vowel')
my_a1.is_equivalent(u"open front unrounded vowel") # True
my_a1.is_equivalent(u"low fnt unr vwl") # True (known abbreviations are good as well)
my_ee.is_equivalent(u"open front unrounded vowel") # False
my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative") # False (missing 'consonant')
my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative consonant") # True
my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiced") # True (the order does not matter)
my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiceless") # False
# compare IPAChar and list of descriptors
my_a1.is_equivalent([u"open", u"front", u"unrounded"]) # False
my_a1.is_equivalent([u"vowel", u"open", u"front", u"unrounded"]) # True
my_a1.is_equivalent([u"open", u"unrounded", u"vowel", u"front"]) # True
my_a1.is_equivalent([u"low", u"fnt", u"unr", u"vwl"]) # True
#############
# IPAString #
#############
# Def.: an IPAString is a list of IPAChar objects
# check if a Unicode string contains only IPA valid characters
s_uni = u"əˈkiːn æˌkænˈθɑ.lə.d͡ʒi" # Unicode string of the IPA pronunciation for "achene acanthology"
is_valid_ipa(s_uni) # True
is_valid_ipa(u"LoL") # False (uppercase letter L is not IPA valid)
# create IPAString from list of IPAChar objects
new_s_ipa = IPAString(ipa_chars=[c3, c2, tS1, c1])
# create IPAString from Unicode string
s_ipa = IPAString(unicode_string=s_uni)
# an IPAString behaves similarly to a regular Python string object
print(s_ipa) # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"
len(s_ipa) # 21
s_ipa[0] # (first IPA char)
s_ipa[5:8] # (6th, 7th, 8th IPA chars)
s_ipa[19:] # (IPA chars from the 20th)
s_ipa[-1] # (last IPA char)
len(new_s_ipa) # 4
new_s_ipa.append(UNICODE_TO_IPA[u"a"]) # (append IPA char "a")
len(new_s_ipa) # 5
new_s_ipa.append(UNICODE_TO_IPA[u"t͡ʃ"]) # (append IPA char "t͡ʃ")
len(new_s_ipa) # 6
new_s_ipa.extend(s_ipa) # (append s_ipa to new_s_ipa)
len(new_s_ipa) # 27
double = s_ipa + new_s_ipa # (concatenate s_ipa and new_s_ipa)
len(double) # 48
# new IPAString objects containing only...
print(s_ipa.consonants) # "knknθld͡ʒ" (consonants)
print(s_ipa.vowels) # "əiææɑəi" (vowels)
print(s_ipa.letters) # "əkinækænθɑləd͡ʒi" (vowels and consonants)
print(s_ipa.cns_vwl) # "əkinækænθɑləd͡ʒi" (vowels and consonants)
print(s_ipa.cns_vwl_pstr) # "əˈkinækænˈθɑləd͡ʒi" ( + primary stress marks)
print(s_ipa.cns_vwl_pstr_long) # "əˈkiːnækænˈθɑləd͡ʒi" ( + long marks)
print(s_ipa.cns_vwl_str) # "əˈkinæˌkænˈθɑləd͡ʒi" ( + stress marks)
print(s_ipa.cns_vwl_str_len) # "əˈkiːnæˌkænˈθɑləd͡ʒi" ( + length marks)
print(s_ipa.cns_vwl_str_len_wb) # "əˈkiːn æˌkænˈθɑləd͡ʒi" ( + word breaks)
print(s_ipa.cns_vwl_str_len_wb_sb) # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi" ( + syllable breaks)
cns = s_ipa.consonants # (store new IPA string)
cns == s_ipa.consonants # False (two different objects)
cns.is_equivalent(s_ipa.consonants) # True
cns.is_equivalent(s_ipa) # False
# print representation and name of all IPAChar objects in IPAString
for c in s_ipa:
print(u"%s\t%s" % (c, c.name))
# ə vowel mid central unrounded
# ˈ suprasegmental primary-stress
# k consonant voiceless velar plosive
# i vowel close front unrounded
# ː suprasegmental long
# n consonant voiced alveolar nasal
# suprasegmental word-break
# æ vowel near-open front unrounded
# ˌ suprasegmental secondary-stress
# k consonant voiceless velar plosive
# æ vowel near-open front unrounded
# n consonant voiced alveolar nasal
# ˈ suprasegmental primary-stress
# θ consonant voiceless dental non-sibilant-fricative
# ɑ vowel open back unrounded
# . suprasegmental syllable-break
# l consonant voiced alveolar lateral-approximant
# ə vowel mid central unrounded
# . suprasegmental syllable-break
# d͡ʒ consonant voiced palato-alveolar sibilant-affricate
# i vowel close front unrounded
# compare IPAString objects
s_ipa_d = IPAString(unicode_string=u"diff")
s_ipa_1 = IPAString(unicode_string=u"at͡ʃe")
s_ipa_2 = IPAString(unicode_string=u"aʧe")
s_ipa_3 = IPAString(unicode_string=u"at͡ʃe", single_char_parsing=True)
s_ipa_d == s_ipa_1 # False
s_ipa_1 == s_ipa_2 # False (different objects)
s_ipa_1 == s_ipa_3 # False (different objects)
s_ipa_2 == s_ipa_3 # False (different objects)
s_ipa_d.is_equivalent(s_ipa_1) # False
s_ipa_1.is_equivalent(s_ipa_2) # True
s_ipa_2.is_equivalent(s_ipa_1) # True
s_ipa_1.is_equivalent(s_ipa_3) # True
s_ipa_2.is_equivalent(s_ipa_3) # True
# compare IPAString and list of IPAChar objects
s_ipa_1.is_equivalent([my_a1, my_tS, my_ee]) # True
# compare IPAString and Unicode string
s_ipa_d.is_equivalent(u"diff") # True
s_ipa_1.is_equivalent(u"atse") # False
s_ipa_1.is_equivalent(u"atSe") # False
s_ipa_1.is_equivalent(u"at͡ʃe") # True
s_ipa_1.is_equivalent(u"at͜ʃe") # True
s_ipa_1.is_equivalent(u"aʧe") # True
s_ipa_1.is_equivalent(u"at͡ʃeLOL", ignore=True) # True (chars that are not IPA valid are ignored)
s_ipa_1.is_equivalent(u"at͡ʃeLoL", ignore=True) # False (chars that are not IPA valid are ignored, but the lowercase "o" is IPA valid and is kept)
########################
# CONVERSION FUNCTIONS #
########################
from ipapy.kirshenbaummapper import KirshenbaumMapper
kmapper = KirshenbaumMapper() # mapper to Kirshenbaum ASCII IPA
s_k_ipa = kmapper.map_ipa_string(s_ipa) # u"@'ki:n#&,k&n'TA#l@#dZi"
s_k_uni = kmapper.map_unicode_string(s_uni) # u"@'ki:n#&,k&n'TA#l@#dZi"
s_k_ipa == s_k_uni # True
s_k_lis = kmapper.map_unicode_string(s_uni, return_as_list=True) # [u'@', u"'", u'k', u'i', u':', u'n', u'#', u'&', u',', u'k', u'&', u'n', u"'", u'T', u'A', u'#', u'l', u'@', u'#', u'dZ', u'i']
from ipapy.arpabetmapper import ARPABETMapper
amapper = ARPABETMapper() # mapper to ARPABET ASCII IPA (stress marks not supported yet)
s_a = amapper.map_unicode_string(u"pɹuːf") # error: long suprasegmental not mapped
s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True) # u"PRUWF"
s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True, return_as_list=True) # [u'P', u'R', u'UW', u'F']
As A Command Line Tool
~~~~~~~~~~~~~~~~~~~~~~
**ipapy** comes with a command line tool to perform operations on a
given Unicode UTF-8 encoded string, representing an IPA string.
Therefore, it is recommended to run it on a shell supporting UTF-8.
Currently, the supported operations are:
- ``canonize``: canonize the Unicode representation of the IPA string
- ``chars``: list all IPA characters appearing in the IPA string
- ``check``: check if the given Unicode string is IPA valid
- ``clean``: remove characters that are not IPA valid
- ``u2a``: print the corresponding ARPABET (ASCII IPA) string
- ``u2k``: print the corresponding Kirshenbaum (ASCII IPA) string
Run with the ``--help`` parameter to list all the available options:
.. code:: bash
$ python -m ipapy --help
usage: __main__.py [-h] [-i] [-p] [--separator [SEPARATOR]] [-s] [-u]
command string
ipapy perform a command on the given IPA/Unicode string
positional arguments:
command [canonize|chars|check|clean|u2a|u2k]
string String to canonize, check, clean, or convert
optional arguments:
-h, --help show this help message and exit
-i, --ignore Ignore Unicode characters that are not IPA valid
-p, --print-invalid Print Unicode characters that are not IPA valid
--separator [SEPARATOR]
Print IPA chars separated by this character (default:
'')
-s, --single-char-parsing
Perform single character parsing instead of maximal
parsing
-u, --unicode Print each Unicode character that is not IPA valid
with its Unicode codepoint and name
Examples:
.. code:: bash
$ python -m ipapy canonize "eʧiu"
et͡ʃiu
$ python -m ipapy canonize "eʧiu" --separator " "
e t͡ʃ i u
$ python -m ipapy chars "eʧiu"
'e' vowel close-mid front unrounded (U+0065)
't͡ʃ' consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)
$ python -m ipapy chars "et͡ʃiu"
'e' vowel close-mid front unrounded (U+0065)
't͡ʃ' consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)
$ python -m ipapy chars "et͡ʃiu" -s
'e' vowel close-mid front unrounded (U+0065)
't' consonant voiceless alveolar plosive (U+0074)
'͡' diacritic tie-bar-above (U+0361)
'ʃ' consonant voiceless palato-alveolar sibilant-fricative (U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)
$ python -m ipapy check "eʧiu"
True
$ python -m ipapy check "LoL"
False
$ python -m ipapy check "LoL" -p
False
LL
$ python -m ipapy check "LoLOL" -p -u
False
LLOL
'L' 0x4c LATIN CAPITAL LETTER L
'O' 0x4f LATIN CAPITAL LETTER O
$ python -m ipapy clean "/eʧiu/"
eʧiu
$ python -m ipapy u2k "eʧiu"
etSiu
$ python -m ipapy u2k "eTa"
The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.
$ python -m ipapy u2k "eTa" -i
ea
$ python -m ipapy u2a "eʧiu" --separator " "
EH CH IH UW
Unit Testing
------------
.. code:: bash
$ python run_all_unit_tests.py
License
-------
**ipapy** is released under the MIT License.
Acknowledgments
---------------
- Bram Vanroy provided a fix to ``setup.py`` for Windows users
Raw data
{
"_id": null,
"home_page": "https://github.com/pettarin/ipapy",
"name": "ipapy",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "ipapy,International Phonetic Alphabet,IPA,ASCII IPA,ASCIIIPA,ASCII-IPA,Kirshenbaum,Kirshenbaum IPA,Unicode",
"author": "Alberto Pettarin",
"author_email": "alberto@albertopettarin.it",
"download_url": "https://files.pythonhosted.org/packages/41/0d/7e8652df6af20a61bb3315f5c9d99fb9ea8f3779ff80fca9d71001230f90/ipapy-0.0.9.0.tar.gz",
"platform": "",
"description": "ipapy\n=====\n\n**ipapy** is a Python module to work with International Phonetic\nAlphabet (IPA) strings.\n\n- Version: 0.0.9\n- Date: 2019-05-05\n- Developer: `Alberto Pettarin <http://www.albertopettarin.it/>`__\n- License: the MIT License (MIT)\n- Contact: `click here <http://www.albertopettarin.it/contact.html>`__\n- Links: `GitHub <https://github.com/pettarin/ipapy>`__ -\n `PyPI <https://pypi.python.org/pypi/ipapy>`__\n\nInstallation\n------------\n\n.. code:: bash\n\n $ pip install ipapy\n\nor\n\n.. code:: bash\n\n $ git clone https://github.com/pettarin/ipapy.git\n $ cd ipapy\n\nUsage\n-----\n\nAs A Python Module\n~~~~~~~~~~~~~~~~~~\n\n.. code:: python\n\n ###########\n # IMPORTS #\n ###########\n from ipapy import UNICODE_TO_IPA\n from ipapy import is_valid_ipa\n from ipapy.ipachar import IPAConsonant\n from ipapy.ipachar import IPAVowel\n from ipapy.ipastring import IPAString\n\n\n ###########\n # IPAChar #\n ###########\n\n # Def.: an IPAChar is an IPA letter or diacritic/suprasegmental/tone mark\n\n # create IPAChar from its Unicode representation\n c1 = UNICODE_TO_IPA[u\"a\"] # vowel open front unrounded\n c2 = UNICODE_TO_IPA[u\"e\"] # vowel close-mid front unrounded\n c3 = UNICODE_TO_IPA[u\"\\u03B2\"] # consonant voiced bilabial non-sibilant-fricative\n tS1 = UNICODE_TO_IPA[u\"t\u0361\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS2 = UNICODE_TO_IPA[u\"t\u035c\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS3 = UNICODE_TO_IPA[u\"t\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS4 = UNICODE_TO_IPA[u\"\u02a7\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS5 = UNICODE_TO_IPA[u\"\\u0074\\u0361\\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS6 = UNICODE_TO_IPA[u\"\\u0074\\u035C\\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n tS7 = UNICODE_TO_IPA[u\"\\u0074\\u0283\"] # consonant voiceless palato-alveolar 
sibilant-affricate\n tS8 = UNICODE_TO_IPA[u\"\\u02A7\"] # consonant voiceless palato-alveolar sibilant-affricate\n c1 == c2 # False\n c1 == c3 # False\n c1 == tS1 # False\n tS1 == tS2 # True (they both point to the same IPAChar object)\n tS1 == tS3 # True (idem)\n tS1 == tS4 # True (idem)\n tS1 == tS5 # True (idem)\n tS1 == tS6 # True (idem)\n tS1 == tS7 # True (idem)\n tS1 == tS8 # True (idem)\n\n # create custom IPAChars\n my_a1 = IPAVowel(name=\"my_a_1\", descriptors=u\"open front unrounded\", unicode_repr=u\"a\")\n my_a2 = IPAVowel(name=\"my_a_2\", descriptors=[u\"open\", \"front\", \"unrounded\"], unicode_repr=u\"a\")\n my_a3 = IPAVowel(name=\"my_a_3\", height=u\"open\", backness=u\"front\", roundness=u\"unrounded\", unicode_repr=u\"a\")\n my_a4 = IPAVowel(name=\"my_a_4\", descriptors=[u\"low\", u\"fnt\", \"unr\"], unicode_repr=u\"a\")\n my_ee = IPAVowel(name=\"my_e_1\", descriptors=u\"close-mid front unrounded\", unicode_repr=u\"e\")\n my_b1 = IPAConsonant(name=\"bilabial fricative\", descriptors=u\"voiced bilabial non-sibilant-fricative\", unicode_repr=u\"\\u03B2\")\n my_b2 = IPAConsonant(name=\"bf\", voicing=u\"voiced\", place=u\"bilabial\", manner=u\"non-sibilant-fricative\", unicode_repr=u\"\\u03B2\")\n my_tS = IPAConsonant(name=\"tS\", voicing=u\"voiceless\", place=u\"palato-alveolar\", manner=u\"sibilant-affricate\", unicode_repr=u\"t\u0361\u0283\")\n my_a1 == my_a2 # False (two different objects)\n my_a1 == c1 # False (two different objects)\n my_a1 == UNICODE_TO_IPA[\"a\"] # False (two different objects)\n\n # associate non-standard Unicode representation\n my_aa = IPAVowel(name=\"a special\", descriptors=[u\"low\", u\"fnt\", u\"unr\"], unicode_repr=u\"a{*}\")\n print(my_aa) # \"a{*}\"\n\n # equality vs. equivalence\n my_tS == tS1 # False (my_tS is a different object than tS1)\n my_tS.is_equivalent(tS1) # True (my_tS is equivalent to tS1...)\n tS1.is_equivalent(my_tS) # True (... 
and vice versa)\n\n # compare IPAChar objects\n my_a1.is_equivalent(my_a2) # True\n my_a1.is_equivalent(my_a3) # True\n my_a1.is_equivalent(my_a4) # True\n my_a1.is_equivalent(my_ee) # False\n my_a1.is_equivalent(my_b1) # False\n my_b1.is_equivalent(my_b2) # True\n my_b1.is_equivalent(my_tS) # False\n\n # compare IPAChar and a Unicode string\n my_b1.is_equivalent(u\"\\u03B2\") # True\n my_b1.is_equivalent(u\"\u03b2\") # True\n my_b1.is_equivalent(u\"b\") # False\n my_tS.is_equivalent(u\"tS\") # False\n my_tS.is_equivalent(u\"t\u0283\") # False (missing the combining diacritic)\n my_tS.is_equivalent(u\"t\u0361\u0283\") # True (has combining diacritic)\n\n # compare IPAChar and a string listing descriptors\n my_a1.is_equivalent(u\"open front unrounded\") # False (missing 'vowel')\n my_a1.is_equivalent(u\"open front unrounded vowel\") # True\n my_a1.is_equivalent(u\"low fnt unr vwl\") # True (known abbreviations are good as well)\n my_ee.is_equivalent(u\"open front unrounded vowel\") # False\n my_b1.is_equivalent(u\"voiced bilabial non-sibilant-fricative\") # False (missing 'consonant')\n my_b1.is_equivalent(u\"voiced bilabial non-sibilant-fricative consonant\") # True\n my_b1.is_equivalent(u\"consonant non-sibilant-fricative bilabial voiced\") # True (the order does not matter)\n my_b1.is_equivalent(u\"consonant non-sibilant-fricative bilabial voiceless\") # False\n\n # compare IPAChar and list of descriptors\n my_a1.is_equivalent([u\"open\", u\"front\", u\"unrounded\"]) # False\n my_a1.is_equivalent([u\"vowel\", u\"open\", u\"front\", u\"unrounded\"]) # True\n my_a1.is_equivalent([u\"open\", u\"unrounded\", u\"vowel\", u\"front\"]) # True\n my_a1.is_equivalent([u\"low\", u\"fnt\", u\"unr\", u\"vwl\"]) # True\n\n\n #############\n # IPAString #\n #############\n\n # Def.: an IPAString is a list of IPAChar objects\n\n # check if Unicode string contains only IPA valid characters\n s_uni = u\"\u0259\u02c8ki\u02d0n 
\u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\" # Unicode string of the IPA pronunciation for \"achene acanthology\"\n is_valid_ipa(s_uni) # True\n is_valid_ipa(u\"LoL\") # False (uppercase letter L is not IPA valid)\n\n # create IPAString from list of IPAChar objects\n new_s_ipa = IPAString(ipa_chars=[c3, c2, tS1, c1])\n\n # create IPAString from Unicode string\n s_ipa = IPAString(unicode_string=s_uni)\n\n # IPAString is similar to regular Python string object\n print(s_ipa) # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\"\n len(s_ipa) # 21\n s_ipa[0] # (first IPA char)\n s_ipa[5:8] # (6th, 7th, 8th IPA chars)\n s_ipa[19:] # (IPA chars from the 20th)\n s_ipa[-1] # (last IPA char)\n len(new_s_ipa) # 4\n new_s_ipa.append(UNICODE_TO_IPA[u\"a\"]) # (append IPA char \"a\")\n len(new_s_ipa) # 5\n new_s_ipa.append(UNICODE_TO_IPA[u\"t\u0361\u0283\"]) # (append IPA char \"t\u0361\u0283\")\n len(new_s_ipa) # 6\n new_s_ipa.extend(s_ipa) # (append s_ipa to new_s_ipa)\n len(new_s_ipa) # 27\n double = s_ipa + new_s_ipa # (concatenate s_ipa and new_s_ipa)\n len(double) # 48\n\n # new IPAString objects containing only...\n print(s_ipa.consonants) # \"knkn\u03b8ld\u0361\u0292\" (consonants)\n print(s_ipa.vowels) # \"\u0259i\u00e6\u00e6\u0251\u0259i\" (vowels)\n print(s_ipa.letters) # \"\u0259kin\u00e6k\u00e6n\u03b8\u0251l\u0259d\u0361\u0292i\" (vowels and consonants)\n print(s_ipa.cns_vwl) # \"\u0259kin\u00e6k\u00e6n\u03b8\u0251l\u0259d\u0361\u0292i\" (vowels and consonants)\n print(s_ipa.cns_vwl_pstr) # \"\u0259\u02c8kin\u00e6k\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\" ( + primary stress marks)\n print(s_ipa.cns_vwl_pstr_long) # \"\u0259\u02c8ki\u02d0n\u00e6k\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\" ( + long marks)\n print(s_ipa.cns_vwl_str) # \"\u0259\u02c8kin\u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\" ( + stress marks)\n print(s_ipa.cns_vwl_str_len) # 
\"\u0259\u02c8ki\u02d0n\u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\" ( + length marks)\n print(s_ipa.cns_vwl_str_len_wb) # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\" ( + word breaks)\n print(s_ipa.cns_vwl_str_len_wb_sb) # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\" ( + syllable breaks)\n cns = s_ipa.consonants # (store new IPA string)\n cns == s_ipa.consonants # False (two different objects)\n cns.is_equivalent(s_ipa.consonants) # True\n cns.is_equivalent(s_ipa) # False\n\n # print representation and name of all IPAChar objects in IPAString\n for c in s_ipa:\n print(u\"%s\\t%s\" % (c, c.name))\n # \u0259 vowel mid central unrounded\n # \u02c8 suprasegmental primary-stress\n # k consonant voiceless velar plosive\n # i vowel close front unrounded\n # \u02d0 suprasegmental long\n # n consonant voiced alveolar nasal\n # suprasegmental word-break\n # \u00e6 vowel near-open front unrounded\n # \u02cc suprasegmental secondary-stress\n # k consonant voiceless velar plosive\n # \u00e6 vowel near-open front unrounded\n # n consonant voiced alveolar nasal\n # \u02c8 suprasegmental primary-stress\n # \u03b8 consonant voiceless dental non-sibilant-fricative\n # \u0251 vowel open back unrounded\n # . suprasegmental syllable-break\n # l consonant voiced alveolar lateral-approximant\n # \u0259 vowel mid central unrounded\n # . 
suprasegmental syllable-break\n # d\u0361\u0292 consonant voiced palato-alveolar sibilant-affricate\n # i vowel close front unrounded\n\n # compare IPAString objects\n s_ipa_d = IPAString(unicode_string=u\"diff\")\n s_ipa_1 = IPAString(unicode_string=u\"at\u0361\u0283e\")\n s_ipa_2 = IPAString(unicode_string=u\"a\u02a7e\")\n s_ipa_3 = IPAString(unicode_string=u\"at\u0361\u0283e\", single_char_parsing=True)\n s_ipa_d == s_ipa_1 # False\n s_ipa_1 == s_ipa_2 # False (different objects)\n s_ipa_1 == s_ipa_3 # False (different objects)\n s_ipa_2 == s_ipa_3 # False (different objects)\n s_ipa_d.is_equivalent(s_ipa_1) # False\n s_ipa_1.is_equivalent(s_ipa_2) # True\n s_ipa_2.is_equivalent(s_ipa_1) # True\n s_ipa_1.is_equivalent(s_ipa_3) # True\n s_ipa_2.is_equivalent(s_ipa_3) # True\n\n # compare IPAString and list of IPAChar objects\n s_ipa_1.is_equivalent([my_a1, my_tS, my_ee]) # True\n\n # compare IPAString and Unicode string\n s_ipa_d.is_equivalent(u\"diff\") # True\n s_ipa_1.is_equivalent(u\"atse\") # False\n s_ipa_1.is_equivalent(u\"atSe\") # False\n s_ipa_1.is_equivalent(u\"at\u0361\u0283e\") # True\n s_ipa_1.is_equivalent(u\"at\u035c\u0283e\") # True\n s_ipa_1.is_equivalent(u\"a\u02a7e\") # True\n s_ipa_1.is_equivalent(u\"at\u0361\u0283eLOL\", ignore=True) # True (ignore chars non IPA valid)\n s_ipa_1.is_equivalent(u\"at\u0361\u0283eLoL\", ignore=True) # False (ignore chars non IPA valid, note extra \"o\")\n\n\n ########################\n # CONVERSION FUNCTIONS #\n ########################\n from ipapy.kirshenbaummapper import KirshenbaumMapper\n kmapper = KirshenbaumMapper() # mapper to Kirshenbaum ASCII IPA\n s_k_ipa = kmapper.map_ipa_string(s_ipa) # u\"@'ki:n#&,k&n'TA#l@#dZi\"\n s_k_uni = kmapper.map_unicode_string(s_uni) # u\"@'ki:n#&,k&n'TA#l@#dZi\"\n s_k_ipa == s_k_uni # True\n s_k_lis = kmapper.map_unicode_string(s_uni, return_as_list=True) # [u'@', u\"'\", u'k', u'i', u':', u'n', u'#', u'&', u',', u'k', u'&', u'n', u\"'\", u'T', u'A', u'#', u'l', u'@', 
u'#', u'dZ', u'i']\n\n from ipapy.arpabetmapper import ARPABETMapper\n amapper = ARPABETMapper() # mapper to ARPABET ASCII IPA (stress marks not supported yet)\n s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\") # error: long suprasegmental not mapped\n s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\", ignore=True) # u\"PRUWF\"\n s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\", ignore=True, return_as_list=True) # [u'P', u'R', u'UW', u'F']\n\nAs A Command Line Tool\n~~~~~~~~~~~~~~~~~~~~~~\n\n**ipapy** comes with a command line tool to perform operations on a\ngiven Unicode UTF-8 encoded string, representing an IPA string.\nTherefore, it is recommended to run it on a shell supporting UTF-8.\n\nCurrently, the supported operations are:\n\n- ``canonize``: canonize the Unicode representation of the IPA string\n- ``chars``: list all IPA characters appearing in the IPA string\n- ``check``: check if the given Unicode string is IPA valid\n- ``clean``: remove characters that are not IPA valid\n- ``u2a``: print the corresponding ARPABET (ASCII IPA) string\n- ``u2k``: print the corresponding Kirshenbaum (ASCII IPA) string\n\nRun with the ``--help`` parameter to list all the available options:\n\n.. 
code:: bash\n\n $ python -m ipapy --help\n\n usage: __main__.py [-h] [-i] [-p] [--separator [SEPARATOR]] [-s] [-u]\n command string\n\n ipapy perform a command on the given IPA/Unicode string\n\n positional arguments:\n command [canonize|chars|check|clean|u2a|u2k]\n string String to canonize, check, clean, or convert\n\n optional arguments:\n -h, --help show this help message and exit\n -i, --ignore Ignore Unicode characters that are not IPA valid\n -p, --print-invalid Print Unicode characters that are not IPA valid\n --separator [SEPARATOR]\n Print IPA chars separated by this character (default:\n '')\n -s, --single-char-parsing\n Perform single character parsing instead of maximal\n parsing\n -u, --unicode Print each Unicode character that is not IPA valid\n with its Unicode codepoint and name\n\nExamples:\n\n.. code:: bash\n\n $ python -m ipapy canonize \"e\u02a7iu\"\n et\u0361\u0283iu\n\n $ python -m ipapy canonize \"e\u02a7iu\" --separator \" \"\n e t\u0361\u0283 i u\n\n $ python -m ipapy chars \"e\u02a7iu\"\n 'e' vowel close-mid front unrounded (U+0065)\n 't\u0361\u0283' consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)\n 'i' vowel close front unrounded (U+0069)\n 'u' vowel close back rounded (U+0075)\n\n $ python -m ipapy chars \"et\u0361\u0283iu\"\n 'e' vowel close-mid front unrounded (U+0065)\n 't\u0361\u0283' consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)\n 'i' vowel close front unrounded (U+0069)\n 'u' vowel close back rounded (U+0075)\n\n $ python -m ipapy chars \"et\u0361\u0283iu\" -s\n 'e' vowel close-mid front unrounded (U+0065)\n 't' consonant voiceless alveolar plosive (U+0074)\n '\u0361' diacritic tie-bar-above (U+0361)\n '\u0283' consonant voiceless palato-alveolar sibilant-fricative (U+0283)\n 'i' vowel close front unrounded (U+0069)\n 'u' vowel close back rounded (U+0075)\n\n $ python -m ipapy check \"e\u02a7iu\"\n True\n\n $ python -m ipapy check \"LoL\"\n False\n\n $ python -m ipapy 
check \"LoL\" -p\n False\n LL\n\n $ python -m ipapy check \"LoLOL\" -p -u\n False\n LLOL\n 'L' 0x4c LATIN CAPITAL LETTER L\n 'O' 0x4f LATIN CAPITAL LETTER O\n\n $ python -m ipapy clean \"/e\u02a7iu/\"\n e\u02a7iu\n\n $ python -m ipapy u2k \"e\u02a7iu\"\n etSiu\n\n $ python -m ipapy u2k \"eTa\"\n The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.\n\n $ python -m ipapy u2k \"eTa\" -i\n ea\n\n $ python -m ipapy u2a \"e\u02a7iu\" --separator \" \"\n EH CH IH UW\n\nUnit Testing\n------------\n\n.. code:: bash\n\n $ python run_all_unit_tests.py\n\nLicense\n-------\n\n**ipapy** is released under the MIT License.\n\nAcknowledgments\n---------------\n\n- Bram Vanroy provided a fix to ``setup.py`` for Windows users\n\n\n",
"bugtrack_url": null,
"license": "MIT License",
"summary": "ipapy is a Python module to work with IPA strings",
"version": "0.0.9.0",
"split_keywords": [
"ipapy",
"international phonetic alphabet",
"ipa",
"ascii ipa",
"asciiipa",
"ascii-ipa",
"kirshenbaum",
"kirshenbaum ipa",
"unicode"
],
"urls": [
{
"comment_text": "",
"digests": {
"md5": "6d88c2fcc87174fc67a6b829f0bf2565",
"sha256": "b96d0435282103c7d893c8226a458b70a810d130ce65fabe127c8a7490d1f82b"
},
"downloads": -1,
"filename": "ipapy-0.0.9.0-py2-none-any.whl",
"has_sig": false,
"md5_digest": "6d88c2fcc87174fc67a6b829f0bf2565",
"packagetype": "bdist_wheel",
"python_version": "py2",
"requires_python": null,
"size": 38724,
"upload_time": "2019-05-05T22:27:35",
"upload_time_iso_8601": "2019-05-05T22:27:35.890892Z",
"url": "https://files.pythonhosted.org/packages/5d/b6/c170e49cd5d3aaa8cbbe3c836d2fe09c72f08cf3b8ea1e4b4f81fed7881b/ipapy-0.0.9.0-py2-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"md5": "4c87b745b63f5a69571815a979af9cc6",
"sha256": "e1bc73f6a4861b9a0ff562b70b87dab8acf7a63badd98caabd6e248b3839f1c9"
},
"downloads": -1,
"filename": "ipapy-0.0.9.0.tar.gz",
"has_sig": false,
"md5_digest": "4c87b745b63f5a69571815a979af9cc6",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 37415,
"upload_time": "2019-05-05T22:27:37",
"upload_time_iso_8601": "2019-05-05T22:27:37.753585Z",
"url": "https://files.pythonhosted.org/packages/41/0d/7e8652df6af20a61bb3315f5c9d99fb9ea8f3779ff80fca9d71001230f90/ipapy-0.0.9.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2019-05-05 22:27:37",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "pettarin",
"github_project": "ipapy",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "ipapy"
}