ipapy

Name	ipapy JSON
Version	0.0.9.0 JSON
	download
home_page	https://github.com/pettarin/ipapy
Summary	ipapy is a Python module to work with IPA strings
upload_time	2019-05-05 22:27:37
maintainer
docs_url	None
author	Alberto Pettarin
requires_python
license	MIT License
keywords	ipapy international phonetic alphabet ipa ascii ipa asciiipa ascii-ipa kirshenbaum kirshenbaum ipa unicode
VCS
bugtrack_url
requirements	No requirements were recorded.
Travis-CI	No Travis.
coveralls test coverage	No coveralls.

ipapy
=====

**ipapy** is a Python module to work with International Phonetic
Alphabet (IPA) strings.

-  Version: 0.0.9
-  Date: 2019-05-05
-  Developer: `Alberto Pettarin <http://www.albertopettarin.it/>`__
-  License: the MIT License (MIT)
-  Contact: `click here <http://www.albertopettarin.it/contact.html>`__
-  Links: `GitHub <https://github.com/pettarin/ipapy>`__ -
   `PyPI <https://pypi.python.org/pypi/ipapy>`__

Installation
------------

.. code:: bash

   $ pip install ipapy

or get the source code from GitHub:

.. code:: bash

   $ git clone https://github.com/pettarin/ipapy.git
   $ cd ipapy

Usage
-----

As A Python Module
~~~~~~~~~~~~~~~~~~

.. code:: python

   ###########
   # IMPORTS #
   ###########
   from ipapy import UNICODE_TO_IPA
   from ipapy import is_valid_ipa
   from ipapy.ipachar import IPAConsonant
   from ipapy.ipachar import IPAVowel
   from ipapy.ipastring import IPAString


   ###########
   # IPAChar #
   ###########

   # Def.: an IPAChar is an IPA letter or diacritic/suprasegmental/tone mark

   # create IPAChar from its Unicode representation
   c1 = UNICODE_TO_IPA[u"a"]                   # vowel open front unrounded
   c2 = UNICODE_TO_IPA[u"e"]                   # vowel close-mid front unrounded
   c3 = UNICODE_TO_IPA[u"\u03B2"]              # consonant voiced bilabial non-sibilant-fricative
   tS1 = UNICODE_TO_IPA[u"t͡ʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
   tS2 = UNICODE_TO_IPA[u"t͜ʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
   tS3 = UNICODE_TO_IPA[u"tʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
   tS4 = UNICODE_TO_IPA[u"ʧ"]                  # consonant voiceless palato-alveolar sibilant-affricate
   tS5 = UNICODE_TO_IPA[u"\u0074\u0361\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
   tS6 = UNICODE_TO_IPA[u"\u0074\u035C\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
   tS7 = UNICODE_TO_IPA[u"\u0074\u0283"]       # consonant voiceless palato-alveolar sibilant-affricate
   tS8 = UNICODE_TO_IPA[u"\u02A7"]             # consonant voiceless palato-alveolar sibilant-affricate
   c1 == c2    # False
   c1 == c3    # False
   c1 == tS1   # False
   tS1 == tS2  # True (they both point to the same IPAChar object)
   tS1 == tS3  # True (idem)
   tS1 == tS4  # True (idem)
   tS1 == tS5  # True (idem)
   tS1 == tS6  # True (idem)
   tS1 == tS7  # True (idem)
   tS1 == tS8  # True (idem)

   # create custom IPAChars
   my_a1 = IPAVowel(name="my_a_1", descriptors=u"open front unrounded", unicode_repr=u"a")
   my_a2 = IPAVowel(name="my_a_2", descriptors=[u"open", "front", "unrounded"], unicode_repr=u"a")
   my_a3 = IPAVowel(name="my_a_3", height=u"open", backness=u"front", roundness=u"unrounded", unicode_repr=u"a")
   my_a4 = IPAVowel(name="my_a_4", descriptors=[u"low", u"fnt", "unr"], unicode_repr=u"a")
   my_ee = IPAVowel(name="my_e_1", descriptors=u"close-mid front unrounded", unicode_repr=u"e")
   my_b1 = IPAConsonant(name="bilabial fricative", descriptors=u"voiced bilabial non-sibilant-fricative", unicode_repr=u"\u03B2")
   my_b2 = IPAConsonant(name="bf", voicing=u"voiced", place=u"bilabial", manner=u"non-sibilant-fricative", unicode_repr=u"\u03B2")
   my_tS = IPAConsonant(name="tS", voicing=u"voiceless", place=u"palato-alveolar", manner=u"sibilant-affricate", unicode_repr=u"t͡ʃ")
   my_a1 == my_a2                  # False (two different objects)
   my_a1 == c1                     # False (two different objects)
   my_a1 == UNICODE_TO_IPA["a"]    # False (two different objects)

   # associate non-standard Unicode representation
   my_aa = IPAVowel(name="a special", descriptors=[u"low", u"fnt", u"unr"], unicode_repr=u"a{*}")
   print(my_aa)    # "a{*}"

   # equality vs. equivalence
   my_tS == tS1                # False (my_tS is a different object than tS1)
   my_tS.is_equivalent(tS1)    # True  (my_tS is equivalent to tS1...)
   tS1.is_equivalent(my_tS)    # True  (... and vice versa)

   # compare IPAChar objects
   my_a1.is_equivalent(my_a2)  # True
   my_a1.is_equivalent(my_a3)  # True
   my_a1.is_equivalent(my_a4)  # True
   my_a1.is_equivalent(my_ee)  # False
   my_a1.is_equivalent(my_b1)  # False
   my_b1.is_equivalent(my_b2)  # True
   my_b1.is_equivalent(my_tS)  # False

   # compare IPAChar and a Unicode string
   my_b1.is_equivalent(u"\u03B2")  # True
   my_b1.is_equivalent(u"β")       # True
   my_b1.is_equivalent(u"b")       # False
   my_tS.is_equivalent(u"tS")      # False
   my_tS.is_equivalent(u"tʃ")      # False (missing the combining diacritic)
   my_tS.is_equivalent(u"t͡ʃ")      # True (has combining diacritic)

   # compare IPAChar and a string listing descriptors
   my_a1.is_equivalent(u"open front unrounded")                                # False (missing 'vowel')
   my_a1.is_equivalent(u"open front unrounded vowel")                          # True
   my_a1.is_equivalent(u"low fnt unr vwl")                                     # True (known abbreviations are good as well)
   my_ee.is_equivalent(u"open front unrounded vowel")                          # False
   my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative")              # False (missing 'consonant')
   my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative consonant")    # True
   my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiced")    # True (the order does not matter)
   my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiceless") # False

   # compare IPAChar and list of descriptors
   my_a1.is_equivalent([u"open", u"front", u"unrounded"])              # False (missing 'vowel')
   my_a1.is_equivalent([u"vowel", u"open", u"front", u"unrounded"])    # True
   my_a1.is_equivalent([u"open", u"unrounded", u"vowel", u"front"])    # True
   my_a1.is_equivalent([u"low", u"fnt", u"unr", u"vwl"])               # True


   #############
   # IPAString #
   #############

   # Def.: an IPAString is a list of IPAChar objects

   # check if Unicode string contains only IPA valid characters
   s_uni = u"əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"   # Unicode string of the IPA pronunciation for "achene acanthology"
   is_valid_ipa(s_uni)                 # True
   is_valid_ipa(u"LoL")                # False (uppercase letter L is not IPA valid)

   # create IPAString from list of IPAChar objects
   new_s_ipa = IPAString(ipa_chars=[c3, c2, tS1, c1])

   # create IPAString from Unicode string
   s_ipa = IPAString(unicode_string=s_uni)

   # IPAString is similar to regular Python string object
   print(s_ipa)                            # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"
   len(s_ipa)                              # 21
   s_ipa[0]                                # (first IPA char)
   s_ipa[5:8]                              # (6th, 7th, 8th IPA chars)
   s_ipa[19:]                              # (IPA chars from the 20th)
   s_ipa[-1]                               # (last IPA char)
   len(new_s_ipa)                          # 4
   new_s_ipa.append(UNICODE_TO_IPA[u"a"])  # (append IPA char "a")
   len(new_s_ipa)                          # 5
   new_s_ipa.append(UNICODE_TO_IPA[u"t͡ʃ"]) # (append IPA char "t͡ʃ")
   len(new_s_ipa)                          # 6
   new_s_ipa.extend(s_ipa)                 # (append s_ipa to new_s_ipa)
   len(new_s_ipa)                          # 27
   double = s_ipa + new_s_ipa              # (concatenate s_ipa and new_s_ipa)
   len(double)                             # 48

   # new IPAString objects containing only...
   print(s_ipa.consonants)                 # "knknθld͡ʒ"                (consonants)
   print(s_ipa.vowels)                     # "əiææɑəi"                 (vowels)
   print(s_ipa.letters)                    # "əkinækænθɑləd͡ʒi"         (vowels and consonants)
   print(s_ipa.cns_vwl)                    # "əkinækænθɑləd͡ʒi"         (vowels and consonants)
   print(s_ipa.cns_vwl_pstr)               # "əˈkinækænˈθɑləd͡ʒi"       (  + primary stress marks)
   print(s_ipa.cns_vwl_pstr_long)          # "əˈkiːnækænˈθɑləd͡ʒi"      (    + long marks)
   print(s_ipa.cns_vwl_str)                # "əˈkinæˌkænˈθɑləd͡ʒi"      (  + stress marks)
   print(s_ipa.cns_vwl_str_len)            # "əˈkiːnæˌkænˈθɑləd͡ʒi"     (    + length marks)
   print(s_ipa.cns_vwl_str_len_wb)         # "əˈkiːn æˌkænˈθɑləd͡ʒi"    (      + word breaks)
   print(s_ipa.cns_vwl_str_len_wb_sb)      # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"  (        + syllable breaks)
   cns = s_ipa.consonants                  # (store new IPA string)
   cns == s_ipa.consonants                 # False (two different objects)
   cns.is_equivalent(s_ipa.consonants)     # True
   cns.is_equivalent(s_ipa)                # False

   # print representation and name of all IPAChar objects in IPAString
   for c in s_ipa:
       print(u"%s\t%s" % (c, c.name))
   # ə vowel mid central unrounded
   # ˈ suprasegmental primary-stress
   # k consonant voiceless velar plosive
   # i vowel close front unrounded
   # ː suprasegmental long
   # n consonant voiced alveolar nasal
   #   suprasegmental word-break
   # æ vowel near-open front unrounded
   # ˌ suprasegmental secondary-stress
   # k consonant voiceless velar plosive
   # æ vowel near-open front unrounded
   # n consonant voiced alveolar nasal
   # ˈ suprasegmental primary-stress
   # θ consonant voiceless dental non-sibilant-fricative
   # ɑ vowel open back unrounded
   # . suprasegmental syllable-break
   # l consonant voiced alveolar lateral-approximant
   # ə vowel mid central unrounded
   # . suprasegmental syllable-break
   # d͡ʒ   consonant voiced palato-alveolar sibilant-affricate
   # i vowel close front unrounded

   # compare IPAString objects
   s_ipa_d = IPAString(unicode_string=u"diff")
   s_ipa_1 = IPAString(unicode_string=u"at͡ʃe")
   s_ipa_2 = IPAString(unicode_string=u"aʧe")
   s_ipa_3 = IPAString(unicode_string=u"at͡ʃe", single_char_parsing=True)
   s_ipa_d == s_ipa_1              # False
   s_ipa_1 == s_ipa_2              # False (different objects)
   s_ipa_1 == s_ipa_3              # False (different objects)
   s_ipa_2 == s_ipa_3              # False (different objects)
   s_ipa_d.is_equivalent(s_ipa_1)  # False
   s_ipa_1.is_equivalent(s_ipa_2)  # True
   s_ipa_2.is_equivalent(s_ipa_1)  # True
   s_ipa_1.is_equivalent(s_ipa_3)  # True
   s_ipa_2.is_equivalent(s_ipa_3)  # True

   # compare IPAString and list of IPAChar objects
   s_ipa_1.is_equivalent([my_a1, my_tS, my_ee])    # True

   # compare IPAString and Unicode string
   s_ipa_d.is_equivalent(u"diff")                  # True
   s_ipa_1.is_equivalent(u"atse")                  # False
   s_ipa_1.is_equivalent(u"atSe")                  # False
   s_ipa_1.is_equivalent(u"at͡ʃe")                  # True
   s_ipa_1.is_equivalent(u"at͜ʃe")                  # True
   s_ipa_1.is_equivalent(u"aʧe")                   # True
   s_ipa_1.is_equivalent(u"at͡ʃeLOL", ignore=True)  # True (chars that are not IPA valid are ignored)
   s_ipa_1.is_equivalent(u"at͡ʃeLoL", ignore=True)  # False (non-IPA-valid chars are ignored, but the IPA-valid "o" remains)


   ########################
   # CONVERSION FUNCTIONS #
   ########################
   from ipapy.kirshenbaummapper import KirshenbaumMapper
   kmapper = KirshenbaumMapper()                                    # mapper to Kirshenbaum ASCII IPA
   s_k_ipa = kmapper.map_ipa_string(s_ipa)                          # u"@'ki:n#&,k&n'TA#l@#dZi"
   s_k_uni = kmapper.map_unicode_string(s_uni)                      # u"@'ki:n#&,k&n'TA#l@#dZi"
   s_k_ipa == s_k_uni                                               # True
   s_k_lis = kmapper.map_unicode_string(s_uni, return_as_list=True) # [u'@', u"'", u'k', u'i', u':', u'n', u'#', u'&', u',', u'k', u'&', u'n', u"'", u'T', u'A', u'#', u'l', u'@', u'#', u'dZ', u'i']

   from ipapy.arpabetmapper import ARPABETMapper
   amapper = ARPABETMapper()                                                    # mapper to ARPABET ASCII IPA (stress marks not supported yet)
   s_a = amapper.map_unicode_string(u"pɹuːf")                                   # error: long suprasegmental not mapped
   s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True)                      # u"PRUWF"
   s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True, return_as_list=True) # [u'P', u'R', u'UW', u'F']

As A Command Line Tool
~~~~~~~~~~~~~~~~~~~~~~

**ipapy** comes with a command line tool to perform operations on a
given Unicode UTF-8 encoded string, representing an IPA string.
Therefore, it is recommended to run it on a shell supporting UTF-8.

Currently, the supported operations are:

-  ``canonize``: canonize the Unicode representation of the IPA string
-  ``chars``: list all IPA characters appearing in the IPA string
-  ``check``: check if the given Unicode string is IPA valid
-  ``clean``: remove characters that are not IPA valid
-  ``u2a``: print the corresponding ARPABET (ASCII IPA) string
-  ``u2k``: print the corresponding Kirshenbaum (ASCII IPA) string

Run with the ``--help`` parameter to list all the available options:

.. code:: bash

   $ python -m ipapy --help

   usage: __main__.py [-h] [-i] [-p] [--separator [SEPARATOR]] [-s] [-u]
                      command string

   ipapy perform a command on the given IPA/Unicode string

   positional arguments:
     command               [canonize|chars|check|clean|u2a|u2k]
     string                String to canonize, check, clean, or convert

   optional arguments:
     -h, --help            show this help message and exit
     -i, --ignore          Ignore Unicode characters that are not IPA valid
     -p, --print-invalid   Print Unicode characters that are not IPA valid
     --separator [SEPARATOR]
                           Print IPA chars separated by this character (default:
                           '')
     -s, --single-char-parsing
                           Perform single character parsing instead of maximal
                           parsing
     -u, --unicode         Print each Unicode character that is not IPA valid
                           with its Unicode codepoint and name

Examples:

.. code:: bash

   $ python -m ipapy canonize "eʧiu"
   et͡ʃiu

   $ python -m ipapy canonize "eʧiu" --separator " "
   e t͡ʃ i u

   $ python -m ipapy chars "eʧiu"
   'e' vowel close-mid front unrounded (U+0065)
   't͡ʃ'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
   'i' vowel close front unrounded (U+0069)
   'u' vowel close back rounded (U+0075)

   $ python -m ipapy chars "et͡ʃiu"
   'e' vowel close-mid front unrounded (U+0065)
   't͡ʃ'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
   'i' vowel close front unrounded (U+0069)
   'u' vowel close back rounded (U+0075)

   $ python -m ipapy chars "et͡ʃiu" -s
   'e' vowel close-mid front unrounded (U+0065)
   't' consonant voiceless alveolar plosive (U+0074)
   '͡' diacritic tie-bar-above (U+0361)
   'ʃ' consonant voiceless palato-alveolar sibilant-fricative (U+0283)
   'i' vowel close front unrounded (U+0069)
   'u' vowel close back rounded (U+0075)

   $ python -m ipapy check "eʧiu"
   True

   $ python -m ipapy check "LoL"
   False

   $ python -m ipapy check "LoL" -p
   False
   LL

   $ python -m ipapy check "LoLOL" -p -u
   False
   LLOL
   'L' 0x4c    LATIN CAPITAL LETTER L
   'O' 0x4f    LATIN CAPITAL LETTER O

   $ python -m ipapy clean "/eʧiu/"
   eʧiu

   $ python -m ipapy u2k "eʧiu"
   etSiu

   $ python -m ipapy u2k "eTa"
   The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.

   $ python -m ipapy u2k "eTa" -i
   ea

   $ python -m ipapy u2a "eʧiu" --separator " "
   EH CH IH UW

Unit Testing
------------

.. code:: bash

   $ python run_all_unit_tests.py

License
-------

**ipapy** is released under the MIT License.

Acknowledgments
---------------

-  Bram Vanroy provided a fix to ``setup.py`` for Windows users

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/pettarin/ipapy",
    "name": "ipapy",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "ipapy,International Phonetic Alphabet,IPA,ASCII IPA,ASCIIIPA,ASCII-IPA,Kirshenbaum,Kirshenbaum IPA,Unicode",
    "author": "Alberto Pettarin",
    "author_email": "alberto@albertopettarin.it",
    "download_url": "https://files.pythonhosted.org/packages/41/0d/7e8652df6af20a61bb3315f5c9d99fb9ea8f3779ff80fca9d71001230f90/ipapy-0.0.9.0.tar.gz",
    "platform": "",
    "description": "ipapy\n=====\n\n**ipapy** is a Python module to work with International Phonetic\nAlphabet (IPA) strings.\n\n-  Version: 0.0.9\n-  Date: 2019-05-05\n-  Developer: `Alberto Pettarin <http://www.albertopettarin.it/>`__\n-  License: the MIT License (MIT)\n-  Contact: `click here <http://www.albertopettarin.it/contact.html>`__\n-  Links: `GitHub <https://github.com/pettarin/ipapy>`__ -\n   `PyPI <https://pypi.python.org/pypi/ipapy>`__\n\nInstallation\n------------\n\n.. code:: bash\n\n   $ pip install ipapy\n\nor\n\n.. code:: bash\n\n   $ git clone https://github.com/pettarin/ipapy.git\n   $ cd ipapy\n\nUsage\n-----\n\nAs A Python Module\n~~~~~~~~~~~~~~~~~~\n\n.. code:: python\n\n   ###########\n   # IMPORTS #\n   ###########\n   from ipapy import UNICODE_TO_IPA\n   from ipapy import is_valid_ipa\n   from ipapy.ipachar import IPAConsonant\n   from ipapy.ipachar import IPAVowel\n   from ipapy.ipastring import IPAString\n\n\n   ###########\n   # IPAChar #\n   ###########\n\n   # Def.: an IPAChar is an IPA letter or diacritic/suprasegmental/tone mark\n\n   # create IPAChar from its Unicode representation\n   c1 = UNICODE_TO_IPA[u\"a\"]                   # vowel open front unrounded\n   c2 = UNICODE_TO_IPA[u\"e\"]                   # vowel close-mid front unrounded\n   c3 = UNICODE_TO_IPA[u\"\\u03B2\"]              # consonant voiced bilabial non-sibilant-fricative\n   tS1 = UNICODE_TO_IPA[u\"t\u0361\u0283\"]                 # consonant voiceless palato-alveolar sibilant-affricate\n   tS2 = UNICODE_TO_IPA[u\"t\u035c\u0283\"]                 # consonant voiceless palato-alveolar sibilant-affricate\n   tS3 = UNICODE_TO_IPA[u\"t\u0283\"]                 # consonant voiceless palato-alveolar sibilant-affricate\n   tS4 = UNICODE_TO_IPA[u\"\u02a7\"]                  # consonant voiceless palato-alveolar sibilant-affricate\n   tS5 = UNICODE_TO_IPA[u\"\\u0074\\u0361\\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n   tS6 = 
UNICODE_TO_IPA[u\"\\u0074\\u035C\\u0283\"] # consonant voiceless palato-alveolar sibilant-affricate\n   tS7 = UNICODE_TO_IPA[u\"\\u0074\\u0283\"]       # consonant voiceless palato-alveolar sibilant-affricate\n   tS8 = UNICODE_TO_IPA[u\"\\u02A7\"]             # consonant voiceless palato-alveolar sibilant-affricate\n   c1 == c2    # False\n   c1 == c3    # False\n   c1 == tS1   # False\n   tS1 == tS2  # True (they both point to the same IPAChar object)\n   tS1 == tS3  # True (idem)\n   tS1 == tS4  # True (idem)\n   tS1 == tS5  # True (idem)\n   tS1 == tS6  # True (idem)\n   tS1 == tS7  # True (idem)\n   tS1 == tS8  # True (idem)\n\n   # create custom IPAChars\n   my_a1 = IPAVowel(name=\"my_a_1\", descriptors=u\"open front unrounded\", unicode_repr=u\"a\")\n   my_a2 = IPAVowel(name=\"my_a_2\", descriptors=[u\"open\", \"front\", \"unrounded\"], unicode_repr=u\"a\")\n   my_a3 = IPAVowel(name=\"my_a_3\", height=u\"open\", backness=u\"front\", roundness=u\"unrounded\", unicode_repr=u\"a\")\n   my_a4 = IPAVowel(name=\"my_a_4\", descriptors=[u\"low\", u\"fnt\", \"unr\"], unicode_repr=u\"a\")\n   my_ee = IPAVowel(name=\"my_e_1\", descriptors=u\"close-mid front unrounded\", unicode_repr=u\"e\")\n   my_b1 = IPAConsonant(name=\"bilabial fricative\", descriptors=u\"voiced bilabial non-sibilant-fricative\", unicode_repr=u\"\\u03B2\")\n   my_b2 = IPAConsonant(name=\"bf\", voicing=u\"voiced\", place=u\"bilabial\", manner=u\"non-sibilant-fricative\", unicode_repr=u\"\\u03B2\")\n   my_tS = IPAConsonant(name=\"tS\", voicing=u\"voiceless\", place=u\"palato-alveolar\", manner=u\"sibilant-affricate\", unicode_repr=u\"t\u0361\u0283\")\n   my_a1 == my_a2                  # False (two different objects)\n   my_a1 == c1                     # False (two different objects)\n   my_a1 == UNICODE_TO_IPA[\"a\"]    # False (two different objects)\n\n   # associate non-standard Unicode representation\n   my_aa = IPAVowel(name=\"a special\", descriptors=[u\"low\", u\"fnt\", u\"unr\"], 
unicode_repr=u\"a{*}\")\n   print(my_aa)    # \"a{*}\"\n\n   # equality vs. equivalence\n   my_tS == tS1                # False (my_tS is a different object than tS1)\n   my_tS.is_equivalent(tS1)    # True  (my_tS is equivalent to tS1...)\n   tS1.is_equivalent(my_tS)    # True  (... and vice versa)\n\n   # compare IPAChar objects\n   my_a1.is_equivalent(my_a2)  # True\n   my_a1.is_equivalent(my_a3)  # True\n   my_a1.is_equivalent(my_a4)  # True\n   my_a1.is_equivalent(my_ee)  # False\n   my_a1.is_equivalent(my_b1)  # False\n   my_b1.is_equivalent(my_b2)  # True\n   my_b1.is_equivalent(my_tS)  # False\n\n   # compare IPAChar and a Unicode string\n   my_b1.is_equivalent(u\"\\u03B2\")  # True\n   my_b1.is_equivalent(u\"\u03b2\")       # True\n   my_b1.is_equivalent(u\"b\")       # False\n   my_tS.is_equivalent(u\"tS\")      # False\n   my_tS.is_equivalent(u\"t\u0283\")      # False (missing the combining diacritic)\n   my_tS.is_equivalent(u\"t\u0361\u0283\")      # True (has combining diacritic)\n\n   # compare IPAChar and a string listing descriptors\n   my_a1.is_equivalent(u\"open front unrounded\")                                # False (missing 'vowel')\n   my_a1.is_equivalent(u\"open front unrounded vowel\")                          # True\n   my_a1.is_equivalent(u\"low fnt unr vwl\")                                     # True (known abbreviations are good as well)\n   my_ee.is_equivalent(u\"open front unrounded vowel\")                          # False\n   my_b1.is_equivalent(u\"voiced bilabial non-sibilant-fricative\")              # False (missing 'consonant')\n   my_b1.is_equivalent(u\"voiced bilabial non-sibilant-fricative consonant\")    # True\n   my_b1.is_equivalent(u\"consonant non-sibilant-fricative bilabial voiced\")    # True (the order does not matter)\n   my_b1.is_equivalent(u\"consonant non-sibilant-fricative bilabial voiceless\") # False\n\n   # compare IPAChar and list of descriptors\n   my_a1.is_equivalent([u\"open\", u\"front\", 
u\"unrounded\"])              # False\n   my_a1.is_equivalent([u\"vowel\", u\"open\", u\"front\", u\"unrounded\"])    # True\n   my_a1.is_equivalent([u\"open\", u\"unrounded\", u\"vowel\", u\"front\"])    # True\n   my_a1.is_equivalent([u\"low\", u\"fnt\", u\"unr\", u\"vwl\"])               # True\n\n\n   #############\n   # IPAString #\n   #############\n\n   # Def.: an IPAString is a list of IPAChar objects\n\n   # check if Unicode string contains only IPA valid characters\n   s_uni = u\"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\"   # Unicode string of the IPA pronunciation for \"achene acanthology\"\n   is_valid_ipa(s_uni)                 # True\n   is_valid_ipa(u\"LoL\")                # False (uppercase letter L is not IPA valid)\n\n   # create IPAString from list of IPAChar objects\n   new_s_ipa = IPAString(ipa_chars=[c3, c2, tS1, c1])\n\n   # create IPAString from Unicode string\n   s_ipa = IPAString(unicode_string=s_uni)\n\n   # IPAString is similar to regular Python string object\n   print(s_ipa)                            # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\"\n   len(s_ipa)                              # 21\n   s_ipa[0]                                # (first IPA char)\n   s_ipa[5:8]                              # (6th, 7th, 8th IPA chars)\n   s_ipa[19:]                              # (IPA chars from the 20th)\n   s_ipa[-1]                               # (last IPA char)\n   len(new_s_ipa)                          # 4\n   new_s_ipa.append(UNICODE_TO_IPA[u\"a\"])  # (append IPA char \"a\")\n   len(new_s_ipa)                          # 5\n   new_s_ipa.append(UNICODE_TO_IPA[u\"t\u0361\u0283\"]) # (append IPA char \"t\u0361\u0283\")\n   len(new_s_ipa)                          # 6\n   new_s_ipa.extend(s_ipa)                 # (append s_ipa to new_s_ipa)\n   len(new_s_ipa)                          # 27\n   double = s_ipa + new_s_ipa              # (concatenate s_ipa 
and new_s_ipa)\n   len(double)                             # 48\n\n   # new IPAString objects containing only...\n   print(s_ipa.consonants)                 # \"knkn\u03b8ld\u0361\u0292\"                (consonants)\n   print(s_ipa.vowels)                     # \"\u0259i\u00e6\u00e6\u0251\u0259i\"                 (vowels)\n   print(s_ipa.letters)                    # \"\u0259kin\u00e6k\u00e6n\u03b8\u0251l\u0259d\u0361\u0292i\"         (vowels and consonants)\n   print(s_ipa.cns_vwl)                    # \"\u0259kin\u00e6k\u00e6n\u03b8\u0251l\u0259d\u0361\u0292i\"         (vowels and consonants)\n   print(s_ipa.cns_vwl_pstr)               # \"\u0259\u02c8kin\u00e6k\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\"       (  + primary stress marks)\n   print(s_ipa.cns_vwl_pstr_long)          # \"\u0259\u02c8ki\u02d0n\u00e6k\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\"      (    + long marks)\n   print(s_ipa.cns_vwl_str)                # \"\u0259\u02c8kin\u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\"      (  + stress marks)\n   print(s_ipa.cns_vwl_str_len)            # \"\u0259\u02c8ki\u02d0n\u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\"     (    + length marks)\n   print(s_ipa.cns_vwl_str_len_wb)         # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251l\u0259d\u0361\u0292i\"    (      + word breaks)\n   print(s_ipa.cns_vwl_str_len_wb_sb)      # \"\u0259\u02c8ki\u02d0n \u00e6\u02cck\u00e6n\u02c8\u03b8\u0251.l\u0259.d\u0361\u0292i\"  (        + syllable breaks)\n   cns = s_ipa.consonants                  # (store new IPA string)\n   cns == s_ipa.consonants                 # False (two different objects)\n   cns.is_equivalent(s_ipa.consonants)     # True\n   cns.is_equivalent(s_ipa)                # False\n\n   # print representation and name of all IPAChar objects in IPAString\n   for c in s_ipa:\n       print(u\"%s\\t%s\" % (c, c.name))\n   # \u0259 vowel mid central unrounded\n   # \u02c8 suprasegmental primary-stress\n   
# k consonant voiceless velar plosive\n   # i vowel close front unrounded\n   # \u02d0 suprasegmental long\n   # n consonant voiced alveolar nasal\n   #   suprasegmental word-break\n   # \u00e6 vowel near-open front unrounded\n   # \u02cc suprasegmental secondary-stress\n   # k consonant voiceless velar plosive\n   # \u00e6 vowel near-open front unrounded\n   # n consonant voiced alveolar nasal\n   # \u02c8 suprasegmental primary-stress\n   # \u03b8 consonant voiceless dental non-sibilant-fricative\n   # \u0251 vowel open back unrounded\n   # . suprasegmental syllable-break\n   # l consonant voiced alveolar lateral-approximant\n   # \u0259 vowel mid central unrounded\n   # . suprasegmental syllable-break\n   # d\u0361\u0292   consonant voiced palato-alveolar sibilant-affricate\n   # i vowel close front unrounded\n\n   # compare IPAString objects\n   s_ipa_d = IPAString(unicode_string=u\"diff\")\n   s_ipa_1 = IPAString(unicode_string=u\"at\u0361\u0283e\")\n   s_ipa_2 = IPAString(unicode_string=u\"a\u02a7e\")\n   s_ipa_3 = IPAString(unicode_string=u\"at\u0361\u0283e\", single_char_parsing=True)\n   s_ipa_d == s_ipa_1              # False\n   s_ipa_1 == s_ipa_2              # False (different objects)\n   s_ipa_1 == s_ipa_3              # False (different objects)\n   s_ipa_2 == s_ipa_3              # False (different objects)\n   s_ipa_d.is_equivalent(s_ipa_1)  # False\n   s_ipa_1.is_equivalent(s_ipa_2)  # True\n   s_ipa_2.is_equivalent(s_ipa_1)  # True\n   s_ipa_1.is_equivalent(s_ipa_3)  # True\n   s_ipa_2.is_equivalent(s_ipa_3)  # True\n\n   # compare IPAString and list of IPAChar objects\n   s_ipa_1.is_equivalent([my_a1, my_tS, my_ee])    # True\n\n   # compare IPAString and Unicode string\n   s_ipa_d.is_equivalent(u\"diff\")                  # True\n   s_ipa_1.is_equivalent(u\"atse\")                  # False\n   s_ipa_1.is_equivalent(u\"atSe\")                  # False\n   s_ipa_1.is_equivalent(u\"at\u0361\u0283e\")                  # True\n   
s_ipa_1.is_equivalent(u\"at\u035c\u0283e\")                  # True\n   s_ipa_1.is_equivalent(u\"a\u02a7e\")                   # True\n   s_ipa_1.is_equivalent(u\"at\u0361\u0283eLOL\", ignore=True)  # True (ignore chars non IPA valid)\n   s_ipa_1.is_equivalent(u\"at\u0361\u0283eLoL\", ignore=True)  # False (ignore chars non IPA valid, note extra \"o\")\n\n\n   ########################\n   # CONVERSION FUNCTIONS #\n   ########################\n   from ipapy.kirshenbaummapper import KirshenbaumMapper\n   kmapper = KirshenbaumMapper()                                    # mapper to Kirshenbaum ASCII IPA\n   s_k_ipa = kmapper.map_ipa_string(s_ipa)                          # u\"@'ki:n#&,k&n'TA#l@#dZi\"\n   s_k_uni = kmapper.map_unicode_string(s_uni)                      # u\"@'ki:n#&,k&n'TA#l@#dZi\"\n   s_k_ipa == s_k_uni                                               # True\n   s_k_lis = kmapper.map_unicode_string(s_uni, return_as_list=True) # [u'@', u\"'\", u'k', u'i', u':', u'n', u'#', u'&', u',', u'k', u'&', u'n', u\"'\", u'T', u'A', u'#', u'l', u'@', u'#', u'dZ', u'i']\n\n   from ipapy.arpabetmapper import ARPABETMapper\n   amapper = ARPABETMapper()                                                    # mapper to ARPABET ASCII IPA (stress marks not supported yet)\n   s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\")                                   # error: long suprasegmental not mapped\n   s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\", ignore=True)                      # u\"PRUWF\"\n   s_a = amapper.map_unicode_string(u\"p\u0279u\u02d0f\", ignore=True, return_as_list=True) # [u'P', u'R', u'UW', u'F']\n\nAs A Command Line Tool\n~~~~~~~~~~~~~~~~~~~~~~\n\n**ipapy** comes with a command line tool to perform operations on a\ngiven Unicode UTF-8 encoded string, representing an IPA string.\nTherefore, it is recommended to run it on a shell supporting UTF-8.\n\nCurrently, the supported operations are:\n\n-  ``canonize``: canonize the Unicode representation of 
the IPA string\n-  ``chars``: list all IPA characters appearing in the IPA string\n-  ``check``: check if the given Unicode string is IPA valid\n-  ``clean``: remove characters that are not IPA valid\n-  ``u2a``: print the corresponding ARPABET (ASCII IPA) string\n-  ``u2k``: print the corresponding Kirshenbaum (ASCII IPA) string\n\nRun with the ``--help`` parameter to list all the available options:\n\n.. code:: bash\n\n   $ python -m ipapy --help\n\n   usage: __main__.py [-h] [-i] [-p] [--separator [SEPARATOR]] [-s] [-u]\n                      command string\n\n   ipapy perform a command on the given IPA/Unicode string\n\n   positional arguments:\n     command               [canonize|chars|check|clean|u2a|u2k]\n     string                String to canonize, check, clean, or convert\n\n   optional arguments:\n     -h, --help            show this help message and exit\n     -i, --ignore          Ignore Unicode characters that are not IPA valid\n     -p, --print-invalid   Print Unicode characters that are not IPA valid\n     --separator [SEPARATOR]\n                           Print IPA chars separated by this character (default:\n                           '')\n     -s, --single-char-parsing\n                           Perform single character parsing instead of maximal\n                           parsing\n     -u, --unicode         Print each Unicode character that is not IPA valid\n                           with its Unicode codepoint and name\n\nExamples:\n\n.. 
code:: bash\n\n   $ python -m ipapy canonize \"e\u02a7iu\"\n   et\u0361\u0283iu\n\n   $ python -m ipapy canonize \"e\u02a7iu\" --separator \" \"\n   e t\u0361\u0283 i u\n\n   $ python -m ipapy chars \"e\u02a7iu\"\n   'e' vowel close-mid front unrounded (U+0065)\n   't\u0361\u0283'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)\n   'i' vowel close front unrounded (U+0069)\n   'u' vowel close back rounded (U+0075)\n\n   $ python -m ipapy chars \"et\u0361\u0283iu\"\n   'e' vowel close-mid front unrounded (U+0065)\n   't\u0361\u0283'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)\n   'i' vowel close front unrounded (U+0069)\n   'u' vowel close back rounded (U+0075)\n\n   $ python -m ipapy chars \"et\u0361\u0283iu\" -s\n   'e' vowel close-mid front unrounded (U+0065)\n   't' consonant voiceless alveolar plosive (U+0074)\n   '\u0361' diacritic tie-bar-above (U+0361)\n   '\u0283' consonant voiceless palato-alveolar sibilant-fricative (U+0283)\n   'i' vowel close front unrounded (U+0069)\n   'u' vowel close back rounded (U+0075)\n\n   $ python -m ipapy check \"e\u02a7iu\"\n   True\n\n   $ python -m ipapy check \"LoL\"\n   False\n\n   $ python -m ipapy check \"LoL\" -p\n   False\n   LL\n\n   $ python -m ipapy check \"LoLOL\" -p -u\n   False\n   LLOL\n   'L' 0x4c    LATIN CAPITAL LETTER L\n   'O' 0x4f    LATIN CAPITAL LETTER O\n\n   $ python -m ipapy clean \"/e\u02a7iu/\"\n   e\u02a7iu\n\n   $ python -m ipapy u2k \"e\u02a7iu\"\n   etSiu\n\n   $ python -m ipapy u2k \"eTa\"\n   The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.\n\n   $ python -m ipapy u2k \"eTa\" -i\n   ea\n\n   $ python -m ipapy u2a \"e\u02a7iu\" --separator \" \"\n   EH CH IH UW\n\nUnit Testing\n------------\n\n.. 
code:: bash\n\n   $ python run_all_unit_tests.py\n\nLicense\n-------\n\n**ipapy** is released under the MIT License.\n\nAcknowledgments\n---------------\n\n-  Bram Vanroy provided a fix to ``setup.py`` for Windows users\n\n\n",
    "bugtrack_url": null,
    "license": "MIT License",
    "summary": "ipapy is a Python module to work with IPA strings",
    "version": "0.0.9.0",
    "split_keywords": [
        "ipapy",
        "international phonetic alphabet",
        "ipa",
        "ascii ipa",
        "asciiipa",
        "ascii-ipa",
        "kirshenbaum",
        "kirshenbaum ipa",
        "unicode"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "6d88c2fcc87174fc67a6b829f0bf2565",
                "sha256": "b96d0435282103c7d893c8226a458b70a810d130ce65fabe127c8a7490d1f82b"
            },
            "downloads": -1,
            "filename": "ipapy-0.0.9.0-py2-none-any.whl",
            "has_sig": false,
            "md5_digest": "6d88c2fcc87174fc67a6b829f0bf2565",
            "packagetype": "bdist_wheel",
            "python_version": "py2",
            "requires_python": null,
            "size": 38724,
            "upload_time": "2019-05-05T22:27:35",
            "upload_time_iso_8601": "2019-05-05T22:27:35.890892Z",
            "url": "https://files.pythonhosted.org/packages/5d/b6/c170e49cd5d3aaa8cbbe3c836d2fe09c72f08cf3b8ea1e4b4f81fed7881b/ipapy-0.0.9.0-py2-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "md5": "4c87b745b63f5a69571815a979af9cc6",
                "sha256": "e1bc73f6a4861b9a0ff562b70b87dab8acf7a63badd98caabd6e248b3839f1c9"
            },
            "downloads": -1,
            "filename": "ipapy-0.0.9.0.tar.gz",
            "has_sig": false,
            "md5_digest": "4c87b745b63f5a69571815a979af9cc6",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 37415,
            "upload_time": "2019-05-05T22:27:37",
            "upload_time_iso_8601": "2019-05-05T22:27:37.753585Z",
            "url": "https://files.pythonhosted.org/packages/41/0d/7e8652df6af20a61bb3315f5c9d99fb9ea8f3779ff80fca9d71001230f90/ipapy-0.0.9.0.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2019-05-05 22:27:37",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "github_user": "pettarin",
    "github_project": "ipapy",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [],
    "lcname": "ipapy"
}

Alberto Pettarin