abydos


Nameabydos JSON
Version 0.5.0 PyPI version JSON
download
home_pagehttps://github.com/chrislit/abydos
SummaryAbydos NLP/IR library
upload_time2020-01-11 00:00:29
maintainer
docs_urlNone
authorChristopher C. Little
requires_python~=3.5
licenseGPLv3+
keywords nlp ai ir language linguistics phonetic algorithms string distance
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI
coveralls test coverage
Abydos
======

+------------------+------------------------------------------------------+
| CI & Test Status | |travis| |circle| |azure| |semaphore| |coveralls|    |
+------------------+------------------------------------------------------+
| Code Quality     | |codeclimate| |scrutinizer| |codacy| |codefactor|    |
+------------------+------------------------------------------------------+
| Dependencies     | |requires| |snyk| |pyup| |cii|                       |
+------------------+------------------------------------------------------+
| Local Analysis   | |pylint| |flake8| |pydocstyle| |sloccount| |black|   |
+------------------+------------------------------------------------------+
| Usage            | |docs| |mybinder| |license| |sourcerank| |zenodo|    |
+------------------+------------------------------------------------------+
| Contribution     | |openhub| |gh-commits| |gh-issues| |gh-stars|        |
+------------------+------------------------------------------------------+
| PyPI             | |pypi| |pypi-dl| |pypi-ver|                          |
+------------------+------------------------------------------------------+
| conda-forge      | |conda| |conda-dl| |conda-platforms|                 |
+------------------+------------------------------------------------------+

.. |travis| image:: https://travis-ci.org/chrislit/abydos.svg?branch=master
    :target: https://travis-ci.org/chrislit/abydos
    :alt: Travis-CI Build Status

.. |circle| image:: https://circleci.com/gh/chrislit/abydos/tree/master.svg?style=shield
    :target: https://circleci.com/gh/chrislit/abydos/tree/master
    :alt: Circle-CI Build Status

.. |azure| image:: https://dev.azure.com/chrislit/abydos/_apis/build/status/chrislit.abydos?branchName=master
    :target: https://dev.azure.com/chrislit/abydos/_build/latest?definitionId=1
    :alt: Azure Pipelines Build Status

.. |semaphore| image:: https://semaphoreci.com/api/v1/chrislit/abydos/branches/master/shields_badge.svg
    :target: https://semaphoreci.com/chrislit/abydos
    :alt: Semaphore Build Status

.. |coveralls| image:: https://coveralls.io/repos/github/chrislit/abydos/badge.svg?branch=master
    :target: https://coveralls.io/github/chrislit/abydos?branch=master
    :alt: Coverage Status

.. |codeclimate| image:: https://codeclimate.com/github/chrislit/abydos/badges/gpa.svg
    :target: https://codeclimate.com/github/chrislit/abydos
    :alt: Code Climate

.. |scrutinizer| image:: https://scrutinizer-ci.com/g/chrislit/abydos/badges/quality-score.png?b=master
    :target: https://scrutinizer-ci.com/g/chrislit/abydos/?branch=master
    :alt: Scrutinizer

.. |codacy| image:: https://api.codacy.com/project/badge/Grade/db79f2c31ea142fb9b5938abe87b0854
    :target: https://www.codacy.com/app/chrislit/abydos?utm_source=github.com&utm_medium=referral&utm_content=chrislit/abydos&utm_campaign=Badge_Grade
    :alt: Codacy

.. |codefactor| image:: https://www.codefactor.io/repository/github/chrislit/abydos/badge
    :target: https://www.codefactor.io/repository/github/chrislit/abydos
    :alt: CodeFactor

.. |requires| image:: https://requires.io/github/chrislit/abydos/requirements.svg?branch=master
    :target: https://requires.io/github/chrislit/abydos/requirements/?branch=master
    :alt: Requirements Status

.. |snyk| image:: https://snyk.io/test/github/chrislit/abydos/badge.svg?targetFile=requirements.txt
    :target: https://snyk.io/test/github/chrislit/abydos?targetFile=requirements.txt
    :alt: Known Vulnerabilities

.. |pyup| image:: https://pyup.io/repos/github/chrislit/abydos/shield.svg
    :target: https://pyup.io/repos/github/chrislit/abydos/
    :alt: Updates

.. |cii| image:: https://bestpractices.coreinfrastructure.org/projects/1598/badge
    :target: https://bestpractices.coreinfrastructure.org/projects/1598
    :alt: CII Best Practices

.. |pylint| image:: https://img.shields.io/badge/Pylint-9.09/10-yellowgreen.svg
    :target: #
    :alt: Pylint Score

.. |flake8| image:: https://img.shields.io/badge/flake8-0-brightgreen.svg
    :target: #
    :alt: flake8 Errors

.. |pydocstyle| image:: https://img.shields.io/badge/pydocstyle-0-brightgreen.svg
    :target: #
    :alt: pydocstyle Errors

.. |sloccount| image:: https://img.shields.io/badge/SLOCCount-40,610-blue.svg
    :target: #
    :alt: SLOCCount

.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg
    :target: https://github.com/ambv/black
    :alt: black

.. |docs| image:: https://readthedocs.org/projects/abydos/badge/?version=latest
    :target: https://abydos.readthedocs.org/en/latest/
    :alt: Documentation Status

.. |mybinder| image:: https://img.shields.io/badge/launch-binder-579aca.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOsEKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF6PnTusOqcMl57gm0Z8kanKMAQg
0qSyuZfn7zItsbGyO9QlnxY0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC
    :target: https://mybinder.org/v2/gh/chrislit/abydos/master?filepath=binder
    :alt: Binder

.. |license| image:: https://img.shields.io/badge/License-GPL%20v3+-blue.svg?logo=gnu
    :target: https://www.gnu.org/licenses/gpl-3.0
    :alt: License: GPL v3.0+

.. |sourcerank| image:: https://img.shields.io/librariesio/sourcerank/pypi/abydos.svg
    :target: https://libraries.io/pypi/abydos
    :alt: Libraries.io SourceRank

.. |zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3603514.svg
    :target: https://doi.org/10.5281/zenodo.3603514
    :alt: Zenodo

.. |openhub| image:: https://www.openhub.net/p/abydosnlp/widgets/project_thin_badge.gif
    :target: https://www.openhub.net/p/abydosnlp
    :alt: OpenHUB

.. |gh-commits| image:: https://img.shields.io/github/commit-activity/y/chrislit/abydos.svg?logo=github
    :target: https://github.com/chrislit/abydos/graphs/commit-activity
    :alt: GitHub Commits

.. |gh-issues| image:: https://img.shields.io/github/issues-closed/chrislit/abydos.svg?logo=github
    :target: https://github.com/chrislit/abydos/issues?q=
    :alt: GitHub Issues Closed

.. |gh-stars| image:: https://img.shields.io/github/stars/chrislit/abydos.svg?logo=github
    :target: https://github.com/chrislit/abydos/stargazers
    :alt: GitHub Stars

.. |pypi| image:: https://img.shields.io/pypi/v/abydos.svg?logo=python&logoColor=white
    :target: https://pypi.python.org/pypi/abydos
    :alt: PyPI

.. |pypi-dl| image:: https://img.shields.io/pypi/dm/abydos.svg?logo=python&logoColor=white
    :target: https://pypi.python.org/pypi/abydos
    :alt: PyPI downloads/month

.. |pypi-ver| image:: https://img.shields.io/pypi/pyversions/abydos.svg?logo=python&logoColor=white
    :target: https://pypi.python.org/pypi/abydos
    :alt: PyPI versions

.. |conda| image:: https://img.shields.io/conda/vn/conda-forge/abydos.svg?logo=conda-forge
    :target: https://anaconda.org/conda-forge/abydos
    :alt: conda-forge

.. |conda-dl| image:: https://img.shields.io/conda/dn/conda-forge/abydos.svg?logo=conda-forge
    :target: https://anaconda.org/conda-forge/abydos
    :alt: conda-forge downloads

.. |conda-platforms| image:: https://img.shields.io/conda/pn/conda-forge/abydos.svg?logo=conda-forge
    :target: https://anaconda.org/conda-forge/abydos
    :alt: conda-forge platforms

|

.. image:: https://raw.githubusercontent.com/chrislit/abydos/master/abydos-small.png
    :target: https://github.com/chrislit/abydos
    :alt: abydos
    :align: right

|
| `Abydos NLP/IR library <https://github.com/chrislit/abydos>`_
| Copyright 2014-2020 by Christopher C. Little

Abydos is a library of phonetic algorithms, string distance measures & metrics,
stemmers, and string fingerprinters including:

- Phonetic algorithms
    - Robert C. Russell's Index
    - American Soundex
    - Refined Soundex
    - Daitch-Mokotoff Soundex
    - Kölner Phonetik
    - NYSIIS
    - Match Rating Algorithm
    - Metaphone
    - Double Metaphone
    - Caverphone
    - Alpha Search Inquiry System
    - Fuzzy Soundex
    - Phonex
    - Phonem
    - Phonix
    - SfinxBis
    - phonet
    - Standardized Phonetic Frequency Code
    - Statistics Canada
    - Lein
    - Roger Root
    - Oxford Name Compression Algorithm (ONCA)
    - Eudex phonetic hash
    - Haase Phonetik
    - Reth-Schek Phonetik
    - FONEM
    - Parmar-Kumbharana
    - Davidson's Consonant Code
    - SoundD
    - PSHP Soundex/Viewex Coding
    - an early version of Henry Code
    - Norphone
    - Dolby Code
    - Phonetic Spanish
    - Spanish Metaphone
    - MetaSoundex
    - SoundexBR
    - NRL English-to-phoneme
    - Beider-Morse Phonetic Matching

- String distance metrics
    - Levenshtein distance
    - Optimal String Alignment distance
    - Damerau-Levenshtein distance
    - Hamming distance
    - Tversky index
    - Sørensen–Dice coefficient & distance
    - Jaccard similarity coefficient & distance
    - overlap similarity & distance
    - Tanimoto coefficient & distance
    - Minkowski distance & similarity
    - Manhattan distance & similarity
    - Euclidean distance & similarity
    - Chebyshev distance
    - cosine similarity & distance
    - Jaro distance
    - Jaro-Winkler distance (incl. the strcmp95 algorithm variant)
    - Longest common substring
    - Ratcliff-Obershelp similarity & distance
    - Match Rating Algorithm similarity
    - Normalized Compression Distance (NCD) & similarity
    - Monge-Elkan similarity & distance
    - Matrix similarity
    - Needleman-Wunsch score
    - Smith-Waterman score
    - Gotoh score
    - Length similarity
    - Prefix, Suffix, and Identity similarity & distance
    - Modified Language-Independent Product Name Search (MLIPNS) similarity &
      distance
    - Bag distance
    - Editex distance
    - Eudex distances
    - Sift4 distance
    - Baystat distance & similarity
    - Typo distance
    - Indel distance
    - Synoname

- Stemmers
    - the Lovins stemmer
    - the Porter and Porter2 (Snowball English) stemmers
    - Snowball stemmers for German, Dutch, Norwegian, Swedish, and Danish
    - CLEF German, German plus, and Swedish stemmers
    - Caumanns' German stemmer
    - UEA-Lite Stemmer
    - Paice-Husk Stemmer
    - Schinke Latin stemmer
    - S stemmer

- String Fingerprints
    - string fingerprint
    - q-gram fingerprint
    - phonetic fingerprint
    - Pollock & Zamora's skeleton key
    - Pollock & Zamora's omission key
    - Cisłak & Grabowski's occurrence fingerprint
    - Cisłak & Grabowski's occurrence halved fingerprint
    - Cisłak & Grabowski's count fingerprint
    - Cisłak & Grabowski's position fingerprint
    - Synoname Toolcode


-----

Installation
============

Required libraries:

- NumPy
- deprecation

Optional libraries (all available on PyPI, some available on conda or
conda-forge):

- `SyllabiPy <http://syllabipy.com/>`_
- `NLTK <https://www.nltk.org/>`_
- `PyLZSS <https://github.com/rumbah/pylzss>`_
- `paq <https://github.com/observerss/paq>`_


To install Abydos (master) from GitHub source::

   git clone https://github.com/chrislit/abydos.git --recursive
   cd abydos
   python setup.py install

If your default python command calls Python 2.7 but you want to install for
Python 3, you may instead need to call::

   python3 setup.py install


To install Abydos (latest release) from PyPI using pip::

   pip install abydos

To install from `conda-forge <https://anaconda.org/conda-forge/abydos>`_::

   conda install -c conda-forge abydos

It should run on Python 3.5-3.8.

Testing & Contributing
======================

To run the whole test-suite just call tox::

    tox

The tox setup has the following environments: black, py37, doctest,
regression, fuzz, pylint, pydocstyle, flake8, doc8, docs, sloccount, badges, &
build. So if you only want to generate documentation (in HTML, EPUB, & PDF
formats), just call::

    tox -e docs

In order to only run & generate Flake8 reports, call::

    tox -e flake8

Contributions such as bug reports, PRs, suggestions, desired new features, etc.
are welcome through GitHub
`Issues <https://github.com/chrislit/abydos/issues>`_ &
`Pull requests <https://github.com/chrislit/abydos/pulls>`_.


Release History
---------------

0.5.0 (2020-01-10) *ecgtheow*
+++++++++++++++++++++++++++++

doi:10.5281/zenodo.3603514

Changes:

- Support for Python 2.7 was removed.


0.4.1 (2020-01-07) *distant dietrich*
+++++++++++++++++++++++++++++++++++++

doi:10.5281/zenodo.3600548

Changes:

- Support for Python 3.4 was removed. (3.4 reached end-of-life on March 18,
  2019)
- Fuzzy intersections were corrected to avoid over-counting partial
  intersection instances.
- Levenshtein can now return an optimal alignment
- Added the following distance measures:
    - Indice de Similitude-Guth (ISG)
    - INClusion Programme
    - Guth
    - Victorian Panel Study (VPS) score
    - LIG3 similarity
    - Discounted Levenshtein
    - Relaxed Hamming
    - String subsequence kernel (SSK) similarity
    - Phonetic edit distance
    - Henderson-Heron dissimilarity
    - Raup-Crick similarity
    - Millar's binomial deviance dissimilarity
    - Morisita similarity
    - Horn-Morisita similarity
    - Clark's coefficient of divergence
    - Chao's Jaccard similarity
    - Chao's Dice similarity
    - Cao's CY similarity (CYs) and dissimilarity (CYd)
- Added the following fingerprint classes:
    - Taft's Consonant coding
    - Taft's Extract - letter list
    - Taft's Extract - position & frequency
    - L.A. County Sheriff's System
    - Library of Congress Cutter table encoding
- Added the following phonetic algorithms:
    - Ainsworth's grapheme-to-phoneme
    - PHONIC


0.4.0 (2019-05-30) *dietrich*
+++++++++++++++++++++++++++++

doi:10.5281/zenodo.3235034

Version 0.4.0 focuses on distance measures, adding 211 new measures. Attempts
were made to provide normalized versions for measures that did not inherently
range from 0 to 1. The other major focus was the addition of 12 tokenizers, in
service of expanding distance measure options.

Changes:

- Support for Python 3.3 was dropped.
- Deprecated functions that merely wrap class methods to maintain API
  compatibility, for removal in 0.6.0
- Added methods to ConfusionTable to return:
    - its internal representation
    - false negative rate
    - false omission rate
    - positive & negative likelihood ratios
    - diagnostic odds ratio
    - error rate
    - prevalence
    - Jaccard index
    - D-measure
    - Phi coefficient
    - joint, actual, & predicted entropies
    - mutual information
    - proficiency (uncertainty coefficient)
    - information gain ratio
    - dependency
    - lift
- Deprecated f-measure & g-measure from ConfusionTable for removal in
  0.6.0
- Added notes to indicate when functions, classes, & methods were added
- Added the following 12 tokenizers:
    - QSkipgrams
    - CharacterTokenizer
    - RegexpTokenizer, WhitespaceTokenizer, & WordpunctTokenizer
    - COrVClusterTokenizer, CVClusterTokenizer, & VCClusterTokenizer
    - SonoriPyTokenizer & LegaliPyTokenizer
    - NLTKTokenizer
    - SAPSTokenizer
- Added the UnigramCorpus class & a facility for downloading data, such as
  pre-processed/trained data, from storage on GitHub
- Added the Wåhlin phonetic encoding
- Added the following 211 similarity/distance/correlation measures:
    - ALINE
    - AMPLE
    - Anderberg
    - Andres & Marzo's Delta
    - Average Linkage
    - AZZOO
    - Baroni-Urbani & Buser I & II
    - Batagelj & Bren
    - Baulieu I-XV
    - Benini I & II
    - Bennet
    - Bhattacharyya
    - BI-SIM
    - BLEU
    - Block Levenshtein
    - Brainerd-Robinson
    - Braun-Blanquet
    - Canberra
    - Chord
    - Clement
    - Cohen's Kappa
    - Cole
    - Complete Linkage
    - Consonni & Todeschini I-V
    - Cormode's LZ
    - Covington
    - Dennis
    - Dice Asymmetric I & II
    - Digby
    - Dispersion
    - Doolittle
    - Dunning
    - Eyraud
    - Fager & McGowan
    - Faith
    - Fellegi-Sunter
    - Fidelity
    - Fleiss
    - Fleiss-Levin-Paik
    - FlexMetric
    - Forbes I & II
    - Fossum
    - FuzzyWuzzy Partial String
    - FuzzyWuzzy Token Set
    - FuzzyWuzzy Token Sort
    - Generalized Fleiss
    - Gilbert
    - Gilbert & Wells
    - Gini I & II
    - Goodall
    - Goodman & Kruskal's Lambda
    - Goodman & Kruskal's Lambda-r
    - Goodman & Kruskal's Tau A & B
    - Gower & Legendre
    - Guttman's Lambda A & B
    - Gwet's AC
    - Hamann
    - Harris & Lahey
    - Hassanat
    - Hawkins & Dotson
    - Hellinger
    - Higuera & Mico
    - Hurlbert
    - Iterative SubString
    - Jaccard-NM
    - Jensen-Shannon
    - Johnson
    - Kendall's Tau
    - Kent & Foster I & II
    - Koppen I & II
    - Kuder & Richardson
    - Kuhns I-XII
    - Kulczynski I & II
    - Longest Common Prefix
    - Longest Common Suffix
    - Lorentzian
    - Maarel
    - Marking
    - Marking Metric
    - MASI
    - Matusita
    - Maxwell & Pilliner
    - McConnaughey
    - McEwen & Michael
    - MetaLevenshtein
    - Michelet
    - MinHash
    - Mountford
    - Mean Squared Contingency
    - Mutual Information
    - NCD with LZSS
    - NCD with PAQ9a
    - Ozbay
    - Pattern
    - Pearson's Chi-Squared
    - Pearson & Heron II
    - Pearson II & III
    - Pearson's Phi
    - Peirce
    - Positional Q-Gram Dice, Jaccard, & Overlap
    - Q-Gram
    - Quantitative Cosine, Dice, & Jaccard
    - Rees-Levenshtein
    - Roberts
    - Rogers & Tanimoto
    - Rogot & Goldberg
    - Rouge-L, -S, -SU, & -W
    - Russell & Rao
    - SAPS
    - Scott's Pi
    - Shape
    - Shapira & Storer I
    - Sift4 Extended
    - Single Linkage
    - Size
    - Soft Cosine
    - SoftTF-IDF
    - Sokal & Michener
    - Sokal & Sneath I-V
    - Sorgenfrei
    - Steffensen
    - Stiles
    - Stuart's Tau
    - Tarantula
    - Tarwid
    - Tetrachoric
    - TF-IDF
    - Tichy
    - Tulloss's R, S, T, & U
    - Unigram Subtuple
    - Unknown A-M
    - Upholt
    - Warrens I-V
    - Weighted Jaccard
    - Whittaker
    - Yates' Chi-Squared
    - YJHHR
    - Yujian & Bo
    - Yule's Q, Q II, & Y
- Four intersection types are now supported for all distance measures that are
  based on _TokenDistance. In addition to basic crisp intersections, soft,
  fuzzy, and group linkage intersections have been provided.


0.3.6 (2018-11-17) *classy carl*
++++++++++++++++++++++++++++++++

doi:10.5281/zenodo.1490537

Changes:

- Most functions were encapsulated into classes.
- Each class is broken out into its own file, with test files paralleling
  library files.
- Documentation was converted from Sphinx markup to Numpy style.
- A tutorial was written for each subpackage.
- Documentation was cleaned up, with math markup corrections and many
  additional links.


0.3.5 (2018-10-31) *cantankerous carl*
++++++++++++++++++++++++++++++++++++++

doi:10.5281/zenodo.1463204

Version 0.3.5 focuses on refactoring the whole project. The API itself remains
largely the same as in previous versions, but internally modules have been
split up. Essentially no new features are added (bugfixes aside) in this
version.

Changes:

- Refactored library and tests into smaller modules
- Broke compression distances (NCD) out into separate functions
- Adopted Black code style
- Added pyproject.toml to use Poetry for packaging (but will continue using
  setuptools and setup.py for the present)
- Minor bug fixes


0.3.0 (2018-10-15) *carl*
+++++++++++++++++++++++++

doi:10.5281/zenodo.1462443

Version 0.3.0 focuses on additional phonetic algorithms, but does add numerous
distance measures, fingerprints, and even a few stemmers. Another focus was
getting everything to build again (including docs) and to move to more
standard modern tools (flake8, tox, etc.).

Changes:

- Fixed implementation of Bag distance
- Updated BMPM to version 3.10
- Fixed Sphinx documentation on readthedocs.org
- Split string fingerprints out of clustering into their own module
- Added support for q-grams to skip-n characters
- New phonetic algorithms:
   - Statistics Canada
   - Lein
   - Roger Root
   - Oxford Name Compression Algorithm (ONCA)
   - Eudex phonetic hash
   - Haase Phonetik
   - Reth-Schek Phonetik
   - FONEM
   - Parmar-Kumbharana
   - Davidson's Consonant Code
   - SoundD
   - PSHP Soundex/Viewex Coding
   - an early version of Henry Code
   - Norphone
   - Dolby Code
   - Phonetic Spanish
   - Spanish Metaphone
   - MetaSoundex
   - SoundexBR
   - NRL English-to-phoneme
- New string fingerprints:
   - Cisłak & Grabowski's occurrence fingerprint
   - Cisłak & Grabowski's occurrence halved fingerprint
   - Cisłak & Grabowski's count fingerprint
   - Cisłak & Grabowski's position fingerprint
   - Synoname Toolcode
- New distance measures:
   - Minkowski distance & similarity
   - Manhattan distance & similarity
   - Euclidean distance & similarity
   - Chebyshev distance & similarity
   - Eudex distances
   - Sift4 distance
   - Baystat distance & similarity
   - Typo distance
   - Indel distance
   - Synoname
- New stemmers:
   - UEA-Lite Stemmer
   - Paice-Husk Stemmer
   - Schinke Latin stemmer
   - S stemmer
- Eliminated ._compat submodule in favor of six
- Transitioned from PEP8 to flake8, etc.
- Phonetic algorithms now consistently use max_length=-1 to indicate that
  there should be no length limit
- Added example notebooks in binder directory


0.2.0 (2015-05-27) *berthold*
+++++++++++++++++++++++++++++

- Added Caumanns' German stemmer
- Added Lovins' English stemmer
- Updated Beider-Morse Phonetic Matching to 3.04
- Added Sphinx documentation


0.1.1 (2015-05-12) *albrecht*
+++++++++++++++++++++++++++++

- First Beta release to PyPI



Authors
```````

- Christopher C. Little (`@chrislit <https://github.com/chrislit>`_) <chrisclittle+abydos@gmail.com>



            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/chrislit/abydos",
    "name": "abydos",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "~=3.5",
    "maintainer_email": "",
    "keywords": "nlp,ai,ir,language,linguistics,phonetic algorithms,string distance",
    "author": "Christopher C. Little",
    "author_email": "chrisclittle+abydos@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/30/53/4d8dfccbbfe6031a2293941d718dfda7cf2e39883f915b5e3b2c057b518c/abydos-0.5.0.tar.gz",
    "platform": "",
    "description": "Abydos\n======\n\n+------------------+------------------------------------------------------+\n| CI & Test Status | |travis| |circle| |azure| |semaphore| |coveralls|    |\n+------------------+------------------------------------------------------+\n| Code Quality     | |codeclimate| |scrutinizer| |codacy| |codefactor|    |\n+------------------+------------------------------------------------------+\n| Dependencies     | |requires| |snyk| |pyup| |cii|                       |\n+------------------+------------------------------------------------------+\n| Local Analysis   | |pylint| |flake8| |pydocstyle| |sloccount| |black|   |\n+------------------+------------------------------------------------------+\n| Usage            | |docs| |mybinder| |license| |sourcerank| |zenodo|    |\n+------------------+------------------------------------------------------+\n| Contribution     | |openhub| |gh-commits| |gh-issues| |gh-stars|        |\n+------------------+------------------------------------------------------+\n| PyPI             | |pypi| |pypi-dl| |pypi-ver|                          |\n+------------------+------------------------------------------------------+\n| conda-forge      | |conda| |conda-dl| |conda-platforms|                 |\n+------------------+------------------------------------------------------+\n\n.. |travis| image:: https://travis-ci.org/chrislit/abydos.svg?branch=master\n    :target: https://travis-ci.org/chrislit/abydos\n    :alt: Travis-CI Build Status\n\n.. |circle| image:: https://circleci.com/gh/chrislit/abydos/tree/master.svg?style=shield\n    :target: https://circleci.com/gh/chrislit/abydos/tree/master\n    :alt: Circle-CI Build Status\n\n.. |azure| image:: https://dev.azure.com/chrislit/abydos/_apis/build/status/chrislit.abydos?branchName=master\n    :target: https://dev.azure.com/chrislit/abydos/_build/latest?definitionId=1\n    :alt: Azure Pipelines Build Status\n\n.. 
|semaphore| image:: https://semaphoreci.com/api/v1/chrislit/abydos/branches/master/shields_badge.svg\n    :target: https://semaphoreci.com/chrislit/abydos\n    :alt: Semaphore Build Status\n\n.. |coveralls| image:: https://coveralls.io/repos/github/chrislit/abydos/badge.svg?branch=master\n    :target: https://coveralls.io/github/chrislit/abydos?branch=master\n    :alt: Coverage Status\n\n.. |codeclimate| image:: https://codeclimate.com/github/chrislit/abydos/badges/gpa.svg\n    :target: https://codeclimate.com/github/chrislit/abydos\n    :alt: Code Climate\n\n.. |scrutinizer| image:: https://scrutinizer-ci.com/g/chrislit/abydos/badges/quality-score.png?b=master\n    :target: https://scrutinizer-ci.com/g/chrislit/abydos/?branch=master\n    :alt: Scrutinizer\n\n.. |codacy| image:: https://api.codacy.com/project/badge/Grade/db79f2c31ea142fb9b5938abe87b0854\n    :target: https://www.codacy.com/app/chrislit/abydos?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=chrislit/abydos&amp;utm_campaign=Badge_Grade\n    :alt: Codacy\n\n.. |codefactor| image:: https://www.codefactor.io/repository/github/chrislit/abydos/badge\n    :target: https://www.codefactor.io/repository/github/chrislit/abydos\n    :alt: CodeFactor\n\n.. |requires| image:: https://requires.io/github/chrislit/abydos/requirements.svg?branch=master\n    :target: https://requires.io/github/chrislit/abydos/requirements/?branch=master\n    :alt: Requirements Status\n\n.. |snyk| image:: https://snyk.io/test/github/chrislit/abydos/badge.svg?targetFile=requirements.txt\n    :target: https://snyk.io/test/github/chrislit/abydos?targetFile=requirements.txt\n    :alt: Known Vulnerabilities\n\n.. |pyup| image:: https://pyup.io/repos/github/chrislit/abydos/shield.svg\n    :target: https://pyup.io/repos/github/chrislit/abydos/\n    :alt: Updates\n\n.. 
|cii| image:: https://bestpractices.coreinfrastructure.org/projects/1598/badge\n    :target: https://bestpractices.coreinfrastructure.org/projects/1598\n    :alt: CII Best Practices\n\n.. |pylint| image:: https://img.shields.io/badge/Pylint-9.09/10-yellowgreen.svg\n    :target: #\n    :alt: Pylint Score\n\n.. |flake8| image:: https://img.shields.io/badge/flake8-0-brightgreen.svg\n    :target: #\n    :alt: flake8 Errors\n\n.. |pydocstyle| image:: https://img.shields.io/badge/pydocstyle-0-brightgreen.svg\n    :target: #\n    :alt: pydocstyle Errors\n\n.. |sloccount| image:: https://img.shields.io/badge/SLOCCount-40,610-blue.svg\n    :target: #\n    :alt: SLOCCount\n\n.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg\n    :target: https://github.com/ambv/black\n    :alt: black\n\n.. |docs| image:: https://readthedocs.org/projects/abydos/badge/?version=latest\n    :target: https://abydos.readthedocs.org/en/latest/\n    :alt: Documentation Status\n\n.. |mybinder| image:: 
https://img.shields.io/badge/launch-binder-579aca.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOsEKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF6PnTusOqcMl57gm0Z8kanKMAQg0qSyuZfn7zItsbGyO9Qlnx
Y0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC\n    :target: https://mybinder.org/v2/gh/chrislit/abydos/master?filepath=binder\n    :alt: Binder\n\n.. |license| image:: https://img.shields.io/badge/License-GPL%20v3+-blue.svg?logo=gnu\n    :target: https://www.gnu.org/licenses/gpl-3.0\n    :alt: License: GPL v3.0+\n\n.. |sourcerank| image:: https://img.shields.io/librariesio/sourcerank/pypi/abydos.svg\n    :target: https://libraries.io/pypi/abydos\n    :alt: Libraries.io SourceRank\n\n.. |zenodo| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3603514.svg\n    :target: https://doi.org/10.5281/zenodo.3603514\n    :alt: Zenodo\n\n.. |openhub| image:: https://www.openhub.net/p/abydosnlp/widgets/project_thin_badge.gif\n    :target: https://www.openhub.net/p/abydosnlp\n    :alt: OpenHUB\n\n.. |gh-commits| image:: https://img.shields.io/github/commit-activity/y/chrislit/abydos.svg?logo=github\n    :target: https://github.com/chrislit/abydos/graphs/commit-activity\n    :alt: GitHub Commits\n\n.. |gh-issues| image:: https://img.shields.io/github/issues-closed/chrislit/abydos.svg?logo=github\n    :target: https://github.com/chrislit/abydos/issues?q=\n    :alt: GitHub Issues Closed\n\n.. |gh-stars| image:: https://img.shields.io/github/stars/chrislit/abydos.svg?logo=github\n    :target: https://github.com/chrislit/abydos/stargazers\n    :alt: GitHub Stars\n\n.. |pypi| image:: https://img.shields.io/pypi/v/abydos.svg?logo=python&logoColor=white\n    :target: https://pypi.python.org/pypi/abydos\n    :alt: PyPI\n\n.. 
|pypi-dl| image:: https://img.shields.io/pypi/dm/abydos.svg?logo=python&logoColor=white\n    :target: https://pypi.python.org/pypi/abydos\n    :alt: PyPI downloads/month\n\n.. |pypi-ver| image:: https://img.shields.io/pypi/pyversions/abydos.svg?logo=python&logoColor=white\n    :target: https://pypi.python.org/pypi/abydos\n    :alt: PyPI versions\n\n.. |conda| image:: https://img.shields.io/conda/vn/conda-forge/abydos.svg?logo=conda-forge\n    :target: https://anaconda.org/conda-forge/abydos\n    :alt: conda-forge\n\n.. |conda-dl| image:: \thttps://img.shields.io/conda/dn/conda-forge/abydos.svg?logo=conda-forge\n    :target: https://anaconda.org/conda-forge/abydos\n    :alt: conda-forge downloads\n\n.. |conda-platforms| image:: https://img.shields.io/conda/pn/conda-forge/abydos.svg?logo=conda-forge\n    :target: https://anaconda.org/conda-forge/abydos\n    :alt: conda-forge platforms\n\n|\n\n.. image:: https://raw.githubusercontent.com/chrislit/abydos/master/abydos-small.png\n    :target: https://github.com/chrislit/abydos\n    :alt: abydos\n    :align: right\n\n|\n| `Abydos NLP/IR library <https://github.com/chrislit/abydos>`_\n| Copyright 2014-2020 by Christopher C. Little\n\nAbydos is a library of phonetic algorithms, string distance measures & metrics,\nstemmers, and string fingerprinters including:\n\n- Phonetic algorithms\n    - Robert C. 
Russell's Index\n    - American Soundex\n    - Refined Soundex\n    - Daitch-Mokotoff Soundex\n    - K\u00f6lner Phonetik\n    - NYSIIS\n    - Match Rating Algorithm\n    - Metaphone\n    - Double Metaphone\n    - Caverphone\n    - Alpha Search Inquiry System\n    - Fuzzy Soundex\n    - Phonex\n    - Phonem\n    - Phonix\n    - SfinxBis\n    - phonet\n    - Standardized Phonetic Frequency Code\n    - Statistics Canada\n    - Lein\n    - Roger Root\n    - Oxford Name Compression Algorithm (ONCA)\n    - Eudex phonetic hash\n    - Haase Phonetik\n    - Reth-Schek Phonetik\n    - FONEM\n    - Parmar-Kumbharana\n    - Davidson's Consonant Code\n    - SoundD\n    - PSHP Soundex/Viewex Coding\n    - an early version of Henry Code\n    - Norphone\n    - Dolby Code\n    - Phonetic Spanish\n    - Spanish Metaphone\n    - MetaSoundex\n    - SoundexBR\n    - NRL English-to-phoneme\n    - Beider-Morse Phonetic Matching\n\n- String distance metrics\n    - Levenshtein distance\n    - Optimal String Alignment distance\n    - Levenshtein-Damerau distance\n    - Hamming distance\n    - Tversky index\n    - S\u00f8rensen\u2013Dice coefficient & distance\n    - Jaccard similarity coefficient & distance\n    - overlap similarity & distance\n    - Tanimoto coefficient & distance\n    - Minkowski distance & similarity\n    - Manhattan distance & similarity\n    - Euclidean distance & similarity\n    - Chebyshev distance\n    - cosine similarity & distance\n    - Jaro distance\n    - Jaro-Winkler distance (incl. 
the strcmp95 algorithm variant)\n    - Longest common substring\n    - Ratcliff-Obershelp similarity & distance\n    - Match Rating Algorithm similarity\n    - Normalized Compression Distance (NCD) & similarity\n    - Monge-Elkan similarity & distance\n    - Matrix similarity\n    - Needleman-Wunsch score\n    - Smith-Waterman score\n    - Gotoh score\n    - Length similarity\n    - Prefix, Suffix, and Identity similarity & distance\n    - Modified Language-Independent Product Name Search (MLIPNS) similarity &\n      distance\n    - Bag distance\n    - Editex distance\n    - Eudex distances\n    - Sift4 distance\n    - Baystat distance & similarity\n    - Typo distance\n    - Indel distance\n    - Synoname\n\n- Stemmers\n    - the Lovins stemmer\n    - the Porter and Porter2 (Snowball English) stemmers\n    - Snowball stemmers for German, Dutch, Norwegian, Swedish, and Danish\n    - CLEF German, German plus, and Swedish stemmers\n    - Caumanns' German stemmer\n    - UEA-Lite Stemmer\n    - Paice-Husk Stemmer\n    - Schinke Latin stemmer\n    - S stemmer\n\n- String Fingerprints\n    - string fingerprint\n    - q-gram fingerprint\n    - phonetic fingerprint\n    - Pollock & Zamora's skeleton key\n    - Pollock & Zamora's omission key\n    - Cis\u0142ak & Grabowski's occurrence fingerprint\n    - Cis\u0142ak & Grabowski's occurrence halved fingerprint\n    - Cis\u0142ak & Grabowski's count fingerprint\n    - Cis\u0142ak & Grabowski's position fingerprint\n    - Synoname Toolcode\n\n\n-----\n\nInstallation\n============\n\nRequired libraries:\n\n- NumPy\n- deprecation\n\nOptional libraries (all available on PyPI, some available on conda or\nconda-forge):\n\n- `SyllabiPy <http://syllabipy.com/>`_\n- `NLTK <https://www.nltk.org/>`_\n- `PyLZSS <https://github.com/rumbah/pylzss>`_\n- `paq <https://github.com/observerss/paq>`_\n\n\nTo install Abydos (master) from Github source::\n\n   git clone https://github.com/chrislit/abydos.git --recursive\n   cd abydos\n   python 
setup.py install\n\nIf your default python command calls Python 2.7 but you want to install for\nPython 3, you may instead need to call::\n\n   python3 setup.py install\n\n\nTo install Abydos (latest release) from PyPI using pip::\n\n   pip install abydos\n\nTo install from `conda-forge <https://anaconda.org/conda-forge/abydos>`_::\n\n   conda install abydos\n\nIt should run on Python 3.5-3.8.\n\nTesting & Contributing\n======================\n\nTo run the whole test-suite just call tox::\n\n    tox\n\nThe tox setup has the following environments: black, py37, doctest,\nregression, fuzz, pylint, pydocstyle, flake8, doc8, docs, sloccount, badges, &\nbuild. So if you only want to generate documentation (in HTML, EPUB, & PDF\nformats), just call::\n\n    tox -e docs\n\nIn order to only run & generate Flake8 reports, call::\n\n    tox -e flake8\n\nContributions such as bug reports, PRs, suggestions, desired new features, etc.\nare welcome through Github\n`Issues <https://github.com/chrislit/abydos/issues>`_ &\n`Pull requests <https://github.com/chrislit/abydos/pulls>`_.\n\n\nRelease History\n---------------\n\n0.5.0 (2020-01-10) *ecgtheow*\n+++++++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.3603514\n\nChanges:\n\n- Support for Python 2.7 was removed.\n\n\n0.4.1 (2020-01-07) *distant dietrich*\n+++++++++++++++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.3600548\n\nChanges:\n\n- Support for Python 3.4 was removed. 
(3.4 reached end-of-life on March 18,\n  2019)\n- Fuzzy intersections were corrected to avoid over-counting partial\n  intersection instances.\n- Levenshtein can now return an optimal alignment\n- Added the following distance measures:\n    - Indice de Similitude-Guth (ISG)\n    - INClusion Programme\n    - Guth\n    - Victorian Panel Study (VPS) score\n    - LIG3 similarity\n    - Discounted Levenshtein\n    - Relaxed Hamming\n    - String subsequence kernel (SSK) similarity\n    - Phonetic edit distance\n    - Henderson-Heron dissimilarity\n    - Raup-Crick similarity\n    - Millar's binomial deviance dissimilarity\n    - Morisita similarity\n    - Horn-Morisita similarity\n    - Clark's coefficient of divergence\n    - Chao's Jaccard similarity\n    - Chao's Dice similarity\n    - Cao's CY similarity (CYs) and dissimilarity (CYd)\n- Added the following fingerprint classes:\n    - Taft's Consonant coding\n    - Taft's Extract - letter list\n    - Taft's Extract - position & frequency\n    - L.A. County Sheriff's System\n    - Library of Congress Cutter table encoding\n- Added the following phonetic algorithms:\n    - Ainsworth's grapheme-to-phoneme\n    - PHONIC\n\n\n0.4.0 (2019-05-30) *dietrich*\n+++++++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.3235034\n\nVersion 0.4.0 focuses on distance measures, adding 211 new measures. Attempts\nwere made to provide normalized versions for measures that did not inherently\nrange from 0 to 1. 
The other major focus was the addition of 12 tokenizers, in\nservice of expanding distance measure options.\n\nChanges:\n\n- Support for Python 3.3 was dropped.\n- Deprecated functions that merely wrap class methods to maintain API\n  compatibility, for removal in 0.6.0\n- Added methods to ConfusionTable to return:\n    - its internal representation\n    - false negative rate\n    - false omission rate\n    - positive & negative likelihood ratios\n    - diagnostic odds ratio\n    - error rate\n    - prevalence\n    - Jaccard index\n    - D-measure\n    - Phi coefficient\n    - joint, actual, & predicted entropies\n    - mutual information\n    - proficiency (uncertainty coefficient)\n    - information gain ratio\n    - dependency\n    - lift\n- Deprecated f-measure & g-measure from ConfusionTable for removal in\n  0.6.0\n- Added notes to indicate when functions, classes, & methods were added\n- Added the following 12 tokenizers:\n    - QSkipgrams\n    - CharacterTokenizer\n    - RegexpTokenizer, WhitespaceTokenizer, & WordpunctTokenizer\n    - COrVClusterTokenizer, CVClusterTokenizer, & VCClusterTokenizer\n    - SonoriPyTokenizer & LegaliPyTokenizer\n    - NLTKTokenizer\n    - SAPSTokenizer\n- Added the UnigramCorpus class & a facility for downloading data, such as\n  pre-processed/trained data, from storage on GitHub\n- Added the W\u00e5hlin phonetic encoding\n- Added the following 211 similarity/distance/correlation measures:\n    - ALINE\n    - AMPLE\n    - Anderberg\n    - Andres & Marzo's Delta\n    - Average Linkage\n    - AZZOO\n    - Baroni-Urbani & Buser I & II\n    - Batagelj & Bren\n    - Baulieu I-XV\n    - Benini I & II\n    - Bennet\n    - Bhattacharyya\n    - BI-SIM\n    - BLEU\n    - Block Levenshtein\n    - Brainerd-Robinson\n    - Braun-Blanquet\n    - Canberra\n    - Chord\n    - Clement\n    - Cohen's Kappa\n    - Cole\n    - Complete Linkage\n    - Consonni & Todeschini I-V\n    - Cormode's LZ\n    - Covington\n    - Dennis\n    - Dice 
Asymmetric I & II\n    - Digby\n    - Dispersion\n    - Doolittle\n    - Dunning\n    - Eyraud\n    - Fager & McGowan\n    - Faith\n    - Fellegi-Sunter\n    - Fidelity\n    - Fleiss\n    - Fleiss-Levin-Paik\n    - FlexMetric\n    - Forbes I & II\n    - Fossum\n    - FuzzyWuzzy Partial String\n    - FuzzyWuzzy Token Set\n    - FuzzyWuzzy Token Sort\n    - Generalized Fleiss\n    - Gilbert\n    - Gilbert & Wells\n    - Gini I & II\n    - Goodall\n    - Goodman & Kruskal's Lambda\n    - Goodman & Kruskal's Lambda-r\n    - Goodman & Kruskal's Tau A & B\n    - Gower & Legendre\n    - Guttman's Lambda A & B\n    - Gwet's AC\n    - Hamann\n    - Harris & Lahey\n    - Hassanat\n    - Hawkins & Dotson\n    - Hellinger\n    - Higuera & Mico\n    - Hurlbert\n    - Iterative SubString\n    - Jaccard-NM\n    - Jensen-Shannon\n    - Johnson\n    - Kendall's Tau\n    - Kent & Foster I & II\n    - Koppen I & II\n    - Kuder & Richardson\n    - Kuhns I-XII\n    - Kulczynski I & II\n    - Longest Common Prefix\n    - Longest Common Suffix\n    - Lorentzian\n    - Maarel\n    - Marking\n    - Marking Metric\n    - MASI\n    - Matusita\n    - Maxwell & Pilliner\n    - McConnaughey\n    - McEwen & Michael\n    - MetaLevenshtein\n    - Michelet\n    - MinHash\n    - Mountford\n    - Mean Squared Contingency\n    - Mutual Information\n    - NCD with LZSS\n    - NCD with PAQ9a\n    - Ozbay\n    - Pattern\n    - Pearson's Chi-Squared\n    - Pearson & Heron II\n    - Pearson II & III\n    - Pearson's Phi\n    - Peirce\n    - Positional Q-Gram Dice, Jaccard, & Overlap\n    - Q-Gram\n    - Quantitative Cosine, Dice, & Jaccard\n    - Rees-Levenshtein\n    - Roberts\n    - Rogers & Tanimoto\n    - Rogot & Goldberg\n    - Rouge-L, -S, -SU, & -W\n    - Russell & Rao\n    - SAPS\n    - Scott's Pi\n    - Shape\n    - Shapira & Storer I\n    - Sift4 Extended\n    - Single Linkage\n    - Size\n    - Soft Cosine\n    - SoftTF-IDF\n    - Sokal & Michener\n    - Sokal & Sneath I-V\n    - Sorgenfrei\n   
 - Steffensen\n    - Stiles\n    - Stuart's Tau\n    - Tarantula\n    - Tarwid\n    - Tetrachoric\n    - TF-IDF\n    - Tichy\n    - Tulloss's R, S, T, & U\n    - Unigram Subtuple\n    - Unknown A-M\n    - Upholt\n    - Warrens I-V\n    - Weighted Jaccard\n    - Whittaker\n    - Yates' Chi-Squared\n    - YJHHR\n    - Yujian & Bo\n    - Yule's Q, Q II, & Y\n- Four intersection types are now supported for all distance measure that are\n  based on _TokenDistance. In addition to basic crisp intersections, soft,\n  fuzzy, and group linkage intersections have been provided.\n\n\n0.3.6 (2018-11-17) *classy carl*\n++++++++++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.1490537\n\nChanges:\n\n- Most functions were encapsulated into classes.\n- Each class is broken out into its own file, with test files paralleling\n  library files.\n- Documentation was converted from Sphinx markup to Numpy style.\n- A tutorial was written for each subpackage.\n- Documentation was cleaned up, with math markup corrections and many\n  additional links.\n\n\n0.3.5 (2018-10-31) *cantankerous carl*\n++++++++++++++++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.1463204\n\nVersion 0.3.5 focuses on refactoring the whole project. The API itself remains\nlargely the same as in previous versions, but underlyingly modules have been\nsplit up. Essentially no new features are added (bugfixes aside) in this\nversion.\n\nChanges:\n\n- Refactored library and tests into smaller modules\n- Broke compression distances (NCD) out into separate functions\n- Adopted Black code style\n- Added pyproject.toml to use Poetry for packaging (but will continue using\n  setuptools and setup.py for the present)\n- Minor bug fixes\n\n\n0.3.0 (2018-10-15) *carl*\n+++++++++++++++++++++++++\n\ndoi:10.5281/zenodo.1462443\n\nVersion 0.3.0 focuses on additional phonetic algorithms, but does add numerous\ndistance measures, fingerprints, and even a few stemmers. 
Another focus was\ngetting everything to build again (including docs) and to move to more\nstandard modern tools (flake8, tox, etc.).\n\nChanges:\n\n- Fixed implementation of Bag distance\n- Updated BMPM to version 3.10\n- Fixed Sphinx documentation on readthedocs.org\n- Split string fingerprints out of clustering into their own module\n- Added support for q-grams to skip-n characters\n- New phonetic algorithms:\n   - Statistics Canada\n   - Lein\n   - Roger Root\n   - Oxford Name Compression Algorithm (ONCA)\n   - Eudex phonetic hash\n   - Haase Phonetik\n   - Reth-Schek Phonetik\n   - FONEM\n   - Parmar-Kumbharana\n   - Davidson's Consonant Code\n   - SoundD\n   - PSHP Soundex/Viewex Coding\n   - an early version of Henry Code\n   - Norphone\n   - Dolby Code\n   - Phonetic Spanish\n   - Spanish Metaphone\n   - MetaSoundex\n   - SoundexBR\n   - NRL English-to-phoneme\n- New string fingerprints:\n   - Cis\u0142ak & Grabowski's occurrence fingerprint\n   - Cis\u0142ak & Grabowski's occurrence halved fingerprint\n   - Cis\u0142ak & Grabowski's count fingerprint\n   - Cis\u0142ak & Grabowski's position fingerprint\n   - Synoname Toolcode\n- New distance measures:\n   - Minkowski distance & similarity\n   - Manhattan distance & similarity\n   - Euclidean distance & similarity\n   - Chebyshev distance & similarity\n   - Eudex distances\n   - Sift4 distance\n   - Baystat distance & similarity\n   - Typo distance\n   - Indel distance\n   - Synoname\n- New stemmers:\n   - UEA-Lite Stemmer\n   - Paice-Husk Stemmer\n   - Schinke Latin stemmer\n   - S stemmer\n- Eliminated ._compat submodule in favor of six\n- Transitioned from PEP8 to flake8, etc.\n- Phonetic algorithms now consistently use max_length=-1 to indicate that\n  there should be no length limit\n- Added example notebooks in binder directory\n\n\n0.2.0 (2015-05-27) *berthold*\n+++++++++++++++++++++++++++++\n\n- Added Caumanns' German stemmer\n- Added Lovins' English stemmer\n- Updated Beider-Morse Phonetic Matching 
to 3.04\n- Added Sphinx documentation\n\n\n0.1.1 (2015-05-12) *albrecht*\n+++++++++++++++++++++++++++++\n\n- First Beta release to PyPI\n\n\n\nAuthors\n```````\n\n- Christopher C. Little (`@chrislit <https://github.com/chrislit>`_) <chrisclittle+abydos@gmail.com>\n\n\n",
    "bugtrack_url": null,
    "license": "GPLv3+",
    "summary": "Abydos NLP/IR library",
    "version": "0.5.0",
    "project_urls": {
        "Download": "https://github.com/chrislit/abydos/archive/master.zip",
        "Homepage": "https://github.com/chrislit/abydos"
    },
    "split_keywords": [
        "nlp",
        "ai",
        "ir",
        "language",
        "linguistics",
        "phonetic algorithms",
        "string distance"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "7fa5ca258a571997be1c9483d6075bbc1b9487ae80f3bb3bf1f60db0b29f5aa6",
                "md5": "7c3e776c523e723332beab3272f9a326",
                "sha256": "fe758c8f8456a703b7637ab9ac49457c1461d1ee61c97b52a6d803a567f355e1"
            },
            "downloads": -1,
            "filename": "abydos-0.5.0-py2.py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "7c3e776c523e723332beab3272f9a326",
            "packagetype": "bdist_wheel",
            "python_version": "py2.py3",
            "requires_python": "~=3.5",
            "size": 886001,
            "upload_time": "2020-01-11T00:00:25",
            "upload_time_iso_8601": "2020-01-11T00:00:25.853748Z",
            "url": "https://files.pythonhosted.org/packages/7f/a5/ca258a571997be1c9483d6075bbc1b9487ae80f3bb3bf1f60db0b29f5aa6/abydos-0.5.0-py2.py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "30534d8dfccbbfe6031a2293941d718dfda7cf2e39883f915b5e3b2c057b518c",
                "md5": "ab111139aae3221f242fe1f3b2675b5a",
                "sha256": "4c02e84e18211ede6885e4347a93e64fe15b777bdce0d69ac5a8617a26baef4f"
            },
            "downloads": -1,
            "filename": "abydos-0.5.0.tar.gz",
            "has_sig": false,
            "md5_digest": "ab111139aae3221f242fe1f3b2675b5a",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": "~=3.5",
            "size": 416089,
            "upload_time": "2020-01-11T00:00:29",
            "upload_time_iso_8601": "2020-01-11T00:00:29.052897Z",
            "url": "https://files.pythonhosted.org/packages/30/53/4d8dfccbbfe6031a2293941d718dfda7cf2e39883f915b5e3b2c057b518c/abydos-0.5.0.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2020-01-11 00:00:29",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "chrislit",
    "github_project": "abydos",
    "travis_ci": true,
    "coveralls": true,
    "github_actions": false,
    "circle": true,
    "requirements": [],
    "tox": true,
    "lcname": "abydos"
}
        
Elapsed time: 0.35287s