==============
Super Collator
==============

.. |py39| image:: docs/_images/badge-py39.svg

.. |py310| image:: docs/_images/badge-py310.svg

.. |py311| image:: docs/_images/badge-py311.svg

.. |py312| image:: docs/_images/badge-py312.svg

.. |pypy39| image:: docs/_images/badge-pypy39.svg

.. |coverage| image:: docs/_images/badge-coverage.svg

|py39| |py310| |py311| |py312| |pypy39| |coverage|

Collates textual sources with relaxed spelling. Uses Gotoh's variant of the
Needleman-Wunsch sequence alignment algorithm.

.. code-block:: shell

   $ pip install super-collator

.. code-block:: python

   >>> from super_collator.aligner import Aligner
   >>> from super_collator.ngrams import NGrams
   >>> from super_collator.super_collator import to_table

   >>> aligner = Aligner(-0.5, -0.5, -0.5)
   >>> a = "Lorem ipsum dollar amat adipiscing elit"
   >>> b = "qui dolorem ipsum quia dolor sit amet consectetur adipisci velit"
   >>>
   >>> a = [NGrams(s).load(s, 3) for s in a.split()]
   >>> b = [NGrams(s).load(s, 3) for s in b.split()]
   >>>
   >>> a, b, score = aligner.align(a, b, NGrams.similarity, lambda: NGrams("-"))
   >>> print(to_table(list(map(str, a)), list(map(str, b)))) # doctest: +NORMALIZE_WHITESPACE
   - Lorem ipsum - dollar - amat - adipiscing elit
   qui dolorem ipsum quia dolor sit amet consectetur adipisci velit

Documentation: https://cceh.github.io/super-collator/

PyPI: https://pypi.org/project/super-collator/
Raw data
{
"_id": null,
"home_page": "",
"name": "super-collator",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.9",
"maintainer_email": "",
"keywords": "collation,collator,needleman-wunsch,needleman-wunsch-gotoh,python",
"author": "",
"author_email": "Marcello Perathoner <marcello@perathoner.de>",
"download_url": "https://files.pythonhosted.org/packages/81/79/63e2dc885651154f9ecf165400655e749a3d9a208bd5fea006acdf74b0a3/super_collator-0.0.5.tar.gz",
"platform": null,
"description": "==============\nSuper Collator\n==============\n\n.. |py39| image:: docs/_images/badge-py39.svg\n\n.. |py310| image:: docs/_images/badge-py310.svg\n\n.. |py311| image:: docs/_images/badge-py311.svg\n\n.. |py312| image:: docs/_images/badge-py312.svg\n\n.. |pypy39| image:: docs/_images/badge-pypy39.svg\n\n.. |coverage| image:: docs/_images/badge-coverage.svg\n\n|py39| |py310| |py311| |py312| |pypy39| |coverage|\n\nCollates textual sources with relaxed spelling. Uses Gotoh's variant of the\nNeedleman-Wunsch sequence alignment algorithm.\n\n.. code-block:: shell\n\n $ pip install super-collator\n\n.. code-block:: python\n\n >>> from super_collator.aligner import Aligner\n >>> from super_collator.ngrams import NGrams\n >>> from super_collator.super_collator import to_table\n\n >>> aligner = Aligner(-0.5, -0.5, -0.5)\n >>> a = \"Lorem ipsum dollar amat adipiscing elit\"\n >>> b = \"qui dolorem ipsum quia dolor sit amet consectetur adipisci velit\"\n >>>\n >>> a = [NGrams(s).load(s, 3) for s in a.split()]\n >>> b = [NGrams(s).load(s, 3) for s in b.split()]\n >>>\n >>> a, b, score = aligner.align(a, b, NGrams.similarity, lambda: NGrams(\"-\"))\n >>> print(to_table(list(map(str, a)), list(map(str, b)))) # doctest: +NORMALIZE_WHITESPACE\n - Lorem ipsum - dollar - amat - adipiscing elit\n qui dolorem ipsum quia dolor sit amet consectetur adipisci velit\n\nDocumentation: https://cceh.github.io/super-collator/\n\nPyPi: https://pypi.org/project/super-collator/\n",
"bugtrack_url": null,
"license": "",
"summary": "Collate textual sources with relaxed spelling.",
"version": "0.0.5",
"project_urls": {
"Bug Tracker": "https://github.com/cceh/super-collator/issues",
"Homepage": "https://github.com/cceh/super-collator"
},
"split_keywords": [
"collation",
"collator",
"needleman-wunsch",
"needleman-wunsch-gotoh",
"python"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "9681ea6abace7b271a04a5c47561f62006cbd2c952f8703e3ade00290bdd9143",
"md5": "089c12a8887931d66d06990fd723ecb5",
"sha256": "50b476f0c7980078c5bdc78491b3bf0d9573fc24d57fc2b30e309e1435bdc334"
},
"downloads": -1,
"filename": "super_collator-0.0.5-py3-none-any.whl",
"has_sig": false,
"md5_digest": "089c12a8887931d66d06990fd723ecb5",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.9",
"size": 19657,
"upload_time": "2023-10-09T12:46:14",
"upload_time_iso_8601": "2023-10-09T12:46:14.705115Z",
"url": "https://files.pythonhosted.org/packages/96/81/ea6abace7b271a04a5c47561f62006cbd2c952f8703e3ade00290bdd9143/super_collator-0.0.5-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "817963e2dc885651154f9ecf165400655e749a3d9a208bd5fea006acdf74b0a3",
"md5": "b0f2e0d8a278f374fb67453da7320291",
"sha256": "e0306f48131d70ca7e26dff1b022a2737c0769352b0024e51a8e5c89d333b651"
},
"downloads": -1,
"filename": "super_collator-0.0.5.tar.gz",
"has_sig": false,
"md5_digest": "b0f2e0d8a278f374fb67453da7320291",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.9",
"size": 37100,
"upload_time": "2023-10-09T12:46:17",
"upload_time_iso_8601": "2023-10-09T12:46:17.359459Z",
"url": "https://files.pythonhosted.org/packages/81/79/63e2dc885651154f9ecf165400655e749a3d9a208bd5fea006acdf74b0a3/super_collator-0.0.5.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-10-09 12:46:17",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "cceh",
"github_project": "super-collator",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"requirements": [],
"lcname": "super-collator"
}