# Wiki2Md
An opinionated tool for converting Wikipedia HTML into Markdown suitable for ingestion by LLMs.
- removes citations (.reference)
- removes the reference list (.reflist)
- removes JS table headers and footers (.pcs-collapse-table-icon)
- removes metadata like portal lists (.metadata)
- removes flag icons
- optionally removes links
Install the pre-commit hooks with `poetry run pre-commit install`, or run the checks manually, e.g. `poetry run ruff check`.
Raw data
{
"_id": null,
"home_page": "https://gitlab.wikimedia.org/repos/future-audiences/wiki2md",
"name": "wiki2md",
"maintainer": null,
"docs_url": null,
"requires_python": "<4.0,>=3.11",
"maintainer_email": null,
"keywords": "wikimedia, api, python",
"author": "Daniel Erenrich",
"author_email": "derenrich@wikimedia.org",
"download_url": "https://files.pythonhosted.org/packages/09/c7/29babbeb0761d76eb6015073b5290119baaad1ac23704ec2832c996004ba/wiki2md-0.1.3.tar.gz",
"platform": null,
"description": "# Wiki2Md\n\nAn opinionated tool for converting wikipedia HTML into markdown suitable for ingestion by LLMs.\n\n- removes citations (.reference)\n- removes ref list (.reflist)\n- removes js table headers and footers (.pcs-collapse-table-icon)\n- removes metadata like portal lists (.metadata)\n- removes flag icons\n- optionally removes links\n\nInstall the pre-commit hooks with `poetry run pre-commit install` or just run them manually e.g. `poetry run ruff check`\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "python library for converting wikipedia articles to markdown",
"version": "0.1.3",
"project_urls": {
"Homepage": "https://gitlab.wikimedia.org/repos/future-audiences/wiki2md",
"Repository": "https://gitlab.wikimedia.org/repos/future-audiences/wiki2md"
},
"split_keywords": [
"wikimedia",
" api",
" python"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "7e019e28369a3e0de55918fc81dbd2a73783b03b98c8d1eb63735245f2610ec7",
"md5": "a57dd9e3909a32f1a69b07004c531e76",
"sha256": "a568873fcf488d2acb18f987f24c8f4a2e92a20cc98f2d459ac3ba16f886bec7"
},
"downloads": -1,
"filename": "wiki2md-0.1.3-py3-none-any.whl",
"has_sig": false,
"md5_digest": "a57dd9e3909a32f1a69b07004c531e76",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "<4.0,>=3.11",
"size": 5926,
"upload_time": "2025-01-01T00:01:42",
"upload_time_iso_8601": "2025-01-01T00:01:42.608973Z",
"url": "https://files.pythonhosted.org/packages/7e/01/9e28369a3e0de55918fc81dbd2a73783b03b98c8d1eb63735245f2610ec7/wiki2md-0.1.3-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "09c729babbeb0761d76eb6015073b5290119baaad1ac23704ec2832c996004ba",
"md5": "62126f578c67b6a60ecb21073ee68c50",
"sha256": "83d28d8d432cb4f69cfd21a4518e07d50c60b87ef167f0835ddf4ca12d2da2b9"
},
"downloads": -1,
"filename": "wiki2md-0.1.3.tar.gz",
"has_sig": false,
"md5_digest": "62126f578c67b6a60ecb21073ee68c50",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "<4.0,>=3.11",
"size": 4835,
"upload_time": "2025-01-01T00:01:43",
"upload_time_iso_8601": "2025-01-01T00:01:43.613932Z",
"url": "https://files.pythonhosted.org/packages/09/c7/29babbeb0761d76eb6015073b5290119baaad1ac23704ec2832c996004ba/wiki2md-0.1.3.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-01-01 00:01:43",
"github": false,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"lcname": "wiki2md"
}