# markitdown-office-extension
A markitdown plugin used to customize markdownify. Supports docx, pptx, xlsx, epub, and html file formats.
## Usage
```python
from typing import Any, Optional
from io import BytesIO
from markitdown import MarkItDown
from markitdown_office_extension.markdown_converter import MarkdownConverter
class CustomMarkdownConverter(MarkdownConverter):
    """Example converter that intercepts every image during conversion.

    Overrides ``convert_img`` to inspect the image element, optionally
    rewrite its attributes, and then delegate to the parent implementation.
    """

    def convert_img(
        self,
        el: Any,
        text: str,
        convert_as_inline: Optional[bool] = False,
        **kwargs,
    ) -> str:
        """Report and rewrite an image element, then convert it as usual.

        Args:
            el: The image element; only its ``attrs`` mapping is used here.
            text: Passed through unchanged to the parent implementation.
            convert_as_inline: Passed through unchanged to the parent
                implementation.
            **kwargs: Extra converter options. ``keep_data_uris`` is forced
                to ``True`` before delegating.

        Returns:
            Whatever the parent ``convert_img`` returns for the (possibly
            modified) element.
        """
        if (src := el.attrs.get("src")) is not None:
            # Process the extracted image here, e.g. upload it to S3.
            # This example only prints the image attributes.
            alt = el.attrs.get("alt", "")
            title = el.attrs.get("title", "")
            print(f"image alt: {alt}, title: {title}, src: {src}")

            # ... or modify an image attribute such as `src`.
            el.attrs["src"] = "https://example.com/assets/example.png"

        # If keep_data_uris is unset or False, markitdown won't display the
        # whole image URI.
        kwargs["keep_data_uris"] = True

        return super().convert_img(el, text, convert_as_inline, **kwargs)
# Build a MarkItDown instance that uses the custom converter defined above.
converter = MarkItDown(
    enable_plugins=True,
    markdownify=CustomMarkdownConverter,
)

# The sample input here is an empty UTF-8 payload; substitute the bytes of a
# real document to see the image hook fire.
payload = bytes("", encoding="utf-8")
stream = BytesIO(payload)
document = converter.convert(stream)

print(document)
```
Raw data
{
"_id": null,
"home_page": null,
"name": "markitdown-office-extension",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": null,
"keywords": "markdown, markitdown, plugin",
"author": null,
"author_email": "qzwxsaedc <me@qzwxsaedc.com>",
"download_url": "https://files.pythonhosted.org/packages/5c/f1/6fb45633994a58809dfb479a7b9b3950e40b247413e899576b68bbcf90fa/markitdown_office_extension-0.1.1.tar.gz",
"platform": null,
"description": "# markitdown-office-extension\nA markitdown plugin use to customize markdownif. Supports docx, pptx, xlsx, epub, and html file formats.\n\n## Usage\n```python\nfrom typing import Any, Optional\nfrom io import BytesIO\n\nfrom markitdown import MarkItDown\nfrom markitdown_office_extension.markdown_converter import MarkdownConverter\n\n\nclass CustomMarkdownConverter(MarkdownConverter):\n def convert_img(\n self,\n el: Any,\n text: str,\n convert_as_inline: Optional[bool] = False,\n **kwargs,\n ) -> str:\n if (src := el.attrs.get(\"src\", None)) is not None:\n # process extracted image such as upload to s3\n # in example, we print image attr only\n print(\"image alt: {alt}, title: {title}, src: {src}\".format(\n alt=el.attrs.get(\"alt\", \"\"),\n title=el.attrs.get(\"title\", \"\"),\n src=src,\n ))\n\n # ... or modify image attr such as `src`\n el.attrs[\"src\"] = \"https://example.com/assets/example.png\"\n\n # if not set keep_data_uris, or keep_data_uris is False,\n # markitdown won't display whole image uri\n kwargs[\"keep_data_uris\"] = True\n\n return super().convert_img(el, text, convert_as_inline, **kwargs)\n\nconverter = MarkItDown(enable_plugins=True, markdownify=CustomMarkdownConverter)\ndocument = converter.convert(BytesIO(bytes(\n \"\",\n encoding=\"utf-8\"\n)))\n\nprint(document) # \n```",
"bugtrack_url": null,
"license": null,
"summary": "A markitdown plugin use to customize markdownify",
"version": "0.1.1",
"project_urls": {
"Homepage": "https://github.com/qzwxsaedc/markitdown-office-extension",
"Issues": "https://github.com/qzwxsaedc/markitdown-office-extension/issues",
"Repository": "https://github.com/qzwxsaedc/markitdown-office-extension"
},
"split_keywords": [
"markdown",
" markitdown",
" plugin"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "553fe05eac78ecf3a6a4daa167c5beff8f5dee7e3e03b656a260f1c0833cb216",
"md5": "7d7aa6a904fa40e4d2ebef862e791d86",
"sha256": "5fc5558f7abc4b3b8c17066295447b091db529979c634ad964697afe4e95c0e3"
},
"downloads": -1,
"filename": "markitdown_office_extension-0.1.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "7d7aa6a904fa40e4d2ebef862e791d86",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 7033,
"upload_time": "2025-08-09T19:27:14",
"upload_time_iso_8601": "2025-08-09T19:27:14.690973Z",
"url": "https://files.pythonhosted.org/packages/55/3f/e05eac78ecf3a6a4daa167c5beff8f5dee7e3e03b656a260f1c0833cb216/markitdown_office_extension-0.1.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "5cf16fb45633994a58809dfb479a7b9b3950e40b247413e899576b68bbcf90fa",
"md5": "abd90eb3fb5674d98433ae4169bf28be",
"sha256": "4b354888bba5a81b18a7c0f00fba007a10021e5eb0eca024113d8a4270a88f35"
},
"downloads": -1,
"filename": "markitdown_office_extension-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "abd90eb3fb5674d98433ae4169bf28be",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.10",
"size": 33598,
"upload_time": "2025-08-09T19:27:15",
"upload_time_iso_8601": "2025-08-09T19:27:15.832160Z",
"url": "https://files.pythonhosted.org/packages/5c/f1/6fb45633994a58809dfb479a7b9b3950e40b247413e899576b68bbcf90fa/markitdown_office_extension-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-09 19:27:15",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "qzwxsaedc",
"github_project": "markitdown-office-extension",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "markitdown-office-extension"
}