# Python Document Scanner SDK
This project provides Python bindings for the [Dynamsoft C/C++ Document Scanner SDK v1.x](https://www.dynamsoft.com/document-normalizer/docs/core/introduction/?ver=latest), enabling developers to quickly create document scanner applications for Windows and Linux desktop environments.
> Note: This project is an unofficial, community-maintained Python wrapper for the Dynamsoft Document Normalizer SDK. For those seeking the most reliable and fully-supported solution, Dynamsoft offers an official Python package. Visit the [Dynamsoft Capture Vision Bundle](https://pypi.org/project/dynamsoft-capture-vision-bundle/) page on PyPI for more details.
## About Dynamsoft Capture Vision Bundle
- Activate the SDK with a [30-day FREE trial license](https://www.dynamsoft.com/customer/license/trialLicense/?product=dcv&package=cross-platform).
- Install the SDK via `pip install dynamsoft-capture-vision-bundle`.
### Comparison Table
| Feature | Unofficial Wrapper (Community) | Official Dynamsoft Capture Vision SDK |
| --- | --- | --- |
| Support | Community-driven, best effort | Official support from Dynamsoft |
| Documentation | README only | [Comprehensive Online Documentation](https://www.dynamsoft.com/capture-vision/docs/server/programming/python/?lang=python) |
| API Coverage | Limited | Full API coverage |
| Feature Updates | May lag behind the official SDK | First to receive new features |
| Compatibility | Limited testing across environments | Thoroughly tested across all supported environments |
| OS Support | Windows, Linux | Windows, Linux, **macOS** |
## Supported Python Versions
* Python 3.x
## Dependencies
Install the required dependencies using pip:
```bash
pip install opencv-python
```
## Command-line Usage
- Scan documents from images:
```bash
scandocument -f <file-name> -l <license-key>
```
- Scan documents from a camera video stream:
```bash
scandocument -c 1 -l <license-key>
```
## Quick Start
- Scan documents from an image file:
```python
import argparse
import docscanner
import sys
import numpy as np
import cv2
import time
def showNormalizedImage(name, normalized_image):
    """Show a normalized document image in a window and return it as a Mat.

    The image is converted with ``docscanner.convertNormalizedImage2Mat``,
    displayed with ``cv2.imshow`` in the window titled ``name``, and the
    converted object is returned to the caller.
    """
    converted = docscanner.convertNormalizedImage2Mat(normalized_image)
    cv2.imshow(name, converted)
    return converted
def process_file(filename, scanner):
    """Detect documents in an image file, preview them and save the result.

    Each detection returned by ``scanner.detectMat`` is perspective-corrected
    with ``scanner.normalizeBuffer``, shown in the "Normalized Image" window
    and outlined in green on the source image. After a key press, the last
    normalized document is saved to a timestamp-named PNG file.

    Args:
        filename: Path of the image, loaded with ``cv2.imread``.
        scanner: Scanner object created by ``docscanner.createInstance()``.
    """
    image = cv2.imread(filename)
    if image is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        print('Failed to load image: ' + str(filename))
        return

    # Stays None when nothing is detected, so the save step can be skipped
    # instead of failing on an unbound name.
    normalized_image = None
    for result in scanner.detectMat(image):
        x1, y1, x2, y2 = result.x1, result.y1, result.x2, result.y2
        x3, y3, x4, y4 = result.x3, result.y3, result.x4, result.y4

        normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
        showNormalizedImage("Normalized Image", normalized_image)
        cv2.drawContours(image, [np.intp([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])], 0, (0, 255, 0), 2)

    cv2.imshow('Document Image', image)
    cv2.waitKey(0)

    if normalized_image is None:
        print('No document detected')
        return

    normalized_image.save(str(time.time()) + '.png')
    print('Image saved')
def scandocument():
    """Command-line entry point for scanning documents from an image file.

    Parses ``-f/--file`` and ``-l/--license`` from the command line. When no
    file is given, the usage text is printed and the function returns.
    Otherwise the license is set, a scanner using the color template is
    created and the file is passed to ``process_file``. Any exception raised
    along the way is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Scan documents from an image file')
    parser.add_argument('-f', '--file', help='Path to the image file')
    parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')
    args = parser.parse_args()

    if args.file is None:
        parser.print_help()
        return

    try:
        # Use the placeholder key when no license was supplied on the
        # command line (the option defaults to an empty string).
        docscanner.initLicense(args.license or "LICENSE-KEY")

        # Initialize the document scanner with the color template.
        scanner = docscanner.createInstance()
        scanner.setParameters(docscanner.Templates.color)

        process_file(args.file, scanner)
    except Exception as err:
        print(err)
        sys.exit(1)


if __name__ == "__main__":
    scandocument()
```

- Scan documents from a camera video stream:
```python
import argparse
import docscanner
import sys
import numpy as np
import cv2
import time
# Most recent detection results handed to ``callback``; None until the first call.
g_results = None
# (window name, normalized image) pairs produced by the "n" key and saved by "s".
g_normalized_images = []


def callback(results):
    """Remember the latest detection results in the module-level ``g_results``."""
    global g_results
    g_results = results
def showNormalizedImage(name, normalized_image):
    """Display ``normalized_image`` in the window ``name`` and return its Mat form.

    Conversion is done by ``docscanner.convertNormalizedImage2Mat``; the
    converted object is both shown with ``cv2.imshow`` and returned.
    """
    converted = docscanner.convertNormalizedImage2Mat(normalized_image)
    cv2.imshow(name, converted)
    return converted
def _corners(result):
    """Return the four corner points of a detection result as (x, y) tuples."""
    return [
        (result.x1, result.y1),
        (result.x2, result.y2),
        (result.x3, result.y3),
        (result.x4, result.y4),
    ]


def _normalize_results(scanner, image, results):
    """Normalize every detection in ``image`` and show each one in its own window.

    Returns a list of ``(window name, normalized image)`` pairs, where the
    window name is the detection's index as a string.
    """
    normalized = []
    for index, result in enumerate(results):
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = _corners(result)
        normalized_image = scanner.normalizeBuffer(
            image, x1, y1, x2, y2, x3, y3, x4, y4)
        normalized.append((str(index), normalized_image))
        showNormalizedImage(str(index), normalized_image)
    return normalized


def process_video(scanner):
    """Run the interactive camera loop for document scanning.

    Frames are read from camera 0 and queued for asynchronous detection;
    results arrive through ``callback`` and are outlined in green on the
    preview. Keys:

    * ``n``   - normalize every currently detected document and show it.
    * ``s``   - save the normalized documents to timestamp-named PNG files
      and close their windows.
    * ``ESC`` - leave the loop.

    The camera, the OpenCV windows and the async listener are released when
    the loop ends, including when an exception is raised.

    Args:
        scanner: Scanner object created by ``docscanner.createInstance()``.
    """
    # The list is rebound below; without this declaration the assignments
    # would create a function-local name and the "s" key could hit it
    # before it was ever assigned.
    global g_normalized_images

    scanner.addAsyncListener(callback)
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, image = cap.read()
            if not ret or image is None:
                # No frame available (camera missing or stream ended); the
                # drawing calls below cannot work without one.
                print('Failed to read a frame from the camera')
                break

            # Work on a snapshot: ``callback`` may rebind g_results while
            # this iteration is still running.
            results = g_results

            ch = cv2.waitKey(1)
            if ch == 27:
                break
            elif ch == ord('n'):  # normalize image
                if results is not None:
                    g_normalized_images = _normalize_results(
                        scanner, image, results)
            elif ch == ord('s'):  # save image
                for window_name, normalized_image in g_normalized_images:
                    cv2.destroyWindow(window_name)
                    normalized_image.save(str(time.time()) + '.png')
                    print('Image saved')
                g_normalized_images = []

            scanner.detectMatAsync(image)

            if results is not None:
                for result in results:
                    cv2.drawContours(
                        image, [np.intp(_corners(result))], 0, (0, 255, 0), 2)

            hints = (
                'Press "n" to normalize image',
                'Press "s" to save image',
                'Press "ESC" to exit',
            )
            for row, hint in enumerate(hints, start=1):
                cv2.putText(image, hint, (10, 30 * row),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow('Document Scanner', image)
    finally:
        cap.release()
        cv2.destroyAllWindows()
        scanner.clearAsyncListener()
# Set the license key (replace the placeholder with a valid key), create a
# scanner that uses the color template, then start the camera loop.
docscanner.initLicense("LICENSE-KEY")
scanner = docscanner.createInstance()
ret = scanner.setParameters(docscanner.Templates.color)
process_video(scanner)
```

## API Methods
- `docscanner.initLicense('YOUR-LICENSE-KEY')`: Set the license key.
```python
docscanner.initLicense("LICENSE-KEY")
```
- `docscanner.createInstance()`: Create a Document Scanner instance.
```python
scanner = docscanner.createInstance()
```
- `detectFile(filename)`: Perform edge detection on an image file.
```python
results = scanner.detectFile(<filename>)
```
- `detectMat(Mat image)`: Perform edge detection on an OpenCV Mat.
```python
image = cv2.imread(<filename>)
results = scanner.detectMat(image)
for result in results:
x1 = result.x1
y1 = result.y1
x2 = result.x2
y2 = result.y2
x3 = result.x3
y3 = result.y3
x4 = result.x4
y4 = result.y4
```
- `setParameters(Template)`: Select color, binary, or grayscale template.
```python
scanner.setParameters(docscanner.Templates.color)
```
- `addAsyncListener(callback function)`: Start a native thread to run document scanning tasks asynchronously.
- `detectMatAsync(<opencv mat data>)`: Queue a document scanning task into the native thread.
```python
def callback(results):
for result in results:
print(result.x1)
print(result.y1)
print(result.x2)
print(result.y2)
print(result.x3)
print(result.y3)
print(result.x4)
print(result.y4)
import cv2
image = cv2.imread(<filename>)
scanner.addAsyncListener(callback)
scanner.detectMatAsync(image)
sleep(5)
```
- `normalizeBuffer(mat, x1, y1, x2, y2, x3, y3, x4, y4)`: Perform perspective correction on an OpenCV Mat.
```python
normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)
```
- `normalizeFile(filename, x1, y1, x2, y2, x3, y3, x4, y4)`: Perform perspective correction on an image file.
```python
normalized_image = scanner.normalizeFile(<filename>, x1, y1, x2, y2, x3, y3, x4, y4)
```
- `normalized_image.save(filename)`: Save the normalized image to a file.
```python
normalized_image.save(<filename>)
```
- `normalized_image.recycle()`: Release the memory of the normalized image.
- `clearAsyncListener()`: Stop the native thread and clear the registered Python function.
## How to Build the Python Document Scanner Extension
- Create a source distribution:
```bash
python setup.py sdist
```
- Build and install in development mode with setuptools:
```bash
python setup_setuptools.py build
python setup_setuptools.py develop
```
- Build wheel:
```bash
pip wheel . --verbose
# Or
python setup.py bdist_wheel
```
Raw data
{
"_id": null,
"home_page": "https://github.com/yushulx/python-document-scanner-sdk",
"name": "document-scanner-sdk",
"maintainer": null,
"docs_url": null,
"requires_python": null,
"maintainer_email": null,
"keywords": null,
"author": "yushulx",
"author_email": null,
"download_url": "https://files.pythonhosted.org/packages/69/3a/4592ed53a0dc1cee1ef95fa072255bf078135fa374eb3bf1deab2ca900c9/document-scanner-sdk-1.1.1.tar.gz",
"platform": null,
"description": "# Python Document Scanner SDK \r\nThis project provides Python bindings for the [Dynamsoft C/C++ Document Scanner SDK v1.x](https://www.dynamsoft.com/document-normalizer/docs/core/introduction/?ver=latest&ver=latest), enabling developers to quickly create document scanner applications for Windows and Linux desktop environments.\r\n\r\n> Note: This project is an unofficial, community-maintained Python wrapper for the Dynamsoft Document Normalizer SDK. For those seeking the most reliable and fully-supported solution, Dynamsoft offers an official Python package. Visit the [Dynamsoft Capture Vision Bundle](https://pypi.org/project/dynamsoft-capture-vision-bundle/) page on PyPI for more details.\r\n\r\n## About Dynamsoft Capture Vision Bundle\r\n- Activate the SDK with a [30-day FREE trial license](https://www.dynamsoft.com/customer/license/trialLicense/?product=dcv&package=cross-platform).\r\n- Install the SDK via `pip install dynamsoft-capture-vision-bundle`.\r\n\r\n### Comparison Table\r\n| Feature | Unofficial Wrapper (Community) | Official Dynamsoft Capture Vision SDK |\r\n| --- | --- | --- |\r\n| Support | Community-driven, best effort | Official support from Dynamsoft |\r\n| Documentation | README only | [Comprehensive Online Documentation](https://www.dynamsoft.com/capture-vision/docs/server/programming/python/?lang=python) |\r\n| API Coverage | Limited | Full API coverage |\r\n|Feature Updates| May lag behind the official SDK | First to receive new features |\r\n| Compatibility | Limited testing across environments| Thoroughly tested across all supported environments|\r\n| OS Support | Windows, Linux | Windows, Linux, **macOS** |\r\n\r\n## Supported Python Versions\r\n* Python 3.x\r\n\r\n## Dependencies\r\nInstall the required dependencies using pip:\r\n\r\n```bash \r\npip install opencv-python\r\n```\r\n\r\n## Command-line Usage\r\n- Scan documents from images:\r\n \r\n ```bash\r\n scandocument -f <file-name> -l <license-key>\r\n ```\r\n\r\n- 
Scan documents from a camera video stream:\r\n \r\n ```bash\r\n scandocument -c 1 -l <license-key>\r\n ```\r\n\r\n## Quick Start\r\n- Scan documents from an image file:\r\n ```python\r\n import argparse\r\n import docscanner\r\n import sys\r\n import numpy as np\r\n import cv2\r\n import time\r\n\r\n def showNormalizedImage(name, normalized_image):\r\n mat = docscanner.convertNormalizedImage2Mat(normalized_image)\r\n cv2.imshow(name, mat)\r\n return mat\r\n\r\n def process_file(filename, scanner):\r\n image = cv2.imread(filename)\r\n results = scanner.detectMat(image)\r\n for result in results:\r\n x1 = result.x1\r\n y1 = result.y1\r\n x2 = result.x2\r\n y2 = result.y2\r\n x3 = result.x3\r\n y3 = result.y3\r\n x4 = result.x4\r\n y4 = result.y4\r\n \r\n normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)\r\n showNormalizedImage(\"Normalized Image\", normalized_image)\r\n cv2.drawContours(image, [np.intp([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])], 0, (0, 255, 0), 2)\r\n \r\n cv2.imshow('Document Image', image)\r\n cv2.waitKey(0)\r\n \r\n normalized_image.save(str(time.time()) + '.png')\r\n print('Image saved')\r\n\r\n def scandocument():\r\n \"\"\"\r\n Command-line script for scanning documents from a given image\r\n \"\"\"\r\n parser = argparse.ArgumentParser(description='Scan documents from an image file')\r\n parser.add_argument('-f', '--file', help='Path to the image file')\r\n parser.add_argument('-l', '--license', default='', type=str, help='Set a valid license key')\r\n args = parser.parse_args()\r\n # print(args)\r\n try:\r\n filename = args.file\r\n license = args.license\r\n \r\n if filename is None:\r\n parser.print_help()\r\n return\r\n \r\n # set license\r\n if license == '':\r\n docscanner.initLicense(\"LICENSE-KEY\")\r\n else:\r\n docscanner.initLicense(license)\r\n \r\n # initialize mrz scanner\r\n scanner = docscanner.createInstance()\r\n ret = scanner.setParameters(docscanner.Templates.color)\r\n\r\n if filename is not 
None:\r\n process_file(filename, scanner)\r\n \r\n except Exception as err:\r\n print(err)\r\n sys.exit(1)\r\n\r\n scandocument()\r\n ```\r\n\r\n \r\n\r\n- Scan documents from camera video stream:\r\n ```python\r\n import argparse\r\n import docscanner\r\n import sys\r\n import numpy as np\r\n import cv2\r\n import time\r\n\r\n g_results = None\r\n g_normalized_images = []\r\n\r\n\r\n def callback(results):\r\n global g_results\r\n g_results = results\r\n\r\n\r\n def showNormalizedImage(name, normalized_image):\r\n mat = docscanner.convertNormalizedImage2Mat(normalized_image)\r\n cv2.imshow(name, mat)\r\n return mat\r\n\r\n\r\n def process_video(scanner):\r\n scanner.addAsyncListener(callback)\r\n\r\n cap = cv2.VideoCapture(0)\r\n while True:\r\n ret, image = cap.read()\r\n\r\n ch = cv2.waitKey(1)\r\n if ch == 27:\r\n break\r\n elif ch == ord('n'): # normalize image\r\n if g_results != None:\r\n g_normalized_images = []\r\n index = 0\r\n for result in g_results:\r\n x1 = result.x1\r\n y1 = result.y1\r\n x2 = result.x2\r\n y2 = result.y2\r\n x3 = result.x3\r\n y3 = result.y3\r\n x4 = result.x4\r\n y4 = result.y4\r\n\r\n normalized_image = scanner.normalizeBuffer(\r\n image, x1, y1, x2, y2, x3, y3, x4, y4)\r\n g_normalized_images.append(\r\n (str(index), normalized_image))\r\n mat = showNormalizedImage(str(index), normalized_image)\r\n index += 1\r\n elif ch == ord('s'): # save image\r\n for data in g_normalized_images:\r\n # cv2.imwrite('images/' + str(time.time()) + '.png', image)\r\n cv2.destroyWindow(data[0])\r\n data[1].save(str(time.time()) + '.png')\r\n print('Image saved')\r\n\r\n g_normalized_images = []\r\n\r\n if image is not None:\r\n scanner.detectMatAsync(image)\r\n\r\n if g_results != None:\r\n for result in g_results:\r\n x1 = result.x1\r\n y1 = result.y1\r\n x2 = result.x2\r\n y2 = result.y2\r\n x3 = result.x3\r\n y3 = result.y3\r\n x4 = result.x4\r\n y4 = result.y4\r\n\r\n cv2.drawContours(\r\n image, [np.intp([(x1, y1), (x2, y2), (x3, y3), (x4, 
y4)])], 0, (0, 255, 0), 2)\r\n\r\n cv2.putText(image, 'Press \"n\" to normalize image',\r\n (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)\r\n cv2.putText(image, 'Press \"s\" to save image', (10, 60),\r\n cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)\r\n cv2.putText(image, 'Press \"ESC\" to exit', (10, 90),\r\n cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)\r\n cv2.imshow('Document Scanner', image)\r\n\r\n\r\n docscanner.initLicense(\r\n \"LICENSE-KEY\")\r\n\r\n scanner = docscanner.createInstance()\r\n ret = scanner.setParameters(docscanner.Templates.color)\r\n process_video(scanner)\r\n\r\n ```\r\n \r\n \r\n\r\n## API Methods\r\n- `docscanner.initLicense('YOUR-LICENSE-KEY')`: Set the license key.\r\n \r\n ```python\r\n docscanner.initLicense(\"LICENSE-KEY\")\r\n ```\r\n\r\n- `docscanner.createInstance()`: Create a Document Scanner instance.\r\n \r\n ```python\r\n scanner = docscanner.createInstance()\r\n ```\r\n- `detectFile(filename)`: Perform edge detection from an image file.\r\n\r\n ```python\r\n results = scanner.detectFile(<filename>)\r\n ```\r\n- `detectMat(Mat image)`: Perform edge detection from an OpenCV Mat.\r\n ```python\r\n image = cv2.imread(<filename>)\r\n results = scanner.detectMat(image)\r\n for result in results:\r\n x1 = result.x1\r\n y1 = result.y1\r\n x2 = result.x2\r\n y2 = result.y2\r\n x3 = result.x3\r\n y3 = result.y3\r\n x4 = result.x4\r\n y4 = result.y4\r\n ```\r\n\r\n- `setParameters(Template)`: Select color, binary, or grayscale template.\r\n \r\n ```python\r\n scanner.setParameters(docscanner.Templates.color)\r\n ```\r\n\r\n- `addAsyncListener(callback function)`: Start a native thread to run document scanning tasks asynchronously.\r\n- `detectMatAsync(<opencv mat data>)`: Queue a document scanning task into the native thread.\r\n ```python\r\n def callback(results):\r\n for result in results:\r\n print(result.x1)\r\n print(result.y1)\r\n print(result.x2)\r\n print(result.y2)\r\n print(result.x3)\r\n print(result.y3)\r\n 
print(result.x4)\r\n print(result.y4)\r\n \r\n import cv2\r\n image = cv2.imread(<filename>)\r\n scanner.addAsyncListener(callback)\r\n scanner.detectMatAsync(image)\r\n sleep(5)\r\n ```\r\n\r\n- `normalizeBuffer(mat, x1, y1, x2, y2, x3, y3, x4, y4)`: Perform perspective correction from an OpenCV Mat.\r\n \r\n ```python\r\n normalized_image = scanner.normalizeBuffer(image, x1, y1, x2, y2, x3, y3, x4, y4)\r\n ```\r\n- `normalizeFile(filename, x1, y1, x2, y2, x3, y3, x4, y4)`: Perform perspective correction from an image file.\r\n \r\n ```python\r\n normalized_image = scanner.normalizeFile(<filename>, x1, y1, x2, y2, x3, y3, x4, y4)\r\n ```\r\n- `normalized_image.save(filename)`: Save the normalized image to a file.\r\n ```python\r\n normalized_image.save(<filename>)\r\n ```\r\n- `normalized_image.recycle()`: Release the memory of the normalized image.\r\n- `clearAsyncListener()`: Stop the native thread and clear the registered Python function.\r\n\r\n\r\n## How to Build the Python Document Scanner Extension\r\n- Create a source distribution:\r\n \r\n ```bash\r\n python setup.py sdist\r\n ```\r\n\r\n- setuptools:\r\n \r\n ```bash\r\n python setup_setuptools.py build\r\n python setup_setuptools.py develop \r\n ```\r\n\r\n- Build wheel:\r\n \r\n ```bash\r\n pip wheel . --verbose\r\n # Or\r\n python setup.py bdist_wheel\r\n ```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Document Scanner SDK for document edge detection, border cropping, perspective correction and brightness adjustment",
"version": "1.1.1",
"project_urls": {
"Homepage": "https://github.com/yushulx/python-document-scanner-sdk"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "d9194dfb8c82f74376201f189a0193b9c5297c62557051224b85507e6772dae9",
"md5": "a86ee0778e3eb2c3292b52d45b39dc3c",
"sha256": "23ba23c00107020fca5830e0a7e9d99058669ad291cd54974bcb359350a07823"
},
"downloads": -1,
"filename": "document_scanner_sdk-1.1.1-cp310-cp310-win_amd64.whl",
"has_sig": false,
"md5_digest": "a86ee0778e3eb2c3292b52d45b39dc3c",
"packagetype": "bdist_wheel",
"python_version": "cp310",
"requires_python": null,
"size": 8019647,
"upload_time": "2024-10-15T02:54:37",
"upload_time_iso_8601": "2024-10-15T02:54:37.839855Z",
"url": "https://files.pythonhosted.org/packages/d9/19/4dfb8c82f74376201f189a0193b9c5297c62557051224b85507e6772dae9/document_scanner_sdk-1.1.1-cp310-cp310-win_amd64.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "693a4592ed53a0dc1cee1ef95fa072255bf078135fa374eb3bf1deab2ca900c9",
"md5": "d81c561b1914defb6177317bd631e6b5",
"sha256": "9c9895577b18129abcb3d68cfbc35c25e88501d6c46eff0410fc72ade057256c"
},
"downloads": -1,
"filename": "document-scanner-sdk-1.1.1.tar.gz",
"has_sig": false,
"md5_digest": "d81c561b1914defb6177317bd631e6b5",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 21954551,
"upload_time": "2024-10-15T02:54:40",
"upload_time_iso_8601": "2024-10-15T02:54:40.844770Z",
"url": "https://files.pythonhosted.org/packages/69/3a/4592ed53a0dc1cee1ef95fa072255bf078135fa374eb3bf1deab2ca900c9/document-scanner-sdk-1.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-10-15 02:54:40",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "yushulx",
"github_project": "python-document-scanner-sdk",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "document-scanner-sdk"
}