# PII Scanner
A Python library for scanning text for Personally Identifiable Information (PII) using spaCy and custom regex pattern matching. It can process a variety of data formats, including lists, plain text, PDFs, JSON, CSV, and XLSX files.
## Installation
```bash
pip install pii_scanner
```
## Usage
```python
import asyncio
import time

from pii_scanner.scanner import PIIScanner
from pii_scanner.constants.patterns_countries import Regions


async def run_scan():
    """Scan a small in-memory list with PIIScanner and print the results and elapsed time."""
    # Start the timer
    start_time = time.perf_counter()

    pii_scanner = PIIScanner()

    # Only needed for the file-based scan shown (commented out) below.
    # file_path = 'dummy-pii/test.json'
    file_path = 'dummy-pii/test.xlsx'

    data = ['Ankit Gupta', '+919140562125', 'Indian']
    results_list_data = await pii_scanner.scan(data=data, sample_size=0.005, region=Regions.IN)
    # results_file_data = await pii_scanner.scan(file_path=file_path, sample_size=0.005, region=Regions.IN)

    print("Results:", results_list_data)
    print(f"Elapsed: {time.perf_counter() - start_time:.2f}s")


# Run the asynchronous scan
asyncio.run(run_scan())
```
## Output
```json
[
{
"text": "Ankit Gupta",
"entity_detected": [
{"type": "PERSON", "start": 0, "end": 11, "score": 0.85}
]
},
{
"text": "+919140562125",
"entity_detected": [
{"type": "PHONE_NUMBER", "start": 0, "end": 13, "score": 0.85}
]
},
{
"text": "Indian",
"entity_detected": [
{"type": "NATIONALITY", "start": 0, "end": 6, "score": 0.9}
]
}
]
```
Raw data
{
"_id": null,
"home_page": "https://github.com/devankit01/pii_scanner",
"name": "pii-Scanner",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": null,
"keywords": null,
"author": "Ankit Gupta",
"author_email": "devankitgupta01@gmail.com",
"download_url": null,
"platform": null,
"description": "# PII Scanner\n\nA Python library designed for text processing using SpaCy and custom regex pattern matching. This library is capable of processing a variety of text data formats, such as lists, plain text, PDFs, JSON, CSV, and XLSX files\n\n## Installation\n\n```bash\npip install pii_scanner\n```\n\n## Usage \n\n```bash\nimport asyncio\nfrom pii_scanner.scanner import PIIScanner\nfrom pii_scanner.constants.patterns_countries import Regions\n\nasync def run_scan():\n # Start the timer\n start_time = time.time()\n\n pii_scanner = PIIScanner()\n # file_path = 'dummy-pii/test.json' \n file_path = 'dummy-pii/test.xlsx' \n\n data = ['Ankit Gupta', '+919140562125', 'Indian']\n results_list_data = await pii_scanner.scan(data=, sample_size=0.005, region=Regions.IN)\n # results_file_data = await pii_scanner.scan(file_path=file_path, sample_size=0.005, region=Regions.IN)\n\n print(\"Results:\", results_list_data, results_list_data)\n\n# Run the asynchronous scan\nasyncio.run(run_scan())\n\n\n```\n\n\n## Output \n\n```bash\n[\n {\n \"text\": \"Ankit Gupta\",\n \"entity_detected\": [\n {\"type\": \"PERSON\", \"start\": 0, \"end\": 11, \"score\": 0.85}\n ]\n },\n {\n \"text\": \"+919140562195\",\n \"entity_detected\": [\n {\"type\": \"PHONE_NUMBER\", \"start\": 0, \"end\": 13, \"score\": 0.85}\n ]\n },\n {\n \"text\": \"Indian\",\n \"entity_detected\": [\n {\"type\": \"NATIONALITY\", \"start\": 0, \"end\": 6, \"score\": 0.9}\n ]\n }\n]\n\n\n```\n\n\n",
"bugtrack_url": null,
"license": null,
"summary": "A library for scanning Personally Identifiable Information (PII).",
"version": "0.1.10",
"project_urls": {
"Homepage": "https://github.com/devankit01/pii_scanner"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "f9578df7a044570393fd153322b1887743283b1b2423440a9693136d15d7f2ff",
"md5": "7df17292e17be62a7b18de1a99c49936",
"sha256": "dc4c44e9ba743c9086ec62d1c54b0f4f4bc69515e475bc0138bfa1e187281368"
},
"downloads": -1,
"filename": "pii_Scanner-0.1.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "7df17292e17be62a7b18de1a99c49936",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 22204,
"upload_time": "2024-12-24T11:21:32",
"upload_time_iso_8601": "2024-12-24T11:21:32.981049Z",
"url": "https://files.pythonhosted.org/packages/f9/57/8df7a044570393fd153322b1887743283b1b2423440a9693136d15d7f2ff/pii_Scanner-0.1.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-12-24 11:21:32",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "devankit01",
"github_project": "pii_scanner",
"github_not_found": true,
"lcname": "pii-scanner"
}