# Spider MCP Client
[![PyPI version](https://badge.fury.io/py/spider-mcp-client.svg)](https://badge.fury.io/py/spider-mcp-client)
[![Python versions](https://img.shields.io/pypi/pyversions/spider-mcp-client.svg)](https://pypi.org/project/spider-mcp-client/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
Official Python client for **Spider MCP** - a professional web scraping API with advanced anti-detection capabilities.
## 🚀 Quick Start
### Installation
```bash
pip install spider-mcp-client
```
### Basic Usage
```python
from spider_mcp_client import SpiderMCPClient
# Initialize client
client = SpiderMCPClient(
    api_key="your-api-key-here",
    base_url="http://localhost:8003"  # Your Spider MCP server
)
# Parse a URL
result = client.parse_url("https://example.com/article")
print(f"Title: {result['title']}")
print(f"Content: {result['content'][:200]}...")
print(f"Images: {len(result.get('images', []))}")
```
## 📋 Features
- ✅ **Simple API** - One method to parse any supported URL
- ✅ **Built-in retry logic** - Automatic retries with exponential backoff
- ✅ **Rate limiting** - Respectful delays between requests
- ✅ **Error handling** - Clear exceptions for different error types
- ✅ **Image support** - Optional image download and localization
- ✅ **Session isolation** - Multiple isolated browser sessions
- ✅ **Type hints** - Full typing support for better IDE experience
## 🔧 API Reference
### SpiderMCPClient
```python
client = SpiderMCPClient(
    api_key="your-api-key",            # Required: Your API key
    base_url="http://localhost:8003",  # Spider MCP server URL
    timeout=30,                        # Request timeout (seconds)
    max_retries=3,                     # Max retry attempts
    rate_limit_delay=1.0               # Delay between requests (seconds)
)
```
### parse_url()
```python
result = client.parse_url(
    url="https://example.com/article",  # Required: URL to parse
    download_images=False,              # Optional: Download images
    app_name="my-app"                   # Optional: Session isolation
)
```
**Returns:**
```python
{
    "title": "Article Title",
    "content": "Full article content...",
    "author": "Author Name",
    "publish_date": "2025-01-17",
    "images": ["http://localhost:8003/downloaded_images/image1.jpg"],
    "url": "https://example.com/article",
    "parser_info": {
        "site_name": "example.com",
        "url_name": "article_parser"
    }
}
```
## 📖 Examples
### Basic Article Parsing
```python
from spider_mcp_client import SpiderMCPClient
client = SpiderMCPClient(api_key="sk-1234567890abcdef")
# Parse a news article
result = client.parse_url("https://techcrunch.com/2025/01/17/ai-news")
if result:
    print(f"📰 {result['title']}")
    print(f"✍️ {result.get('author', 'Unknown')}")
    print(f"📅 {result.get('publish_date', 'Unknown')}")
    print(f"📝 Content: {len(result.get('content', ''))} characters")
```
### With Image Download
```python
# Parse with image download
result = client.parse_url(
    url="https://news-site.com/photo-story",
    download_images=True
)
print(f"Downloaded {len(result.get('images', []))} images:")
for img_url in result.get('images', []):
    print(f"  🖼️ {img_url}")
```
### Error Handling
```python
from spider_mcp_client import (
    SpiderMCPClient,
    ParserNotFoundError,
    AuthenticationError
)
client = SpiderMCPClient(api_key="your-api-key")
try:
    result = client.parse_url("https://unsupported-site.com/article")
    print(f"Success: {result['title']}")
except ParserNotFoundError:
    print("❌ No parser available for this website")
except AuthenticationError:
    print("❌ Invalid API key")
except Exception as e:
    print(f"❌ Error: {e}")
```
### Batch Processing
```python
import time
from spider_mcp_client import SpiderMCPClient
def batch_parse(urls, api_key, delay=2):
    """Parse multiple URLs with delays"""
    client = SpiderMCPClient(api_key=api_key, rate_limit_delay=delay)
    results = []

    for url in urls:
        try:
            print(f"Parsing: {url}")
            result = client.parse_url(url)
            results.append({
                'url': url,
                'title': result.get('title'),
                'success': True
            })
        except Exception as e:
            print(f"Failed {url}: {e}")
            results.append({
                'url': url,
                'error': str(e),
                'success': False
            })

    return results
# Usage
urls = [
    "https://site1.com/article1",
    "https://site2.com/article2",
    "https://site3.com/article3"
]
results = batch_parse(urls, "your-api-key")
successful = [r for r in results if r['success']]
print(f"✅ Successfully parsed: {len(successful)}/{len(urls)} URLs")
```
### Context Manager
```python
# Automatic cleanup with context manager
with SpiderMCPClient(api_key="your-api-key") as client:
    result = client.parse_url("https://example.com/article")
    print(f"Title: {result['title']}")
# Session automatically closed
```
### Check Parser Availability
```python
# Check if parser exists before parsing
parser_info = client.check_parser("https://target-site.com/article")
if parser_info.get('found'):
    print(f"✅ Parser available: {parser_info['parser']['site_name']}")
    result = client.parse_url("https://target-site.com/article")
else:
    print("❌ No parser found for this URL")
```
## 🚨 Exception Types
```python
from spider_mcp_client import (
    SpiderMCPError,          # Base exception
    AuthenticationError,     # Invalid API key
    ParserNotFoundError,     # No parser for URL
    RateLimitError,          # Rate limit exceeded
    ServerError,             # Server error (5xx)
    TimeoutError,            # Request timeout
    ConnectionError          # Connection failed
)
```
## 🔑 Getting Your API Key
1. **Start Spider MCP server:**
```bash
# On your Spider MCP server
./restart.sh
```
2. **Visit admin interface:**
```
http://localhost:8003/admin/users
```
3. **Create/view user and copy API key**
## 🌐 Server Requirements
This client requires a running **Spider MCP server**. The server provides:
- ✅ **Custom parsers** for each website
- ✅ **Undetected ChromeDriver** for Cloudflare bypass
- ✅ **Professional anti-detection** capabilities
- ✅ **Image processing** and localization
- ✅ **Session management** and isolation
## 📚 Advanced Usage
### Custom Session Names
```python
# Use different sessions for different applications
client = SpiderMCPClient(api_key="your-api-key")
# Session for news parsing
news_result = client.parse_url(
    "https://news-site.com/article",
    app_name="news-parser"
)

# Session for e-commerce parsing
product_result = client.parse_url(
    "https://shop-site.com/product",
    app_name="product-parser"
)
```
### Configuration
```python
# Production configuration
client = SpiderMCPClient(
    api_key="your-api-key",
    base_url="https://your-spider-mcp-server.com",
    timeout=60,              # Longer timeout for complex pages
    max_retries=5,           # More retries for reliability
    rate_limit_delay=2.0     # Slower rate for respectful scraping
)
```
## 🤝 Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
## 📄 License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
## 🔗 Links
- **PyPI Package:** https://pypi.org/project/spider-mcp-client/
- **GitHub Repository:** https://github.com/spider-mcp/spider-mcp-client
- **Documentation:** https://spider-mcp.readthedocs.io/
- **Spider MCP Server:** https://github.com/spider-mcp/spider-mcp
---
**Made with ❤️ by the Spider MCP Team**
## Raw data
{
"_id": null,
"home_page": "https://github.com/spider-mcp/spider-mcp-client",
"name": "spider-mcp-client",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": "Spider MCP Team <support@spider-mcp.com>",
"keywords": "web scraping, spider, mcp, api client, html parsing, data extraction",
"author": "Spider MCP Team",
"author_email": "Spider MCP Team <support@spider-mcp.com>",
"download_url": "https://files.pythonhosted.org/packages/49/b4/a91531e5f77b46816c9ca0555e019d918d2e9e0d00e687f0d16ed39e7fbb/spider_mcp_client-0.1.2.tar.gz",
"platform": null,
"description": "# Spider MCP Client\n\n[](https://badge.fury.io/py/spider-mcp-client)\n[](https://pypi.org/project/spider-mcp-client/)\n[](https://opensource.org/licenses/MIT)\n\nOfficial Python client for **Spider MCP** - a professional web scraping API with advanced anti-detection capabilities.\n\n## \ud83d\ude80 Quick Start\n\n### Installation\n\n```bash\npip install spider-mcp-client\n```\n\n### Basic Usage\n\n```python\nfrom spider_mcp_client import SpiderMCPClient\n\n# Initialize client\nclient = SpiderMCPClient(\n api_key=\"your-api-key-here\",\n base_url=\"http://localhost:8003\" # Your Spider MCP server\n)\n\n# Parse a URL\nresult = client.parse_url(\"https://example.com/article\")\n\nprint(f\"Title: {result['title']}\")\nprint(f\"Content: {result['content'][:200]}...\")\nprint(f\"Images: {len(result.get('images', []))}\")\n```\n\n## \ud83d\udccb Features\n\n- \u2705 **Simple API** - One method to parse any supported URL\n- \u2705 **Built-in retry logic** - Automatic retries with exponential backoff\n- \u2705 **Rate limiting** - Respectful delays between requests\n- \u2705 **Error handling** - Clear exceptions for different error types\n- \u2705 **Image support** - Optional image download and localization\n- \u2705 **Session isolation** - Multiple isolated browser sessions\n- \u2705 **Type hints** - Full typing support for better IDE experience\n\n## \ud83d\udd27 API Reference\n\n### SpiderMCPClient\n\n```python\nclient = SpiderMCPClient(\n api_key=\"your-api-key\", # Required: Your API key\n base_url=\"http://localhost:8003\", # Spider MCP server URL\n timeout=30, # Request timeout (seconds)\n max_retries=3, # Max retry attempts\n rate_limit_delay=1.0 # Delay between requests (seconds)\n)\n```\n\n### parse_url()\n\n```python\nresult = client.parse_url(\n url=\"https://example.com/article\", # Required: URL to parse\n download_images=False, # Optional: Download images\n app_name=\"my-app\" # Optional: Session 
isolation\n)\n```\n\n**Returns:**\n```python\n{\n \"title\": \"Article Title\",\n \"content\": \"Full article content...\",\n \"author\": \"Author Name\",\n \"publish_date\": \"2025-01-17\",\n \"images\": [\"http://localhost:8003/downloaded_images/image1.jpg\"],\n \"url\": \"https://example.com/article\",\n \"parser_info\": {\n \"site_name\": \"example.com\",\n \"url_name\": \"article_parser\"\n }\n}\n```\n\n## \ud83d\udcd6 Examples\n\n### Basic Article Parsing\n\n```python\nfrom spider_mcp_client import SpiderMCPClient\n\nclient = SpiderMCPClient(api_key=\"sk-1234567890abcdef\")\n\n# Parse a news article\nresult = client.parse_url(\"https://techcrunch.com/2025/01/17/ai-news\")\n\nif result:\n print(f\"\ud83d\udcf0 {result['title']}\")\n print(f\"\u270d\ufe0f {result.get('author', 'Unknown')}\")\n print(f\"\ud83d\udcc5 {result.get('publish_date', 'Unknown')}\")\n print(f\"\ud83d\udcdd Content: {len(result.get('content', ''))} characters\")\n```\n\n### With Image Download\n\n```python\n# Parse with image download\nresult = client.parse_url(\n url=\"https://news-site.com/photo-story\",\n download_images=True\n)\n\nprint(f\"Downloaded {len(result.get('images', []))} images:\")\nfor img_url in result.get('images', []):\n print(f\" \ud83d\uddbc\ufe0f {img_url}\")\n```\n\n### Error Handling\n\n```python\nfrom spider_mcp_client import (\n SpiderMCPClient, \n ParserNotFoundError, \n AuthenticationError\n)\n\nclient = SpiderMCPClient(api_key=\"your-api-key\")\n\ntry:\n result = client.parse_url(\"https://unsupported-site.com/article\")\n print(f\"Success: {result['title']}\")\n \nexcept ParserNotFoundError:\n print(\"\u274c No parser available for this website\")\n \nexcept AuthenticationError:\n print(\"\u274c Invalid API key\")\n \nexcept Exception as e:\n print(f\"\u274c Error: {e}\")\n```\n\n### Batch Processing\n\n```python\nimport time\nfrom spider_mcp_client import SpiderMCPClient\n\ndef batch_parse(urls, api_key, delay=2):\n \"\"\"Parse multiple URLs with 
delays\"\"\"\n client = SpiderMCPClient(api_key=api_key, rate_limit_delay=delay)\n results = []\n \n for url in urls:\n try:\n print(f\"Parsing: {url}\")\n result = client.parse_url(url)\n results.append({\n 'url': url,\n 'title': result.get('title'),\n 'success': True\n })\n except Exception as e:\n print(f\"Failed {url}: {e}\")\n results.append({\n 'url': url,\n 'error': str(e),\n 'success': False\n })\n \n return results\n\n# Usage\nurls = [\n \"https://site1.com/article1\",\n \"https://site2.com/article2\", \n \"https://site3.com/article3\"\n]\n\nresults = batch_parse(urls, \"your-api-key\")\nsuccessful = [r for r in results if r['success']]\nprint(f\"\u2705 Successfully parsed: {len(successful)}/{len(urls)} URLs\")\n```\n\n### Context Manager\n\n```python\n# Automatic cleanup with context manager\nwith SpiderMCPClient(api_key=\"your-api-key\") as client:\n result = client.parse_url(\"https://example.com/article\")\n print(f\"Title: {result['title']}\")\n# Session automatically closed\n```\n\n### Check Parser Availability\n\n```python\n# Check if parser exists before parsing\nparser_info = client.check_parser(\"https://target-site.com/article\")\n\nif parser_info.get('found'):\n print(f\"\u2705 Parser available: {parser_info['parser']['site_name']}\")\n result = client.parse_url(\"https://target-site.com/article\")\nelse:\n print(\"\u274c No parser found for this URL\")\n```\n\n## \ud83d\udea8 Exception Types\n\n```python\nfrom spider_mcp_client import (\n SpiderMCPError, # Base exception\n AuthenticationError, # Invalid API key\n ParserNotFoundError, # No parser for URL\n RateLimitError, # Rate limit exceeded\n ServerError, # Server error (5xx)\n TimeoutError, # Request timeout\n ConnectionError # Connection failed\n)\n```\n\n## \ud83d\udd11 Getting Your API Key\n\n1. **Start Spider MCP server:**\n ```bash\n # On your Spider MCP server\n ./restart.sh\n ```\n\n2. **Visit admin interface:**\n ```\n http://localhost:8003/admin/users\n ```\n\n3. 
**Create/view user and copy API key**\n\n## \ud83c\udf10 Server Requirements\n\nThis client requires a running **Spider MCP server**. The server provides:\n\n- \u2705 **Custom parsers** for each website\n- \u2705 **Undetected ChromeDriver** for Cloudflare bypass \n- \u2705 **Professional anti-detection** capabilities\n- \u2705 **Image processing** and localization\n- \u2705 **Session management** and isolation\n\n## \ud83d\udcda Advanced Usage\n\n### Custom Session Names\n\n```python\n# Use different sessions for different applications\nclient = SpiderMCPClient(api_key=\"your-api-key\")\n\n# Session for news parsing\nnews_result = client.parse_url(\n \"https://news-site.com/article\",\n app_name=\"news-parser\"\n)\n\n# Session for e-commerce parsing \nproduct_result = client.parse_url(\n \"https://shop-site.com/product\",\n app_name=\"product-parser\"\n)\n```\n\n### Configuration\n\n```python\n# Production configuration\nclient = SpiderMCPClient(\n api_key=\"your-api-key\",\n base_url=\"https://your-spider-mcp-server.com\",\n timeout=60, # Longer timeout for complex pages\n max_retries=5, # More retries for reliability\n rate_limit_delay=2.0 # Slower rate for respectful scraping\n)\n```\n\n## \ud83e\udd1d Contributing\n\nContributions are welcome! Please feel free to submit a Pull Request.\n\n## \ud83d\udcc4 License\n\nThis project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.\n\n## \ud83d\udd17 Links\n\n- **PyPI Package:** https://pypi.org/project/spider-mcp-client/\n- **GitHub Repository:** https://github.com/spider-mcp/spider-mcp-client\n- **Documentation:** https://spider-mcp.readthedocs.io/\n- **Spider MCP Server:** https://github.com/spider-mcp/spider-mcp\n\n---\n\n**Made with \u2764\ufe0f by the Spider MCP Team**\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Official Python client for Spider MCP web scraping API",
"version": "0.1.2",
"project_urls": {
"Bug Reports": "https://github.com/spider-mcp/spider-mcp-client/issues",
"Changelog": "https://github.com/spider-mcp/spider-mcp-client/blob/main/CHANGELOG.md",
"Documentation": "https://spider-mcp.readthedocs.io/",
"Homepage": "https://github.com/spider-mcp/spider-mcp-client",
"Repository": "https://github.com/spider-mcp/spider-mcp-client"
},
"split_keywords": [
"web scraping",
" spider",
" mcp",
" api client",
" html parsing",
" data extraction"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "4a5bbe849f6cfb188fd7e0c3684d3d3fa755e81561293ce2a36847e03ab4651a",
"md5": "9392062cc64c05afe67c063b89ac63cf",
"sha256": "53da1a4e171fd54cb38707d36425ffef84a72f02250cb080621d5d02244c24e4"
},
"downloads": -1,
"filename": "spider_mcp_client-0.1.2-py3-none-any.whl",
"has_sig": false,
"md5_digest": "9392062cc64c05afe67c063b89ac63cf",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.8",
"size": 8631,
"upload_time": "2025-08-24T21:59:59",
"upload_time_iso_8601": "2025-08-24T21:59:59.284155Z",
"url": "https://files.pythonhosted.org/packages/4a/5b/be849f6cfb188fd7e0c3684d3d3fa755e81561293ce2a36847e03ab4651a/spider_mcp_client-0.1.2-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "49b4a91531e5f77b46816c9ca0555e019d918d2e9e0d00e687f0d16ed39e7fbb",
"md5": "bb053172fc72749c92ed5904e6f7cdae",
"sha256": "f12441b77e3dd43ed8f9dd6be363fb1871d67ab36bf854217abe61a5dd52cd9e"
},
"downloads": -1,
"filename": "spider_mcp_client-0.1.2.tar.gz",
"has_sig": false,
"md5_digest": "bb053172fc72749c92ed5904e6f7cdae",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 16766,
"upload_time": "2025-08-24T22:00:00",
"upload_time_iso_8601": "2025-08-24T22:00:00.501503Z",
"url": "https://files.pythonhosted.org/packages/49/b4/a91531e5f77b46816c9ca0555e019d918d2e9e0d00e687f0d16ed39e7fbb/spider_mcp_client-0.1.2.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-24 22:00:00",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "spider-mcp",
"github_project": "spider-mcp-client",
"github_not_found": true,
"lcname": "spider-mcp-client"
}