# Supadata Python SDK
[PyPI version](https://badge.fury.io/py/supadata)
[MIT License](http://opensource.org/licenses/MIT)
The official Python SDK for Supadata.
Get your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.
## Installation
```bash
pip install supadata
```
## Usage
### Initialization
```python
from supadata import Supadata, SupadataError
# Initialize the client
supadata = Supadata(api_key="YOUR_API_KEY")
```
### Transcripts
```python
# Get transcript from any supported platform (YouTube, TikTok, Instagram, Twitter, file URLs)
transcript = supadata.transcript(
url="https://x.com/SpaceX/status/1481651037291225113",
lang="en", # Optional: preferred language
text=True, # Optional: return plain text instead of timestamped chunks
mode="auto" # Optional: "native", "auto", or "generate"
)
# For immediate results
if hasattr(transcript, 'content'):
print(f"Transcript: {transcript.content}")
print(f"Language: {transcript.lang}")
else:
# For async processing (large files)
print(f"Processing started with job ID: {transcript.job_id}")
# Poll for results using existing batch.get_batch_results method
```
### YouTube
```python
# Get YouTube transcript with Spanish language preference
transcript = supadata.youtube.transcript(video_id="dQw4w9WgXcQ", lang="es")
print(f"Got transcript {transcript.content}")
# Translate YouTube transcript to Spanish
translated = supadata.youtube.translate(
video_id="dQw4w9WgXcQ",
lang="es"
)
print(f"Got translated transcript in {translated.lang}")
# Get plain text transcript
text_transcript = supadata.youtube.transcript(
video_id="dQw4w9WgXcQ",
text=True
)
print(text_transcript.content)
# Get Video Metadata
video = supadata.youtube.video(id="https://youtu.be/dQw4w9WgXcQ") # can be url or video id
print(f"Video: {video}")
# Get Channel Metadata
channel = supadata.youtube.channel(id="https://youtube.com/@RickAstleyVEVO") # can be url, channel id, or handle
print(f"Channel: {channel}")
# Get video IDs from a YouTube channel
channel_videos = supadata.youtube.channel.videos(
id="RickAstleyVEVO", # can be url, channel id, or handle
type="all", # 'all', 'video', 'short', or 'live'
limit=50
)
print(f"Regular videos: {channel_videos.video_ids}")
print(f"Shorts: {channel_videos.short_ids}")
print(f"Live: {channel_videos.live_ids}")
# Get Playlist metadata
playlist = supadata.youtube.playlist(id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc") # can be url or playlist id
print(f"Playlist: {playlist}")
# Get video IDs from a YouTube playlist
playlist_videos = supadata.youtube.playlist.videos(
id="https://www.youtube.com/playlist?list=PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc", # can be url or playlist id
limit=50
)
print(f"Regular videos: {playlist_videos.video_ids}")
print(f"Shorts: {playlist_videos.short_ids}")
print(f"Live: {playlist_videos.live_ids}")
# Batch Operations
transcript_batch_job = supadata.youtube.transcript.batch(
video_ids=["dQw4w9WgXcQ", "xvFZjo5PgG0"],
# playlist_id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc", # alternatively
# channel_id="UC_9-kyTW8ZkZNDHQJ6FgpwQ", # alternatively
lang="en", # Optional: specify preferred transcript language
limit=100 # Optional: limit for playlist/channel
)
print(f"Started transcript batch job: {transcript_batch_job.job_id}")
# Start a batch job to get video metadata for a playlist
video_batch_job = supadata.youtube.video.batch(
playlist_id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc",
limit=50
)
print(f"Started video metadata batch job: {video_batch_job.job_id}")
# Get the results of a batch job (poll until status is 'completed' or 'failed')
batch_results = supadata.youtube.batch.get_batch_results(job_id=transcript_batch_job.job_id)
print(f"Job status: {batch_results.status}")
print(f"Stats: {batch_results.stats.succeeded}/{batch_results.stats.total} videos processed")
print(f"First result: {batch_results.results[0].video_id if batch_results.results else 'No results yet'}")
```
### Web
```python
# Scrape web content
web_content = supadata.web.scrape("https://supadata.ai")
print(f"Page title: {web_content.name}")
print(f"Page content: {web_content.content}")
# Map website URLs
site_map = supadata.web.map("https://supadata.ai")
print(f"Found {len(site_map.urls)} URLs")
# Start a crawl job
crawl_job = supadata.web.crawl(
url="https://supadata.ai",
limit=100 # Optional: limit the number of pages to crawl
)
print(f"Started crawl job: {crawl_job.job_id}")
# Get crawl results
# This automatically handles pagination and returns all pages
try:
pages = supadata.web.get_crawl_results(job_id=crawl_job.job_id)
for page in pages:
print(f"Crawled page: {page.url}")
print(f"Page title: {page.name}")
print(f"Content: {page.content}")
except SupadataError as e:
print(f"Crawl job failed: {e}")
```
## Error Handling
The SDK uses custom `SupadataError` exceptions that provide structured error information:
```python
from supadata.errors import SupadataError
try:
transcript = supadata.youtube.transcript(video_id="INVALID_ID")
except SupadataError as error:
print(f"Error code: {error.error}")
print(f"Error message: {error.message}")
print(f"Error details: {error.details}")
if error.documentation_url:
print(f"Documentation: {error.documentation_url}")
```
## API Reference
See the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.
## License
MIT
Raw data
{
"_id": null,
"home_page": null,
"name": "supadata",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.7",
"maintainer_email": null,
"keywords": "ai, api, llm, supadata, transcripts, web-scraping, youtube",
"author": null,
"author_email": "Supadata <support@supadata.ai>",
"download_url": "https://files.pythonhosted.org/packages/e1/4c/f1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3/supadata-1.3.2.tar.gz",
"platform": null,
"description": "# Supadata Python SDK\n\n[](https://badge.fury.io/py/supadata)\n[](http://opensource.org/licenses/MIT)\n\nThe official Python SDK for Supadata.\n\nGet your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.\n\n## Installation\n\n```bash\npip install supadata\n```\n\n## Usage\n\n### Initialization\n\n```python\nfrom supadata import Supadata, SupadataError\n\n# Initialize the client\nsupadata = Supadata(api_key=\"YOUR_API_KEY\")\n```\n\n### Transcripts\n\n```python\n# Get transcript from any supported platform (YouTube, TikTok, Instagram,Twitter, file URLs)\ntranscript = supadata.transcript(\n url=\"https://x.com/SpaceX/status/1481651037291225113\",\n lang=\"en\", # Optional: preferred language\n text=True, # Optional: return plain text instead of timestamped chunks\n mode=\"auto\" # Optional: \"native\", \"auto\", or \"generate\"\n)\n\n# For immediate results\nif hasattr(transcript, 'content'):\n print(f\"Transcript: {transcript.content}\")\n print(f\"Language: {transcript.lang}\")\nelse:\n # For async processing (large files)\n print(f\"Processing started with job ID: {transcript.job_id}\")\n # Poll for results using existing batch.get_batch_results method\n```\n\n### YouTube\n\n```python\n# Get YouTube transcript with Spanish language preference\ntranscript = supadata.youtube.transcript(video_id=\"dQw4w9WgXcQ\", lang=\"es\")\nprint(f\"Got transcript {transcript.content}\")\n\n# Translate YouTube transcript to Spanish\ntranslated = supadata.youtube.translate(\n video_id=\"dQw4w9WgXcQ\",\n lang=\"es\"\n)\nprint(f\"Got translated transcript in {translated.lang}\")\n\n# Get plain text transcript\ntext_transcript = supadata.youtube.transcript(\n video_id=\"dQw4w9WgXcQ\",\n text=True\n)\nprint(text_transcript.content)\n\n# Get Video Metadata\nvideo = supadata.youtube.video(id=\"https://youtu.be/dQw4w9WgXcQ\") # can be url or video id\nprint(f\"Video: {video}\")\n\n# Get Channel Metadata\nchannel = 
supadata.youtube.channel(id=\"https://youtube.com/@RickAstleyVEVO\") # can be url, channel id, handle\nprint(f\"Channel: {channel}\")\n\n# Get video IDs from a YouTube channel\nchannel_videos = supadata.youtube.channel.videos(\n id=\"RickAstleyVEVO\", # can be url, channel id, or handle\n type=\"all\", # 'all', 'video', 'short', or 'live'\n limit=50\n)\nprint(f\"Regular videos: {channel_videos.video_ids}\")\nprint(f\"Shorts: {channel_videos.short_ids}\")\nprint(f\"Live: {channel_videos.live_ids}\")\n\n# Get Playlist metadata\nplaylist = supadata.youtube.playlist(id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\") # can be url or playlist id\nprint(f\"Playlist: {playlist}\")\n\n# Get video IDs from a YouTube playlist\nplaylist_videos = supadata.youtube.playlist.videos(\n id=\"https://www.youtube.com/playlist?list=PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\", # can be url or playlist id\n limit=50\n)\nprint(f\"Regular videos: {playlist_videos.video_ids}\")\nprint(f\"Shorts: {playlist_videos.short_ids}\")\nprint(f\"Live: {playlist_videos.live_ids}\")\n\n# Batch Operations\ntranscript_batch_job = supadata.youtube.transcript.batch(\n video_ids=[\"dQw4w9WgXcQ\", \"xvFZjo5PgG0\"],\n # playlist_id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\", # alternatively\n # channel_id=\"UC_9-kyTW8ZkZNDHQJ6FgpwQ\", # alternatively\n lang=\"en\", # Optional: specify preferred transcript language\n limit=100 # Optional: limit for playlist/channel\n)\nprint(f\"Started transcript batch job: {transcript_batch_job.job_id}\")\n\n# Start a batch job to get video metadata for a playlist\nvideo_batch_job = supadata.youtube.video.batch(\n playlist_id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\",\n limit=50\n)\nprint(f\"Started video metadata batch job: {video_batch_job.job_id}\")\n\n# Get the results of a batch job (poll until status is 'completed' or 'failed')\nbatch_results = supadata.youtube.batch.get_batch_results(job_id=transcript_batch_job.job_id)\nprint(f\"Job status: {batch_results.status}\")\nprint(f\"Stats: 
{batch_results.stats.succeeded}/{batch_results.stats.total} videos processed\")\nprint(f\"First result: {batch_results.results[0].video_id if batch_results.results else 'No results yet'}\")\n```\n\n### Web\n\n```python\n# Scrape web content\nweb_content = supadata.web.scrape(\"https://supadata.ai\")\nprint(f\"Page title: {web_content.name}\")\nprint(f\"Page content: {web_content.content}\")\n\n# Map website URLs\nsite_map = supadata.web.map(\"https://supadata.ai\")\nprint(f\"Found {len(site_map.urls)} URLs\")\n\n# Start a crawl job\ncrawl_job = supadata.web.crawl(\n url=\"https://supadata.ai\",\n limit=100 # Optional: limit the number of pages to crawl\n)\nprint(f\"Started crawl job: {crawl_job.job_id}\")\n\n# Get crawl results\n# This automatically handles pagination and returns all pages\ntry:\n pages = supadata.web.get_crawl_results(job_id=crawl_job.job_id)\n for page in pages:\n print(f\"Crawled page: {page.url}\")\n print(f\"Page title: {page.name}\")\n print(f\"Content: {page.content}\")\nexcept SupadataError as e:\n print(f\"Crawl job failed: {e}\")\n```\n\n## Error Handling\n\nThe SDK uses custom `SupadataError` exceptions that provide structured error information:\n\n```python\nfrom supadata.errors import SupadataError\n\ntry:\n transcript = supadata.youtube.transcript(video_id=\"INVALID_ID\")\nexcept SupadataError as error:\n print(f\"Error code: {error.error}\")\n print(f\"Error message: {error.message}\")\n print(f\"Error details: {error.details}\")\n if error.documentation_url:\n print(f\"Documentation: {error.documentation_url}\")\n```\n\n## API Reference\n\nSee the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.\n\n## License\n\nMIT\n",
"bugtrack_url": null,
"license": null,
"summary": "The official Python SDK for Supadata - scrape web and YouTube content with ease",
"version": "1.3.2",
"project_urls": {
"documentation": "https://supadata.ai/documentation",
"homepage": "https://supadata.ai",
"repository": "https://github.com/supadata/py"
},
"split_keywords": [
"ai",
" api",
" llm",
" supadata",
" transcripts",
" web-scraping",
" youtube"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "bf51cfb2181d6ad168bea142e560d5e41b6af55efb7a977c4e809811d388121c",
"md5": "30fc19b5195785529ce92f7711e0478c",
"sha256": "264147474c4676c382a0193e1b208e14d5ab77e11ae976dc48f01ce2e94c9a77"
},
"downloads": -1,
"filename": "supadata-1.3.2-py3-none-any.whl",
"has_sig": false,
"md5_digest": "30fc19b5195785529ce92f7711e0478c",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.7",
"size": 14575,
"upload_time": "2025-08-14T07:52:38",
"upload_time_iso_8601": "2025-08-14T07:52:38.876902Z",
"url": "https://files.pythonhosted.org/packages/bf/51/cfb2181d6ad168bea142e560d5e41b6af55efb7a977c4e809811d388121c/supadata-1.3.2-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "e14cf1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3",
"md5": "6593ed0b90b76182ede52a3484519c84",
"sha256": "41478d315381df0b966370f8e0613d3e610d86666b253ec1fa4f5a3ee7a6c4cc"
},
"downloads": -1,
"filename": "supadata-1.3.2.tar.gz",
"has_sig": false,
"md5_digest": "6593ed0b90b76182ede52a3484519c84",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.7",
"size": 16869,
"upload_time": "2025-08-14T07:52:39",
"upload_time_iso_8601": "2025-08-14T07:52:39.832279Z",
"url": "https://files.pythonhosted.org/packages/e1/4c/f1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3/supadata-1.3.2.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-14 07:52:39",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "supadata",
"github_project": "py",
"github_not_found": true,
"lcname": "supadata"
}