supadata


Name: supadata JSON
Version: 1.3.2 PyPI version JSON
download
home_page: None
Summary: The official Python SDK for Supadata - scrape web and YouTube content with ease
upload_time: 2025-08-14 07:52:39
maintainer: None
docs_url: None
author: None
requires_python: >=3.7
license: None
keywords: ai api llm supadata transcripts web-scraping youtube
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
# Supadata Python SDK

[![PyPI version](https://badge.fury.io/py/supadata.svg)](https://badge.fury.io/py/supadata)
[![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](http://opensource.org/licenses/MIT)

The official Python SDK for Supadata.

Get your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.

## Installation

```bash
pip install supadata
```

## Usage

### Initialization

```python
from supadata import Supadata, SupadataError

# Initialize the client
supadata = Supadata(api_key="YOUR_API_KEY")
```

### Transcripts

```python
# Get transcript from any supported platform (YouTube, TikTok, Instagram, Twitter, file URLs)
transcript = supadata.transcript(
    url="https://x.com/SpaceX/status/1481651037291225113",
    lang="en",  # Optional: preferred language
    text=True,  # Optional: return plain text instead of timestamped chunks
    mode="auto"  # Optional: "native", "auto", or "generate"
)

# For immediate results
if hasattr(transcript, 'content'):
    print(f"Transcript: {transcript.content}")
    print(f"Language: {transcript.lang}")
else:
    # For async processing (large files)
    print(f"Processing started with job ID: {transcript.job_id}")
    # Poll for results using existing batch.get_batch_results method
```

### YouTube

```python
# Get YouTube transcript with Spanish language preference
transcript = supadata.youtube.transcript(video_id="dQw4w9WgXcQ", lang="es")
print(f"Got transcript {transcript.content}")

# Translate YouTube transcript to Spanish
translated = supadata.youtube.translate(
    video_id="dQw4w9WgXcQ",
    lang="es"
)
print(f"Got translated transcript in {translated.lang}")

# Get plain text transcript
text_transcript = supadata.youtube.transcript(
    video_id="dQw4w9WgXcQ",
    text=True
)
print(text_transcript.content)

# Get Video Metadata
video = supadata.youtube.video(id="https://youtu.be/dQw4w9WgXcQ") # can be url or video id
print(f"Video: {video}")

# Get Channel Metadata
channel = supadata.youtube.channel(id="https://youtube.com/@RickAstleyVEVO") # can be url, channel id, or handle
print(f"Channel: {channel}")

# Get video IDs from a YouTube channel
channel_videos = supadata.youtube.channel.videos(
    id="RickAstleyVEVO",  # can be url, channel id, or handle
    type="all",  # 'all', 'video', 'short', or 'live'
    limit=50
)
print(f"Regular videos: {channel_videos.video_ids}")
print(f"Shorts: {channel_videos.short_ids}")
print(f"Live: {channel_videos.live_ids}")

# Get Playlist metadata
playlist = supadata.youtube.playlist(id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc") # can be url or playlist id
print(f"Playlist: {playlist}")

# Get video IDs from a YouTube playlist
playlist_videos = supadata.youtube.playlist.videos(
    id="https://www.youtube.com/playlist?list=PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc",  # can be url or playlist id
    limit=50
)
print(f"Regular videos: {playlist_videos.video_ids}")
print(f"Shorts: {playlist_videos.short_ids}")
print(f"Live: {playlist_videos.live_ids}")

# Batch Operations
transcript_batch_job = supadata.youtube.transcript.batch(
    video_ids=["dQw4w9WgXcQ", "xvFZjo5PgG0"],
    # playlist_id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc", # alternatively
    # channel_id="UC_9-kyTW8ZkZNDHQJ6FgpwQ", # alternatively
    lang="en",  # Optional: specify preferred transcript language
    limit=100   # Optional: limit for playlist/channel
)
print(f"Started transcript batch job: {transcript_batch_job.job_id}")

# Start a batch job to get video metadata for a playlist
video_batch_job = supadata.youtube.video.batch(
    playlist_id="PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc",
    limit=50
)
print(f"Started video metadata batch job: {video_batch_job.job_id}")

# Get the results of a batch job (poll until status is 'completed' or 'failed')
batch_results = supadata.youtube.batch.get_batch_results(job_id=transcript_batch_job.job_id)
print(f"Job status: {batch_results.status}")
print(f"Stats: {batch_results.stats.succeeded}/{batch_results.stats.total} videos processed")
print(f"First result: {batch_results.results[0].video_id if batch_results.results else 'No results yet'}")
```

### Web

```python
# Scrape web content
web_content = supadata.web.scrape("https://supadata.ai")
print(f"Page title: {web_content.name}")
print(f"Page content: {web_content.content}")

# Map website URLs
site_map = supadata.web.map("https://supadata.ai")
print(f"Found {len(site_map.urls)} URLs")

# Start a crawl job
crawl_job = supadata.web.crawl(
    url="https://supadata.ai",
    limit=100  # Optional: limit the number of pages to crawl
)
print(f"Started crawl job: {crawl_job.job_id}")

# Get crawl results
# This automatically handles pagination and returns all pages
try:
    pages = supadata.web.get_crawl_results(job_id=crawl_job.job_id)
    for page in pages:
        print(f"Crawled page: {page.url}")
        print(f"Page title: {page.name}")
        print(f"Content: {page.content}")
except SupadataError as e:
    print(f"Crawl job failed: {e}")
```

## Error Handling

The SDK uses custom `SupadataError` exceptions that provide structured error information:

```python
from supadata.errors import SupadataError

try:
    transcript = supadata.youtube.transcript(video_id="INVALID_ID")
except SupadataError as error:
    print(f"Error code: {error.error}")
    print(f"Error message: {error.message}")
    print(f"Error details: {error.details}")
    if error.documentation_url:
        print(f"Documentation: {error.documentation_url}")
```

## API Reference

See the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.

## License

MIT

            

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "supadata",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.7",
    "maintainer_email": null,
    "keywords": "ai, api, llm, supadata, transcripts, web-scraping, youtube",
    "author": null,
    "author_email": "Supadata <support@supadata.ai>",
    "download_url": "https://files.pythonhosted.org/packages/e1/4c/f1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3/supadata-1.3.2.tar.gz",
    "platform": null,
    "description": "# Supadata Python SDK\n\n[![PyPI version](https://badge.fury.io/py/supadata.svg)](https://badge.fury.io/py/supadata)\n[![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](http://opensource.org/licenses/MIT)\n\nThe official Python SDK for Supadata.\n\nGet your free API key at [supadata.ai](https://supadata.ai) and start scraping data in minutes.\n\n## Installation\n\n```bash\npip install supadata\n```\n\n## Usage\n\n### Initialization\n\n```python\nfrom supadata import Supadata, SupadataError\n\n# Initialize the client\nsupadata = Supadata(api_key=\"YOUR_API_KEY\")\n```\n\n### Transcripts\n\n```python\n# Get transcript from any supported platform (YouTube, TikTok, Instagram,Twitter, file URLs)\ntranscript = supadata.transcript(\n    url=\"https://x.com/SpaceX/status/1481651037291225113\",\n    lang=\"en\",  # Optional: preferred language\n    text=True,  # Optional: return plain text instead of timestamped chunks\n    mode=\"auto\"  # Optional: \"native\", \"auto\", or \"generate\"\n)\n\n# For immediate results\nif hasattr(transcript, 'content'):\n    print(f\"Transcript: {transcript.content}\")\n    print(f\"Language: {transcript.lang}\")\nelse:\n    # For async processing (large files)\n    print(f\"Processing started with job ID: {transcript.job_id}\")\n    # Poll for results using existing batch.get_batch_results method\n```\n\n### YouTube\n\n```python\n# Get YouTube transcript with Spanish language preference\ntranscript = supadata.youtube.transcript(video_id=\"dQw4w9WgXcQ\", lang=\"es\")\nprint(f\"Got transcript {transcript.content}\")\n\n# Translate YouTube transcript to Spanish\ntranslated = supadata.youtube.translate(\n    video_id=\"dQw4w9WgXcQ\",\n    lang=\"es\"\n)\nprint(f\"Got translated transcript in {translated.lang}\")\n\n# Get plain text transcript\ntext_transcript = supadata.youtube.transcript(\n    video_id=\"dQw4w9WgXcQ\",\n    text=True\n)\nprint(text_transcript.content)\n\n# Get Video 
Metadata\nvideo = supadata.youtube.video(id=\"https://youtu.be/dQw4w9WgXcQ\") # can be url or video id\nprint(f\"Video: {video}\")\n\n# Get Channel Metadata\nchannel = supadata.youtube.channel(id=\"https://youtube.com/@RickAstleyVEVO\") # can be url, channel id, handle\nprint(f\"Channel: {channel}\")\n\n# Get video IDs from a YouTube channel\nchannel_videos = supadata.youtube.channel.videos(\n    id=\"RickAstleyVEVO\",  # can be url, channel id, or handle\n    type=\"all\",  # 'all', 'video', 'short', or 'live'\n    limit=50\n)\nprint(f\"Regular videos: {channel_videos.video_ids}\")\nprint(f\"Shorts: {channel_videos.short_ids}\")\nprint(f\"Live: {channel_videos.live_ids}\")\n\n# Get Playlist metadata\nplaylist = supadata.youtube.playlist(id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\") # can be url or playlist id\nprint(f\"Playlist: {playlist}\")\n\n# Get video IDs from a YouTube playlist\nplaylist_videos = supadata.youtube.playlist.videos(\n    id=\"https://www.youtube.com/playlist?list=PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\",  # can be url or playlist id\n    limit=50\n)\nprint(f\"Regular videos: {playlist_videos.video_ids}\")\nprint(f\"Shorts: {playlist_videos.short_ids}\")\nprint(f\"Live: {playlist_videos.live_ids}\")\n\n# Batch Operations\ntranscript_batch_job = supadata.youtube.transcript.batch(\n    video_ids=[\"dQw4w9WgXcQ\", \"xvFZjo5PgG0\"],\n    # playlist_id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\", # alternatively\n    # channel_id=\"UC_9-kyTW8ZkZNDHQJ6FgpwQ\", # alternatively\n    lang=\"en\",  # Optional: specify preferred transcript language\n    limit=100   # Optional: limit for playlist/channel\n)\nprint(f\"Started transcript batch job: {transcript_batch_job.job_id}\")\n\n# Start a batch job to get video metadata for a playlist\nvideo_batch_job = supadata.youtube.video.batch(\n    playlist_id=\"PLlaN88a7y2_plecYoJxvRFTLHVbIVAOoc\",\n    limit=50\n)\nprint(f\"Started video metadata batch job: {video_batch_job.job_id}\")\n\n# Get the results of a batch job 
(poll until status is 'completed' or 'failed')\nbatch_results = supadata.youtube.batch.get_batch_results(job_id=transcript_batch_job.job_id)\nprint(f\"Job status: {batch_results.status}\")\nprint(f\"Stats: {batch_results.stats.succeeded}/{batch_results.stats.total} videos processed\")\nprint(f\"First result: {batch_results.results[0].video_id if batch_results.results else 'No results yet'}\")\n```\n\n### Web\n\n```python\n# Scrape web content\nweb_content = supadata.web.scrape(\"https://supadata.ai\")\nprint(f\"Page title: {web_content.name}\")\nprint(f\"Page content: {web_content.content}\")\n\n# Map website URLs\nsite_map = supadata.web.map(\"https://supadata.ai\")\nprint(f\"Found {len(site_map.urls)} URLs\")\n\n# Start a crawl job\ncrawl_job = supadata.web.crawl(\n    url=\"https://supadata.ai\",\n    limit=100  # Optional: limit the number of pages to crawl\n)\nprint(f\"Started crawl job: {crawl_job.job_id}\")\n\n# Get crawl results\n# This automatically handles pagination and returns all pages\ntry:\n    pages = supadata.web.get_crawl_results(job_id=crawl_job.job_id)\n    for page in pages:\n        print(f\"Crawled page: {page.url}\")\n        print(f\"Page title: {page.name}\")\n        print(f\"Content: {page.content}\")\nexcept SupadataError as e:\n    print(f\"Crawl job failed: {e}\")\n```\n\n## Error Handling\n\nThe SDK uses custom `SupadataError` exceptions that provide structured error information:\n\n```python\nfrom supadata.errors import SupadataError\n\ntry:\n    transcript = supadata.youtube.transcript(video_id=\"INVALID_ID\")\nexcept SupadataError as error:\n    print(f\"Error code: {error.error}\")\n    print(f\"Error message: {error.message}\")\n    print(f\"Error details: {error.details}\")\n    if error.documentation_url:\n        print(f\"Documentation: {error.documentation_url}\")\n```\n\n## API Reference\n\nSee the [Documentation](https://supadata.ai/documentation) for more details on all possible parameters and options.\n\n## 
License\n\nMIT\n",
    "bugtrack_url": null,
    "license": null,
    "summary": "The official Python SDK for Supadata - scrape web and YouTube content with ease",
    "version": "1.3.2",
    "project_urls": {
        "documentation": "https://supadata.ai/documentation",
        "homepage": "https://supadata.ai",
        "repository": "https://github.com/supadata/py"
    },
    "split_keywords": [
        "ai",
        " api",
        " llm",
        " supadata",
        " transcripts",
        " web-scraping",
        " youtube"
    ],
    "urls": [
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "bf51cfb2181d6ad168bea142e560d5e41b6af55efb7a977c4e809811d388121c",
                "md5": "30fc19b5195785529ce92f7711e0478c",
                "sha256": "264147474c4676c382a0193e1b208e14d5ab77e11ae976dc48f01ce2e94c9a77"
            },
            "downloads": -1,
            "filename": "supadata-1.3.2-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "30fc19b5195785529ce92f7711e0478c",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.7",
            "size": 14575,
            "upload_time": "2025-08-14T07:52:38",
            "upload_time_iso_8601": "2025-08-14T07:52:38.876902Z",
            "url": "https://files.pythonhosted.org/packages/bf/51/cfb2181d6ad168bea142e560d5e41b6af55efb7a977c4e809811d388121c/supadata-1.3.2-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "e14cf1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3",
                "md5": "6593ed0b90b76182ede52a3484519c84",
                "sha256": "41478d315381df0b966370f8e0613d3e610d86666b253ec1fa4f5a3ee7a6c4cc"
            },
            "downloads": -1,
            "filename": "supadata-1.3.2.tar.gz",
            "has_sig": false,
            "md5_digest": "6593ed0b90b76182ede52a3484519c84",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.7",
            "size": 16869,
            "upload_time": "2025-08-14T07:52:39",
            "upload_time_iso_8601": "2025-08-14T07:52:39.832279Z",
            "url": "https://files.pythonhosted.org/packages/e1/4c/f1d32b3daab9facd53f05c3c44521c9d023ae2dd882217c3043b1e727fd3/supadata-1.3.2.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2025-08-14 07:52:39",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "supadata",
    "github_project": "py",
    "github_not_found": true,
    "lcname": "supadata"
}
        
Elapsed time: 1.90165s