whisper-lid


Namewhisper-lid JSON
Version 0.0.1 PyPI version JSON
download
home_page: https://github.com/bond005/whisper-lid
Summary: Spoken Language IDentification (LID) using multilingual Whisper model
upload_time: 2025-08-04 14:19:07
maintainer: None
docs_url: None
author: Ivan Bondarenko
requires_python: None
license: Apache License Version 2.0
keywords whisper lid spoken-language language-identification spoken-language-identification
VCS
bugtrack_url
requirements librosa numpy scipy sentencepiece soundfile torch torchaudio transformers datasets
Travis-CI No Travis.
coveralls test coverage No coveralls.
            
Whisper-LID
===============

This is a spoken language identification system that is based on the Whisper
model. The system uses a Whisper-based algorithm to identify spoken languages
or non-speech events. Section 2.3 of the Whisper paper
(https://arxiv.org/abs/2212.04356) states that language tags or non-speech
tags need to be predicted after the `<|startoftranscript|>` special token.
Based on this information, the system estimates a probability distribution
for the token that follows `<|startoftranscript|>` and selects the token
with the highest probability as the final spoken language prediction. Since
the predicted token can be either a language tag or a non-speech tag, the
system combines the features of a spoken language identifier and a voice
activity detector.

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/bond005/whisper-lid",
    "name": "whisper-lid",
    "maintainer": null,
    "docs_url": null,
    "requires_python": null,
    "maintainer_email": null,
    "keywords": "whisper, LID, spoken-language, language-identification, spoken-language-identification",
    "author": "Ivan Bondarenko",
    "author_email": "bond005@yandex.ru",
    "download_url": "https://files.pythonhosted.org/packages/ea/d2/bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828/whisper_lid-0.0.1.tar.gz",
    "platform": null,
    "description": "\nWhisper-LID\n===============\n\nThis is a spoken language identification system that is based on the Whisper\nmodel. The system uses the Whisper-based algorithm to identify spoken languages\nor non-speech event. The Section 2.3 of the paper about Whisper\n(https://arxiv.org/abs/2212.04356) states that language tags or non-speech\ntags need to be predicted after the `<|startoftranscript|>` special token.\nBased on this information, the system estimates a probability distribution\nfor the next token after the `<|startoftranscript|>` and selects the token\nwith the highest probability as the final spoken language prediction. Since\nthe predicted token can be either a language tag or a non-speech tag, the\nsystem combines the features of a spoken language identifier and a voice\nactivity detector.\n",
    "bugtrack_url": null,
    "license": "Apache License Version 2.0",
    "summary": "Spoken Language IDentification (LID) using multilingual Whisper model",
    "version": "0.0.1",
    "project_urls": {
        "Homepage": "https://github.com/bond005/whisper-lid"
    },
    "split_keywords": [
        "whisper",
        " lid",
        " spoken-language",
        " language-identification",
        " spoken-language-identification"
    ],
    "urls": [
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "abc1fbc6b30a5b758b4e022fda38f7436899a722df4dcb3f9a8462d9849b870d",
                "md5": "690a1c49e08c99166679f9e69ddbe125",
                "sha256": "15f75214411bdd75f03a38e196e545ae7365c1c0a7572b0d971b5b25f85549d7"
            },
            "downloads": -1,
            "filename": "whisper_lid-0.0.1-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "690a1c49e08c99166679f9e69ddbe125",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 8352,
            "upload_time": "2025-08-04T14:19:04",
            "upload_time_iso_8601": "2025-08-04T14:19:04.788005Z",
            "url": "https://files.pythonhosted.org/packages/ab/c1/fbc6b30a5b758b4e022fda38f7436899a722df4dcb3f9a8462d9849b870d/whisper_lid-0.0.1-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": null,
            "digests": {
                "blake2b_256": "ead2bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828",
                "md5": "0fbcb64f63859281d6462c919476649c",
                "sha256": "ea3f3c4d787f28c2ccff5d5677d49774f0293a0cc9628c6f68b3695df466d64c"
            },
            "downloads": -1,
            "filename": "whisper_lid-0.0.1.tar.gz",
            "has_sig": false,
            "md5_digest": "0fbcb64f63859281d6462c919476649c",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 96597,
            "upload_time": "2025-08-04T14:19:07",
            "upload_time_iso_8601": "2025-08-04T14:19:07.624619Z",
            "url": "https://files.pythonhosted.org/packages/ea/d2/bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828/whisper_lid-0.0.1.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2025-08-04 14:19:07",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "bond005",
    "github_project": "whisper-lid",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "librosa",
            "specs": [
                [
                    ">=",
                    "0.10.0"
                ]
            ]
        },
        {
            "name": "numpy",
            "specs": []
        },
        {
            "name": "scipy",
            "specs": []
        },
        {
            "name": "sentencepiece",
            "specs": []
        },
        {
            "name": "soundfile",
            "specs": [
                [
                    ">=",
                    "0.11.0"
                ]
            ]
        },
        {
            "name": "torch",
            "specs": [
                [
                    ">=",
                    "2.0.1"
                ]
            ]
        },
        {
            "name": "torchaudio",
            "specs": [
                [
                    ">=",
                    "2.0.1"
                ]
            ]
        },
        {
            "name": "transformers",
            "specs": [
                [
                    ">=",
                    "4.38.1"
                ]
            ]
        },
        {
            "name": "datasets",
            "specs": [
                [
                    "<",
                    "4.0"
                ]
            ]
        }
    ],
    "lcname": "whisper-lid"
}
        
Elapsed time: 1.75983s