Whisper-LID
===============
This is a spoken language identification system based on the Whisper
model. The system uses a Whisper-based algorithm to identify spoken languages
or non-speech events. Section 2.3 of the Whisper paper
(https://arxiv.org/abs/2212.04356) states that language tags or non-speech
tags need to be predicted after the `<|startoftranscript|>` special token.
Based on this information, the system estimates a probability distribution
for the token that follows `<|startoftranscript|>` and selects the token
with the highest probability as the final spoken language prediction. Since
the predicted token can be either a language tag or a non-speech tag, the
system combines the features of a spoken language identifier and a voice
activity detector.
Raw data
{
"_id": null,
"home_page": "https://github.com/bond005/whisper-lid",
"name": "whisper-lid",
"maintainer": null,
"docs_url": null,
"requires_python": null,
"maintainer_email": null,
"keywords": "whisper, LID, spoken-language, language-identification, spoken-language-identification",
"author": "Ivan Bondarenko",
"author_email": "bond005@yandex.ru",
"download_url": "https://files.pythonhosted.org/packages/ea/d2/bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828/whisper_lid-0.0.1.tar.gz",
"platform": null,
"description": "\nWhisper-LID\n===============\n\nThis is a spoken language identification system that is based on the Whisper\nmodel. The system uses the Whisper-based algorithm to identify spoken languages\nor non-speech event. The Section 2.3 of the paper about Whisper\n(https://arxiv.org/abs/2212.04356) states that language tags or non-speech\ntags need to be predicted after the `<|startoftranscript|>` special token.\nBased on this information, the system estimates a probability distribution\nfor the next token after the `<|startoftranscript|>` and selects the token\nwith the highest probability as the final spoken language prediction. Since\nthe predicted token can be either a language tag or a non-speech tag, the\nsystem combines the features of a spoken language identifier and a voice\nactivity detector.\n",
"bugtrack_url": null,
"license": "Apache License Version 2.0",
"summary": "Spoken Language IDentification (LID) using multilingual Whisper model",
"version": "0.0.1",
"project_urls": {
"Homepage": "https://github.com/bond005/whisper-lid"
},
"split_keywords": [
"whisper",
" lid",
" spoken-language",
" language-identification",
" spoken-language-identification"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "abc1fbc6b30a5b758b4e022fda38f7436899a722df4dcb3f9a8462d9849b870d",
"md5": "690a1c49e08c99166679f9e69ddbe125",
"sha256": "15f75214411bdd75f03a38e196e545ae7365c1c0a7572b0d971b5b25f85549d7"
},
"downloads": -1,
"filename": "whisper_lid-0.0.1-py3-none-any.whl",
"has_sig": false,
"md5_digest": "690a1c49e08c99166679f9e69ddbe125",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 8352,
"upload_time": "2025-08-04T14:19:04",
"upload_time_iso_8601": "2025-08-04T14:19:04.788005Z",
"url": "https://files.pythonhosted.org/packages/ab/c1/fbc6b30a5b758b4e022fda38f7436899a722df4dcb3f9a8462d9849b870d/whisper_lid-0.0.1-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "ead2bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828",
"md5": "0fbcb64f63859281d6462c919476649c",
"sha256": "ea3f3c4d787f28c2ccff5d5677d49774f0293a0cc9628c6f68b3695df466d64c"
},
"downloads": -1,
"filename": "whisper_lid-0.0.1.tar.gz",
"has_sig": false,
"md5_digest": "0fbcb64f63859281d6462c919476649c",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 96597,
"upload_time": "2025-08-04T14:19:07",
"upload_time_iso_8601": "2025-08-04T14:19:07.624619Z",
"url": "https://files.pythonhosted.org/packages/ea/d2/bc5257f7f97f512810b13f7497df0a12cbaae625a6e636bcf44c6b221828/whisper_lid-0.0.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-04 14:19:07",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "bond005",
"github_project": "whisper-lid",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "librosa",
"specs": [
[
">=",
"0.10.0"
]
]
},
{
"name": "numpy",
"specs": []
},
{
"name": "scipy",
"specs": []
},
{
"name": "sentencepiece",
"specs": []
},
{
"name": "soundfile",
"specs": [
[
">=",
"0.11.0"
]
]
},
{
"name": "torch",
"specs": [
[
">=",
"2.0.1"
]
]
},
{
"name": "torchaudio",
"specs": [
[
">=",
"2.0.1"
]
]
},
{
"name": "transformers",
"specs": [
[
">=",
"4.38.1"
]
]
},
{
"name": "datasets",
"specs": [
[
"<",
"4.0"
]
]
}
],
"lcname": "whisper-lid"
}