# Torchaudio-Forced-Aligner
## Install
``` bash
$ pip install torchfa
```
## Usage
``` python
from torchfa import TorchaudioForcedAligner
aligner = TorchaudioForcedAligner()
audio = "assets/clean_speech.wav"
transcript = "关服务高端产品仍处于供不应求的局面"
cut = aligner.align_audios(audio, transcript)
cut.trim_to_alignments("word").save_audios("./")
for alignment in cut.supervisions[0].alignment["word"]:
    print(alignment)
```
Output:
``` text
AlignmentItem(symbol='关', start=0.02, duration=0.121, score=0.21)
AlignmentItem(symbol='服', start=0.241, duration=0.141, score=0.07)
AlignmentItem(symbol='务', start=0.502, duration=0.101, score=0.49)
AlignmentItem(symbol='高', start=0.724, duration=0.181, score=0.97)
AlignmentItem(symbol='端', start=0.945, duration=0.141, score=0.52)
AlignmentItem(symbol='产', start=1.126, duration=0.201, score=0.81)
AlignmentItem(symbol='品', start=1.367, duration=0.141, score=0.35)
AlignmentItem(symbol='仍', start=1.608, duration=0.201, score=0.89)
AlignmentItem(symbol='处', start=1.869, duration=0.121, score=0.72)
AlignmentItem(symbol='于', start=2.09, duration=0.06, score=0.96)
AlignmentItem(symbol='供', start=2.251, duration=0.161, score=0.95)
AlignmentItem(symbol='不', start=2.452, duration=0.06, score=0.69)
AlignmentItem(symbol='应', start=2.573, duration=0.161, score=0.63)
AlignmentItem(symbol='求', start=2.754, duration=0.141, score=0.95)
AlignmentItem(symbol='的', start=2.935, duration=0.08, score=0.99)
AlignmentItem(symbol='局', start=3.075, duration=0.101, score=0.98)
AlignmentItem(symbol='面', start=3.256, duration=0.221, score=0.94)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/pengzhendong/Torchaudio-Forced-Aligner",
"name": "torchfa",
"maintainer": null,
"docs_url": null,
"requires_python": null,
"maintainer_email": null,
"keywords": null,
"author": "Zhendong Peng",
"author_email": "pzd17@tsinghua.org.cn",
"download_url": null,
"platform": null,
"description": "# Torchaudio-Forced-Aligner\n\n## Install\n\n``` bash\n$ pip install torchfa\n```\n\n## Usage\n\n``` python\nfrom torchfa import TorchaudioForcedAligner\n\naligner = TorchaudioForcedAligner()\n\naudio = \"assets/clean_speech.wav\"\ntranscript = \"\u5173\u670d\u52a1\u9ad8\u7aef\u4ea7\u54c1\u4ecd\u5904\u4e8e\u4f9b\u4e0d\u5e94\u6c42\u7684\u5c40\u9762\"\ncut = aligner.align_audios(audio, transcript)\n\ncut.trim_to_alignments(\"word\").save_audios(\"./\")\nfor alignment in cut.supervisions[0].alignment[\"word\"]:\n print(alignment)\n```\n\n```\nAlignmentItem(symbol='\u5173', start=0.02, duration=0.121, score=0.21)\nAlignmentItem(symbol='\u670d', start=0.241, duration=0.141, score=0.07)\nAlignmentItem(symbol='\u52a1', start=0.502, duration=0.101, score=0.49)\nAlignmentItem(symbol='\u9ad8', start=0.724, duration=0.181, score=0.97)\nAlignmentItem(symbol='\u7aef', start=0.945, duration=0.141, score=0.52)\nAlignmentItem(symbol='\u4ea7', start=1.126, duration=0.201, score=0.81)\nAlignmentItem(symbol='\u54c1', start=1.367, duration=0.141, score=0.35)\nAlignmentItem(symbol='\u4ecd', start=1.608, duration=0.201, score=0.89)\nAlignmentItem(symbol='\u5904', start=1.869, duration=0.121, score=0.72)\nAlignmentItem(symbol='\u4e8e', start=2.09, duration=0.06, score=0.96)\nAlignmentItem(symbol='\u4f9b', start=2.251, duration=0.161, score=0.95)\nAlignmentItem(symbol='\u4e0d', start=2.452, duration=0.06, score=0.69)\nAlignmentItem(symbol='\u5e94', start=2.573, duration=0.161, score=0.63)\nAlignmentItem(symbol='\u6c42', start=2.754, duration=0.141, score=0.95)\nAlignmentItem(symbol='\u7684', start=2.935, duration=0.08, score=0.99)\nAlignmentItem(symbol='\u5c40', start=3.075, duration=0.101, score=0.98)\nAlignmentItem(symbol='\u9762', start=3.256, duration=0.221, score=0.94)\n```\n",
"bugtrack_url": null,
"license": null,
"summary": "Torchaudio Forced Aligner",
"version": "0.0.5",
"project_urls": {
"Homepage": "https://github.com/pengzhendong/Torchaudio-Forced-Aligner"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "5ae1bff9224b109074a4f63262caf620f208a358a26365fa816e8e52e481a7d6",
"md5": "59016c31a530231e6d732e7e5edce88d",
"sha256": "8cdaf1e19ea8cb5e7ae5818e636f06ba18a7e53b065f5f20accaa4c9d8138e62"
},
"downloads": -1,
"filename": "torchfa-0.0.5-py3-none-any.whl",
"has_sig": false,
"md5_digest": "59016c31a530231e6d732e7e5edce88d",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 6570,
"upload_time": "2024-12-25T10:56:22",
"upload_time_iso_8601": "2024-12-25T10:56:22.810516Z",
"url": "https://files.pythonhosted.org/packages/5a/e1/bff9224b109074a4f63262caf620f208a358a26365fa816e8e52e481a7d6/torchfa-0.0.5-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-12-25 10:56:22",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "pengzhendong",
"github_project": "Torchaudio-Forced-Aligner",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"requirements": [
{
"name": "g2p-mix",
"specs": []
},
{
"name": "lhotse",
"specs": []
},
{
"name": "torch",
"specs": []
},
{
"name": "torchaudio",
"specs": []
}
],
"lcname": "torchfa"
}