# diffsptk
*diffsptk* is a differentiable version of [SPTK](https://github.com/sp-nitech/SPTK) based on the PyTorch framework.
[Latest Manual](https://sp-nitech.github.io/diffsptk/latest/)
[Stable Manual](https://sp-nitech.github.io/diffsptk/2.5.0/)
[Downloads](https://pepy.tech/project/diffsptk)
[Python Version](https://pypi.python.org/pypi/diffsptk)
[PyTorch Version](https://pypi.python.org/pypi/diffsptk)
[PyPI Version](https://pypi.python.org/pypi/diffsptk)
[Codecov](https://app.codecov.io/gh/sp-nitech/diffsptk)
[License](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
[GitHub Actions](https://github.com/sp-nitech/diffsptk/actions)
[Ruff](https://github.com/astral-sh/ruff)
## Requirements
- Python 3.10+
- PyTorch 2.3.1+
## Documentation
- See [this page](https://sp-nitech.github.io/diffsptk/latest/) for the reference manual.
- Our [paper](https://www.isca-speech.org/archive/ssw_2023/yoshimura23_ssw.html) is available on the ISCA Archive.
## Installation
The latest stable release can be installed through PyPI by running:
```sh
pip install diffsptk
```
The development release can be installed from the master branch:
```sh
pip install git+https://github.com/sp-nitech/diffsptk.git@master
```
## Examples
### Mel-cepstral analysis and synthesis
```python
import diffsptk
fl = 400 # Frame length.
fp = 80 # Frame period.
n_fft = 512 # FFT length.
M = 24 # Mel-cepstrum dimensions.
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Compute STFT amplitude of x.
stft = diffsptk.STFT(frame_length=fl, frame_period=fp, fft_length=n_fft)
X = stft(x)
# Estimate mel-cepstrum of x.
alpha = diffsptk.get_alpha(sr)
mcep = diffsptk.MelCepstralAnalysis(
cep_order=M,
fft_length=n_fft,
alpha=alpha,
n_iter=10,
)
mc = mcep(X)
# Reconstruct x.
mlsa = diffsptk.MLSA(filter_order=M, frame_period=fp, alpha=alpha, taylor_order=20)
x_hat = mlsa(mlsa(x, -mc), mc)
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
# Extract pitch of x.
pitch = diffsptk.Pitch(
frame_period=fp,
sample_rate=sr,
f_min=80,
f_max=180,
voicing_threshold=0.4,
out_format="pitch",
)
p = pitch(x)
# Generate excitation signal.
excite = diffsptk.ExcitationGeneration(frame_period=fp)
e = excite(p)
n = diffsptk.nrand(x.size(0) - 1)
# Synthesize waveform.
x_voiced = mlsa(e, mc)
x_unvoiced = mlsa(n, mc)
# Output analysis-synthesis result.
diffsptk.write("voiced.wav", x_voiced, sr)
diffsptk.write("unvoiced.wav", x_unvoiced, sr)
```
### WORLD analysis and mel-cepstral synthesis
```python
import diffsptk
fp = 80 # Frame period.
n_fft = 1024 # FFT length.
M = 24 # Mel-cepstrum dimensions.
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Extract F0 of x, or prepare well-estimated F0.
pitch = diffsptk.Pitch(
frame_period=fp,
sample_rate=sr,
f_min=80,
f_max=180,
voicing_threshold=0.4,
out_format="f0",
)
f0 = pitch(x)
# Extract aperiodicity of x by D4C.
ap = diffsptk.Aperiodicity(
frame_period=fp,
sample_rate=sr,
fft_length=n_fft,
algorithm="d4c",
out_format="a",
)
A = ap(x, f0)
# Extract spectral envelope of x by CheapTrick.
pitch_spec = diffsptk.PitchAdaptiveSpectralAnalysis(
frame_period=fp,
sample_rate=sr,
fft_length=n_fft,
)
H = pitch_spec(x, f0)
# Estimate mel-cepstrum of x.
alpha = diffsptk.get_alpha(sr)
mcep = diffsptk.MelCepstralAnalysis(cep_order=M, fft_length=n_fft, alpha=alpha)
mc_a = mcep(A)
mc_h = mcep(H)
# Generate excitation signals.
excite = diffsptk.ExcitationGeneration(frame_period=fp, unvoiced_region="zeros")
p = (sr / f0).nan_to_num(posinf=0)
pulse = excite(p)
noise = diffsptk.nrand(len(pulse) - 1)
# Make mixed excitation signal and reconstruct x.
mlsa = diffsptk.MLSA(filter_order=M, frame_period=fp, alpha=alpha, taylor_order=20)
e_p = pulse - mlsa(pulse, mc_a)
e_a = mlsa(noise, mc_a)
e = e_p + e_a
x_hat = mlsa(e, mc_h)
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
```
### LPC analysis and synthesis
```python
import diffsptk
fl = 400 # Frame length.
fp = 80 # Frame period.
M = 24 # LPC dimensions.
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Estimate LPC of x.
frame = diffsptk.Frame(frame_length=fl, frame_period=fp)
window = diffsptk.Window(in_length=fl)
lpc = diffsptk.LPC(frame_length=fl, lpc_order=M, eps=1e-6)
a = lpc(window(frame(x)))
# Convert to inverse filter coefficients.
norm0 = diffsptk.AllPoleToAllZeroDigitalFilterCoefficients(filter_order=M)
b = norm0(a)
# Reconstruct x.
zerodf = diffsptk.AllZeroDigitalFilter(filter_order=M, frame_period=fp)
poledf = diffsptk.AllPoleDigitalFilter(filter_order=M, frame_period=fp)
x_hat = poledf(zerodf(x, b), a)
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
### Mel-spectrogram, MFCC, and PLP extraction
```python
import diffsptk
fl = 400 # Frame length
fp = 80 # Frame period
n_fft = 512 # FFT length
n_channel = 80 # Number of channels
M = 12 # MFCC/PLP dimensions
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Compute STFT amplitude of x.
stft = diffsptk.STFT(frame_length=fl, frame_period=fp, fft_length=n_fft)
X = stft(x)
# Extract log mel-spectrogram.
fbank = diffsptk.MelFilterBankAnalysis(
n_channel=n_channel,
fft_length=n_fft,
sample_rate=sr,
)
Y = fbank(X)
print(Y.shape)
# Extract MFCC.
mfcc = diffsptk.MFCC(
mfcc_order=M,
n_channel=n_channel,
fft_length=n_fft,
sample_rate=sr,
)
Y = mfcc(X)
print(Y.shape)
# Extract PLP.
plp = diffsptk.PLP(
plp_order=M,
n_channel=n_channel,
fft_length=n_fft,
sample_rate=sr,
)
Y = plp(X)
print(Y.shape)
```
### Subband decomposition
```python
import diffsptk
K = 4 # Number of subbands.
M = 40 # Order of filter.
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Decompose x.
pqmf = diffsptk.PQMF(K, M)
decimate = diffsptk.Decimation(K)
y = decimate(pqmf(x))
# Reconstruct x.
interpolate = diffsptk.Interpolation(K)
ipqmf = diffsptk.IPQMF(K, M)
x_hat = ipqmf(interpolate(K * y)).reshape(-1)
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
### Gammatone filter bank analysis and synthesis
```python
import diffsptk
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Decompose x.
gammatone = diffsptk.GammatoneFilterBankAnalysis(sr)
y = gammatone(x)
# Reconstruct x.
igammatone = diffsptk.GammatoneFilterBankSynthesis(sr)
x_hat = igammatone(y).reshape(-1)
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
### Constant-Q transform
```python
import diffsptk
import librosa # This is to get sample audio.
fp = 128 # Frame period.
K = 252 # Number of CQ-bins.
B = 36 # Number of bins per octave.
# Read waveform.
x, sr = diffsptk.read(librosa.ex("trumpet"))
# Transform x.
cqt = diffsptk.CQT(fp, sr, n_bin=K, n_bin_per_octave=B)
c = cqt(x)
# Reconstruct x.
icqt = diffsptk.ICQT(fp, sr, n_bin=K, n_bin_per_octave=B)
x_hat = icqt(c, out_length=x.size(0))
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
### Modified discrete cosine transform
```python
import diffsptk
fl = 512 # Frame length.
# Read waveform.
x, sr = diffsptk.read("assets/data.wav")
# Transform x.
mdct = diffsptk.MDCT(fl)
c = mdct(x)
# Reconstruct x.
imdct = diffsptk.IMDCT(fl)
x_hat = imdct(c, out_length=x.size(0))
# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
### Vector quantization
```python
import diffsptk
K = 2 # Codebook size.
M = 4 # Order of vector.
# Prepare input.
x = diffsptk.nrand(M)
# Quantize x.
vq = diffsptk.VectorQuantization(M, K)
x_hat, indices, commitment_loss = vq(x)
# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```
## License
This software is released under the Apache License 2.0.
## Citation
```bibtex
@InProceedings{sp-nitech2023sptk,
author = {Takenori Yoshimura and Takato Fujimoto and Keiichiro Oura and Keiichi Tokuda},
title = {{SPTK4}: An open-source software toolkit for speech signal processing},
booktitle = {12th ISCA Speech Synthesis Workshop (SSW 2023)},
pages = {211--217},
year = {2023},
}
```
Raw data
{
"_id": null,
"home_page": null,
"name": "diffsptk",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.10",
"maintainer_email": "Takenori Yoshimura <takenori@sp.nitech.ac.jp>",
"keywords": "dsp, pytorch, signal processing, sptk",
"author": "SPTK Working Group",
"author_email": null,
"download_url": "https://files.pythonhosted.org/packages/0d/f4/4d8972f29f9c398231811631f52c07943afe49cd4abb47393b6939bccf3b/diffsptk-2.5.0.tar.gz",
"platform": null,
"description": "# diffsptk\n\n*diffsptk* is a differentiable version of [SPTK](https://github.com/sp-nitech/SPTK) based on the PyTorch framework.\n\n[data:image/s3,"s3://crabby-images/afad3/afad38562de2c63a29fd1cc72b0ef4afde270cbe" alt="Latest Manual"](https://sp-nitech.github.io/diffsptk/latest/)\n[data:image/s3,"s3://crabby-images/a6bc8/a6bc899dab812ce8dc371f4e0abca12bc41d877c" alt="Stable Manual"](https://sp-nitech.github.io/diffsptk/2.5.0/)\n[data:image/s3,"s3://crabby-images/e63dd/e63dd299c3caffef3607fb8ecf4dca4f1a97ab2b" alt="Downloads"](https://pepy.tech/project/diffsptk)\n[data:image/s3,"s3://crabby-images/b498b/b498b170bf751fc409bea8cf47446383240f5b85" alt="Python Version"](https://pypi.python.org/pypi/diffsptk)\n[data:image/s3,"s3://crabby-images/42904/4290495d6c8b93032e4f7eaeb68eba0eb164115e" alt="PyTorch Version"](https://pypi.python.org/pypi/diffsptk)\n[data:image/s3,"s3://crabby-images/76d34/76d34e7563146a195e862263b171d07c645600d0" alt="PyPI Version"](https://pypi.python.org/pypi/diffsptk)\n[data:image/s3,"s3://crabby-images/9977a/9977a31393c2ddd100a7bba449d0fb5f48ce5af5" alt="Codecov"](https://app.codecov.io/gh/sp-nitech/diffsptk)\n[data:image/s3,"s3://crabby-images/ccfa5/ccfa5bd2313d4956828b3d603bb7ff7c1dda49a7" alt="License"](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)\n[data:image/s3,"s3://crabby-images/f3c4c/f3c4cb95d176b358ab5bfaf88963066aa0d5a9b6" alt="GitHub Actions"](https://github.com/sp-nitech/diffsptk/actions)\n[data:image/s3,"s3://crabby-images/6a099/6a099727a52cf617121ab5d23cc43109ed9fa550" alt="Ruff"](https://github.com/astral-sh/ruff)\n\n## Requirements\n\n- Python 3.10+\n- PyTorch 2.3.1+\n\n## Documentation\n\n- See [this page](https://sp-nitech.github.io/diffsptk/latest/) for the reference manual.\n- Our [paper](https://www.isca-speech.org/archive/ssw_2023/yoshimura23_ssw.html) is available on the ISCA Archive.\n\n## Installation\n\nThe latest stable release can be installed through PyPI by running\n\n```sh\npip 
install diffsptk\n```\n\nThe development release can be installed from the master branch:\n\n```sh\npip install git+https://github.com/sp-nitech/diffsptk.git@master\n```\n\n## Examples\n\n### Mel-cepstral analysis and synthesis\n\n```python\nimport diffsptk\n\nfl = 400 # Frame length.\nfp = 80 # Frame period.\nn_fft = 512 # FFT length.\nM = 24 # Mel-cepstrum dimensions.\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Compute STFT amplitude of x.\nstft = diffsptk.STFT(frame_length=fl, frame_period=fp, fft_length=n_fft)\nX = stft(x)\n\n# Estimate mel-cepstrum of x.\nalpha = diffsptk.get_alpha(sr)\nmcep = diffsptk.MelCepstralAnalysis(\n cep_order=M,\n fft_length=n_fft,\n alpha=alpha,\n n_iter=10,\n)\nmc = mcep(X)\n\n# Reconstruct x.\nmlsa = diffsptk.MLSA(filter_order=M, frame_period=fp, alpha=alpha, taylor_order=20)\nx_hat = mlsa(mlsa(x, -mc), mc)\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n\n# Extract pitch of x.\npitch = diffsptk.Pitch(\n frame_period=fp,\n sample_rate=sr,\n f_min=80,\n f_max=180,\n voicing_threshold=0.4,\n out_format=\"pitch\",\n)\np = pitch(x)\n\n# Generate excitation signal.\nexcite = diffsptk.ExcitationGeneration(frame_period=fp)\ne = excite(p)\nn = diffsptk.nrand(x.size(0) - 1)\n\n# Synthesize waveform.\nx_voiced = mlsa(e, mc)\nx_unvoiced = mlsa(n, mc)\n\n# Output analysis-synthesis result.\ndiffsptk.write(\"voiced.wav\", x_voiced, sr)\ndiffsptk.write(\"unvoiced.wav\", x_unvoiced, sr)\n```\n\n### WORLD analysis and mel-cepstral synthesis\n\n```python\nimport diffsptk\n\nfp = 80 # Frame period.\nn_fft = 1024 # FFT length.\nM = 24 # Mel-cepstrum dimensions.\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Extract F0 of x, or prepare well-estimated F0.\npitch = diffsptk.Pitch(\n frame_period=fp,\n sample_rate=sr,\n f_min=80,\n f_max=180,\n voicing_threshold=0.4,\n out_format=\"f0\",\n)\nf0 = pitch(x)\n\n# 
Extract aperiodicity of x by D4C.\nap = diffsptk.Aperiodicity(\n frame_period=fp,\n sample_rate=sr,\n fft_length=n_fft,\n algorithm=\"d4c\",\n out_format=\"a\",\n)\nA = ap(x, f0)\n\n# Extract spectral envelope of x by CheapTrick.\npitch_spec = diffsptk.PitchAdaptiveSpectralAnalysis(\n frame_period=fp,\n sample_rate=sr,\n fft_length=n_fft,\n)\nH = pitch_spec(x, f0)\n\n# Estimate mel-cepstrum of x.\nalpha = diffsptk.get_alpha(sr)\nmcep = diffsptk.MelCepstralAnalysis(cep_order=M, fft_length=n_fft, alpha=alpha)\nmc_a = mcep(A)\nmc_h = mcep(H)\n\n# Generate excitation signals.\nexcite = diffsptk.ExcitationGeneration(frame_period=fp, unvoiced_region=\"zeros\")\np = (sr / f0).nan_to_num(posinf=0)\npulse = excite(p)\nnoise = diffsptk.nrand(len(pulse) - 1)\n\n# Make mixed excitation signal and reconstruct x.\nmlsa = diffsptk.MLSA(filter_order=M, frame_period=fp, alpha=alpha, taylor_order=20)\ne_p = pulse - mlsa(pulse, mc_a)\ne_a = mlsa(noise, mc_a)\ne = e_p + e_a\nx_hat = mlsa(e, mc_h)\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n```\n\n### LPC analysis and synthesis\n\n```python\nimport diffsptk\n\nfl = 400 # Frame length.\nfp = 80 # Frame period.\nM = 24 # LPC dimensions.\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Estimate LPC of x.\nframe = diffsptk.Frame(frame_length=fl, frame_period=fp)\nwindow = diffsptk.Window(in_length=fl)\nlpc = diffsptk.LPC(frame_length=fl, lpc_order=M, eps=1e-6)\na = lpc(window(frame(x)))\n\n# Convert to inverse filter coefficients.\nnorm0 = diffsptk.AllPoleToAllZeroDigitalFilterCoefficients(filter_order=M)\nb = norm0(a)\n\n# Reconstruct x.\nzerodf = diffsptk.AllZeroDigitalFilter(filter_order=M, frame_period=fp)\npoledf = diffsptk.AllPoleDigitalFilter(filter_order=M, frame_period=fp)\nx_hat = poledf(zerodf(x, b), a)\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n### 
Mel-spectrogram, MFCC, and PLP extraction\n\n```python\nimport diffsptk\n\nfl = 400 # Frame length\nfp = 80 # Frame period\nn_fft = 512 # FFT length\nn_channel = 80 # Number of channels\nM = 12 # MFCC/PLP dimensions\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Compute STFT amplitude of x.\nstft = diffsptk.STFT(frame_length=fl, frame_period=fp, fft_length=n_fft)\nX = stft(x)\n\n# Extract log mel-spectrogram.\nfbank = diffsptk.MelFilterBankAnalysis(\n n_channel=n_channel,\n fft_length=n_fft,\n sample_rate=sr,\n)\nY = fbank(X)\nprint(Y.shape)\n\n# Extract MFCC.\nmfcc = diffsptk.MFCC(\n mfcc_order=M,\n n_channel=n_channel,\n fft_length=n_fft,\n sample_rate=sr,\n)\nY = mfcc(X)\nprint(Y.shape)\n\n# Extract PLP.\nplp = diffsptk.PLP(\n plp_order=M,\n n_channel=n_channel,\n fft_length=n_fft,\n sample_rate=sr,\n)\nY = plp(X)\nprint(Y.shape)\n```\n\n### Subband decomposition\n\n```python\nimport diffsptk\n\nK = 4 # Number of subbands.\nM = 40 # Order of filter.\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Decompose x.\npqmf = diffsptk.PQMF(K, M)\ndecimate = diffsptk.Decimation(K)\ny = decimate(pqmf(x))\n\n# Reconstruct x.\ninterpolate = diffsptk.Interpolation(K)\nipqmf = diffsptk.IPQMF(K, M)\nx_hat = ipqmf(interpolate(K * y)).reshape(-1)\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n### Gammatone filter bank analysis and synthesis\n\n```python\nimport diffsptk\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Decompose x.\ngammatone = diffsptk.GammatoneFilterBankAnalysis(sr)\ny = gammatone(x)\n\n# Reconstruct x.\nigammatone = diffsptk.GammatoneFilterBankSynthesis(sr)\nx_hat = igammatone(y).reshape(-1)\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n### Constant-Q transform\n\n```python\nimport 
diffsptk\nimport librosa # This is to get sample audio.\n\nfp = 128 # Frame period.\nK = 252 # Number of CQ-bins.\nB = 36 # Number of bins per octave.\n\n# Read waveform.\nx, sr = diffsptk.read(librosa.ex(\"trumpet\"))\n\n# Transform x.\ncqt = diffsptk.CQT(fp, sr, n_bin=K, n_bin_per_octave=B)\nc = cqt(x)\n\n# Reconstruct x.\nicqt = diffsptk.ICQT(fp, sr, n_bin=K, n_bin_per_octave=B)\nx_hat = icqt(c, out_length=x.size(0))\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n### Modified discrete cosine transform\n\n```python\nimport diffsptk\n\nfl = 512 # Frame length.\n\n# Read waveform.\nx, sr = diffsptk.read(\"assets/data.wav\")\n\n# Transform x.\nmdct = diffsptk.MDCT(fl)\nc = mdct(x)\n\n# Reconstruct x.\nimdct = diffpstk.IMDCT(fl)\nx_hat = imdct(c, out_length=x.size(0))\n\n# Write reconstructed waveform.\ndiffsptk.write(\"reconst.wav\", x_hat, sr)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n### Vector quantization\n\n```python\nimport diffsptk\n\nK = 2 # Codebook size.\nM = 4 # Order of vector.\n\n# Prepare input.\nx = diffsptk.nrand(M)\n\n# Quantize x.\nvq = diffsptk.VectorQuantization(M, K)\nx_hat, indices, commitment_loss = vq(x)\n\n# Compute error.\nerror = (x_hat - x).abs().sum()\nprint(error)\n```\n\n## License\n\nThis software is released under the Apache License 2.0.\n\n## Citation\n\n```bibtex\n@InProceedings{sp-nitech2023sptk,\n author = {Takenori Yoshimura and Takato Fujimoto and Keiichiro Oura and Keiichi Tokuda},\n title = {{SPTK4}: An open-source software toolkit for speech signal processing},\n booktitle = {12th ISCA Speech Synthesis Workshop (SSW 2023)},\n pages = {211--217},\n year = {2023},\n}\n```\n",
"bugtrack_url": null,
"license": "Apache 2.0",
"summary": "Speech signal processing modules for machine learning",
"version": "2.5.0",
"project_urls": {
"Documentation": "https://sp-nitech.github.io/diffsptk/latest/",
"Homepage": "https://sp-tk.sourceforge.net/",
"Source": "https://github.com/sp-nitech/diffsptk"
},
"split_keywords": [
"dsp",
" pytorch",
" signal processing",
" sptk"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "4fe6ca4b6a3970a1bd74e720a4497837dbac5034b6a5ee74160dcc7b5e542ef3",
"md5": "99146b3cd95c52f4a867ca8c92a66bb0",
"sha256": "3b5988fc018bf9c23f1ba06dbb6c2f6042e88864b42be0abb45807d86e58ac2f"
},
"downloads": -1,
"filename": "diffsptk-2.5.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "99146b3cd95c52f4a867ca8c92a66bb0",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.10",
"size": 229295,
"upload_time": "2025-02-18T07:36:40",
"upload_time_iso_8601": "2025-02-18T07:36:40.665577Z",
"url": "https://files.pythonhosted.org/packages/4f/e6/ca4b6a3970a1bd74e720a4497837dbac5034b6a5ee74160dcc7b5e542ef3/diffsptk-2.5.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "0df44d8972f29f9c398231811631f52c07943afe49cd4abb47393b6939bccf3b",
"md5": "6970b03adc65631ed61a01b4e42814b8",
"sha256": "79e70f13b449bd0231fa8ff91411dff750fb3d7d704b4d78c04d326591fdfca1"
},
"downloads": -1,
"filename": "diffsptk-2.5.0.tar.gz",
"has_sig": false,
"md5_digest": "6970b03adc65631ed61a01b4e42814b8",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.10",
"size": 116749,
"upload_time": "2025-02-18T07:37:21",
"upload_time_iso_8601": "2025-02-18T07:37:21.022555Z",
"url": "https://files.pythonhosted.org/packages/0d/f4/4d8972f29f9c398231811631f52c07943afe49cd4abb47393b6939bccf3b/diffsptk-2.5.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-02-18 07:37:21",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "sp-nitech",
"github_project": "diffsptk",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"lcname": "diffsptk"
}