Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e759570f98 |
@@ -24,9 +24,7 @@ Download the code. The code is based on signal processing package in Python call
|
|||||||
Dependencies:
|
Dependencies:
|
||||||
Run these lines in a terminal to install everything necessary for feature extraction.
|
Run these lines in a terminal to install everything necessary for feature extraction.
|
||||||
```
|
```
|
||||||
sudo apt-get install python-numpy python-scipy python-nose python-pip
|
sudo apt-get install python3-numpy python3-scipy python3-nose
|
||||||
|
|
||||||
sudo pip install scikits.talkbox
|
|
||||||
```
|
```
|
||||||
Next for the installation of Torch for loading the models run this.
|
Next for the installation of Torch for loading the models run this.
|
||||||
```
|
```
|
||||||
@@ -37,30 +35,31 @@ cd ~/torch; bash install-deps;
|
|||||||
./install.sh
|
./install.sh
|
||||||
```
|
```
|
||||||
```
|
```
|
||||||
luarocks install rnn
|
git clone https://github.com/Element-Research/rnn.git old-rnn
|
||||||
|
cd old-rnn; luarocks make rocks/rnn-scm-1.rockspec
|
||||||
```
|
```
|
||||||
The Estimation model can be downloaded here and because of size constraints the Tracking model can be abtained by download from this link
|
The Estimation model can be downloaded here and because of size constraints the Tracking model can be obtained by download from this link:
|
||||||
[tracking_model.mat] (https://drive.google.com/open?id=0Bxkc5_D0JjpiZWx4eTU1d0hsVXc)
|
[tracking_model.mat](https://drive.google.com/open?id=0Bxkc5_D0JjpiZWx4eTU1d0hsVXc)
|
||||||
|
|
||||||
## How to use:
|
## How to use:
|
||||||
|
|
||||||
For vowel formant estimation, call the main script in a terminal with the following inputs: wav file, formant output filename, and the vowel begin and end times:
|
For vowel formant estimation, call the main script in a terminal with the following inputs: wav file, formant output filename, and the vowel begin and end times:
|
||||||
|
|
||||||
```
|
```
|
||||||
python formants.py data/Example.wav data/ExamplePredictions.csv --begin 1.2 --end 1.3
|
python3 formants.py data/Example.wav data/ExamplePredictions.csv --begin 1.2 --end 1.3
|
||||||
```
|
```
|
||||||
|
|
||||||
or the vowel begin and end times can be taken from a TextGrid file (here the name of the TextGrid is Example.TextGrid and the vowel is taken from a tier called "VOWEL"):
|
or the vowel begin and end times can be taken from a TextGrid file (here the name of the TextGrid is Example.TextGrid and the vowel is taken from a tier called "VOWEL"):
|
||||||
|
|
||||||
```
|
```
|
||||||
python formants.py data/Example.wav data/examplePredictions.csv --textgrid_filename data/Example.TextGrid \
|
python3 formants.py data/Example.wav data/examplePredictions.csv --textgrid_filename data/Example.TextGrid \
|
||||||
--textgrid_tier VOWEL
|
--textgrid_tier VOWEL
|
||||||
```
|
```
|
||||||
|
|
||||||
For formant tracking, just call the script with the wav file and output filename:
|
For formant tracking, just call the script with the wav file and output filename:
|
||||||
|
|
||||||
```
|
```
|
||||||
python formants.py data/Example.wav data/ExamplePredictions.csv
|
python3 formants.py data/Example.wav data/ExamplePredictions.csv
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+17
-15
@@ -9,9 +9,9 @@ from os.path import isfile, join
|
|||||||
import math
|
import math
|
||||||
from scipy.fftpack.realtransforms import dct
|
from scipy.fftpack.realtransforms import dct
|
||||||
from scipy.signal import lfilter, hamming
|
from scipy.signal import lfilter, hamming
|
||||||
from copy import deepcopy
|
|
||||||
from scipy.fftpack import fft, ifft
|
from scipy.fftpack import fft, ifft
|
||||||
from scikits.talkbox.linpred import lpc
|
#from scikits.talkbox.linpred import lpc # obsolete
|
||||||
|
from helpers.conch_lpc import lpc
|
||||||
import shutil
|
import shutil
|
||||||
from helpers.utilities import *
|
from helpers.utilities import *
|
||||||
|
|
||||||
@@ -88,9 +88,9 @@ def periodogram(x, nfft=None, fs=1):
|
|||||||
|
|
||||||
pxx = np.abs(fft(x, nfft)) ** 2
|
pxx = np.abs(fft(x, nfft)) ** 2
|
||||||
if nfft % 2 == 0:
|
if nfft % 2 == 0:
|
||||||
pn = nfft / 2 + 1
|
pn = nfft // 2 + 1
|
||||||
else:
|
else:
|
||||||
pn = (nfft + 1 )/ 2
|
pn = (nfft + 1) // 2
|
||||||
|
|
||||||
fgrid = np.linspace(0, fs * 0.5, pn)
|
fgrid = np.linspace(0, fs * 0.5, pn)
|
||||||
return pxx[:pn] / (n * fs), fgrid
|
return pxx[:pn] / (n * fs), fgrid
|
||||||
@@ -137,9 +137,9 @@ def arspec(x, order, nfft=None, fs=1):
|
|||||||
|
|
||||||
# This is not enough to deal correctly with even/odd size
|
# This is not enough to deal correctly with even/odd size
|
||||||
if nfft % 2 == 0:
|
if nfft % 2 == 0:
|
||||||
pn = nfft / 2 + 1
|
pn = nfft // 2 + 1
|
||||||
else:
|
else:
|
||||||
pn = (nfft + 1 )/ 2
|
pn = (nfft + 1) // 2
|
||||||
|
|
||||||
px = 1 / np.fft.fft(a, nfft)[:pn]
|
px = 1 / np.fft.fft(a, nfft)[:pn]
|
||||||
pxx = np.real(np.conj(px) * px)
|
pxx = np.real(np.conj(px) * px)
|
||||||
@@ -200,7 +200,6 @@ def preemp(input, p):
|
|||||||
|
|
||||||
|
|
||||||
def arspecs(input_wav,order,Atal=False):
|
def arspecs(input_wav,order,Atal=False):
|
||||||
epsilon = 0.0000000001
|
|
||||||
data = input_wav
|
data = input_wav
|
||||||
if Atal:
|
if Atal:
|
||||||
ar = atal(data, order, 30)
|
ar = atal(data, order, 30)
|
||||||
@@ -211,8 +210,10 @@ def arspecs(input_wav,order,Atal=False):
|
|||||||
for k, l in zip(ars[0], ars[1]):
|
for k, l in zip(ars[0], ars[1]):
|
||||||
ar.append(math.log(math.sqrt((k**2)+(l**2))))
|
ar.append(math.log(math.sqrt((k**2)+(l**2))))
|
||||||
for val in range(0,len(ar)):
|
for val in range(0,len(ar)):
|
||||||
if ar[val] == 0.0:
|
if ar[val] < 0.0:
|
||||||
ar[val] = deepcopy(epsilon)
|
ar[val] = np.nan
|
||||||
|
elif ar[val] == 0.0:
|
||||||
|
ar[val] = epsilon
|
||||||
mspec1 = np.log10(ar)
|
mspec1 = np.log10(ar)
|
||||||
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
||||||
ar = dct(mspec1, type=2, norm='ortho', axis=-1)
|
ar = dct(mspec1, type=2, norm='ortho', axis=-1)
|
||||||
@@ -221,10 +222,10 @@ def arspecs(input_wav,order,Atal=False):
|
|||||||
|
|
||||||
def specPS(input_wav,pitch):
|
def specPS(input_wav,pitch):
|
||||||
N = len(input_wav)
|
N = len(input_wav)
|
||||||
samps = N/pitch
|
samps = N // pitch
|
||||||
if samps == 0:
|
if samps == 0:
|
||||||
samps = 1
|
samps = 1
|
||||||
frames = N/samps
|
frames = N // samps
|
||||||
data = input_wav[0:frames]
|
data = input_wav[0:frames]
|
||||||
specs = periodogram(data,nfft=4096)
|
specs = periodogram(data,nfft=4096)
|
||||||
for i in range(1,int(samps)):
|
for i in range(1,int(samps)):
|
||||||
@@ -236,10 +237,11 @@ def specPS(input_wav,pitch):
|
|||||||
specs[0][s] /= float(samps)
|
specs[0][s] /= float(samps)
|
||||||
peri = []
|
peri = []
|
||||||
for k, l in zip(specs[0], specs[1]):
|
for k, l in zip(specs[0], specs[1]):
|
||||||
if k == 0 and l == 0:
|
m = math.sqrt((k ** 2) + (l ** 2))
|
||||||
peri.append(epsilon)
|
if m > 0: m = math.log(m)
|
||||||
else:
|
if m == 0: m = epsilon
|
||||||
peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
|
elif m < 0: m = np.nan
|
||||||
|
peri.append(m)
|
||||||
# Filter the spectrum through the triangle filterbank
|
# Filter the spectrum through the triangle filterbank
|
||||||
mspec = np.log10(peri)
|
mspec = np.log10(peri)
|
||||||
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
||||||
|
|||||||
+4
-4
@@ -9,19 +9,19 @@ import shutil
|
|||||||
|
|
||||||
def predict_from_times(wav_filename, preds_filename, begin, end):
|
def predict_from_times(wav_filename, preds_filename, begin, end):
|
||||||
tmp_features_filename = tempfile._get_default_tempdir() + "/" + next(tempfile._get_candidate_names()) + ".txt"
|
tmp_features_filename = tempfile._get_default_tempdir() + "/" + next(tempfile._get_candidate_names()) + ".txt"
|
||||||
print tmp_features_filename
|
print(tmp_features_filename)
|
||||||
|
|
||||||
if begin > 0.0 or end > 0.0:
|
if begin > 0.0 or end > 0.0:
|
||||||
features.create_features(wav_filename, tmp_features_filename, begin, end)
|
features.create_features(wav_filename, tmp_features_filename, begin, end)
|
||||||
easy_call("th load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
easy_call("luajit load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||||
else:
|
else:
|
||||||
features.create_features(wav_filename, tmp_features_filename)
|
features.create_features(wav_filename, tmp_features_filename)
|
||||||
easy_call("th load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
easy_call("luajit load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||||
|
|
||||||
|
|
||||||
def predict_from_textgrid(wav_filename, preds_filename, textgrid_filename, textgrid_tier):
|
def predict_from_textgrid(wav_filename, preds_filename, textgrid_filename, textgrid_tier):
|
||||||
|
|
||||||
print wav_filename
|
print(wav_filename)
|
||||||
|
|
||||||
if os.path.exists(preds_filename):
|
if os.path.exists(preds_filename):
|
||||||
os.remove(preds_filename)
|
os.remove(preds_filename)
|
||||||
|
|||||||
+2
-2
@@ -4,12 +4,12 @@ if [ $# -eq 2 ]
|
|||||||
then
|
then
|
||||||
tempfile=`mktemp -t txt`
|
tempfile=`mktemp -t txt`
|
||||||
python extract_features.py $1 $tempfile
|
python extract_features.py $1 $tempfile
|
||||||
th load_estimation_model.lua $tempfile $2
|
luajit load_estimation_model.lua $tempfile $2
|
||||||
elif [ $# -eq 4 ]
|
elif [ $# -eq 4 ]
|
||||||
then
|
then
|
||||||
tempfile=`mktemp -t txt`
|
tempfile=`mktemp -t txt`
|
||||||
python extract_features.py $1 $tempfile --begin $3 --end $4
|
python extract_features.py $1 $tempfile --begin $3 --end $4
|
||||||
th load_estimation_model.lua $tempfile $2
|
luajit load_estimation_model.lua $tempfile $2
|
||||||
else
|
else
|
||||||
echo "$0 wav_filename pred_csv_filename [begin_time end_time]"
|
echo "$0 wav_filename pred_csv_filename [begin_time end_time]"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -0,0 +1,286 @@
|
|||||||
|
# This file has been copied (with minor changes) from Michael
|
||||||
|
# McAuliffe's Conch project, to provide a compatible replacement
|
||||||
|
# implementation of the lpc() function from the obsolete Python-2-only
|
||||||
|
# scikits.talkbox library.
|
||||||
|
#
|
||||||
|
# Conch repository: https://github.com/mmcauliffe/Conch-sounds
|
||||||
|
# Source: https://github.com/mmcauliffe/Conch-sounds/blob/master/conch/analysis/formants/lpc.py
|
||||||
|
|
||||||
|
# Copyright (c) 2015 Michael McAuliffe
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
|
||||||
|
#import librosa
|
||||||
|
import numpy as np
|
||||||
|
import scipy as sp
|
||||||
|
from scipy.signal import lfilter
|
||||||
|
|
||||||
|
from scipy.fftpack import fft, ifft
|
||||||
|
from scipy.signal import gaussian
|
||||||
|
|
||||||
|
#from ..helper import nextpow2
|
||||||
|
#from ..functions import BaseAnalysisFunction
|
||||||
|
|
||||||
|
# Source: https://github.com/mmcauliffe/Conch-sounds/blob/master/conch/analysis/helper.py
|
||||||
|
def nextpow2(x):
|
||||||
|
"""Return the first integer N such that 2**N >= abs(x)"""
|
||||||
|
return np.ceil(np.log2(np.abs(x)))
|
||||||
|
|
||||||
|
def lpc_ref(signal, order):
|
||||||
|
"""Compute the Linear Prediction Coefficients.
|
||||||
|
|
||||||
|
Return the order + 1 LPC coefficients for the signal. c = lpc(x, k) will
|
||||||
|
find the k+1 coefficients of a k order linear filter:
|
||||||
|
|
||||||
|
xp[n] = -c[1] * x[n-2] - ... - c[k-1] * x[n-k-1]
|
||||||
|
|
||||||
|
Such as the sum of the squared-error e[i] = xp[i] - x[i] is minimized.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
signal: array_like
|
||||||
|
input signal
|
||||||
|
order : int
|
||||||
|
LPC order (the output will have order + 1 items)
|
||||||
|
|
||||||
|
Notes
|
||||||
|
----
|
||||||
|
This is just for reference, as it is using the direct inversion of the
|
||||||
|
toeplitz matrix, which is really slow"""
|
||||||
|
if signal.ndim > 1:
|
||||||
|
raise ValueError("Array of rank > 1 not supported yet")
|
||||||
|
if order > signal.size:
|
||||||
|
raise ValueError("Input signal must have a lenght >= lpc order")
|
||||||
|
|
||||||
|
if order > 0:
|
||||||
|
p = order + 1
|
||||||
|
r = np.zeros(p, 'float32')
|
||||||
|
# Number of non zero values in autocorrelation one needs for p LPC
|
||||||
|
# coefficients
|
||||||
|
nx = np.min([p, signal.size])
|
||||||
|
x = np.correlate(signal, signal, 'full')
|
||||||
|
r[:nx] = x[signal.size - 1:signal.size + order]
|
||||||
|
phi = np.dot(sp.linalg.inv(sp.linalg.toeplitz(r[:-1])), -r[1:])
|
||||||
|
return np.concatenate(([1.], phi))
|
||||||
|
else:
|
||||||
|
return np.ones(1, dtype='float32')
|
||||||
|
|
||||||
|
|
||||||
|
# @jit
|
||||||
|
def levinson_1d(r, order):
|
||||||
|
"""Levinson-Durbin recursion, to efficiently solve symmetric linear systems
|
||||||
|
with toeplitz structure.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
---------
|
||||||
|
r : array-like
|
||||||
|
input array to invert (since the matrix is symmetric Toeplitz, the
|
||||||
|
corresponding pxp matrix is defined by p items only). Generally the
|
||||||
|
autocorrelation of the signal for linear prediction coefficients
|
||||||
|
estimation. The first item must be a non zero real.
|
||||||
|
|
||||||
|
Notes
|
||||||
|
----
|
||||||
|
This implementation is in python, hence unsuitable for any serious
|
||||||
|
computation. Use it as educational and reference purpose only.
|
||||||
|
|
||||||
|
Levinson is a well-known algorithm to solve the Hermitian toeplitz
|
||||||
|
equation:
|
||||||
|
|
||||||
|
_ _
|
||||||
|
-R[1] = R[0] R[1] ... R[p-1] a[1]
|
||||||
|
: : : : * :
|
||||||
|
: : : _ * :
|
||||||
|
-R[p] = R[p-1] R[p-2] ... R[0] a[p]
|
||||||
|
_
|
||||||
|
with respect to a ( is the complex conjugate). Using the special symmetry
|
||||||
|
in the matrix, the inversion can be done in O(p^2) instead of O(p^3).
|
||||||
|
"""
|
||||||
|
r = np.atleast_1d(r)
|
||||||
|
if r.ndim > 1:
|
||||||
|
raise ValueError("Only rank 1 are supported for now.")
|
||||||
|
|
||||||
|
n = r.size
|
||||||
|
if n < 1:
|
||||||
|
raise ValueError("Cannot operate on empty array !")
|
||||||
|
elif order > n - 1:
|
||||||
|
raise ValueError("Order should be <= size-1")
|
||||||
|
|
||||||
|
if not np.isreal(r[0]):
|
||||||
|
raise ValueError("First item of input must be real.")
|
||||||
|
elif not np.isfinite(1 / r[0]):
|
||||||
|
raise ValueError("First item should be != 0")
|
||||||
|
|
||||||
|
# Estimated coefficients
|
||||||
|
a = np.empty(order + 1, 'float32')
|
||||||
|
# temporary array
|
||||||
|
t = np.empty(order + 1, 'float32')
|
||||||
|
# Reflection coefficients
|
||||||
|
k = np.empty(order, 'float32')
|
||||||
|
|
||||||
|
a[0] = 1.
|
||||||
|
e = r[0]
|
||||||
|
|
||||||
|
for i in range(1, order + 1):
|
||||||
|
acc = r[i]
|
||||||
|
for j in range(1, i):
|
||||||
|
acc += a[j] * r[i - j]
|
||||||
|
k[i - 1] = -acc / e
|
||||||
|
a[i] = k[i - 1]
|
||||||
|
|
||||||
|
for j in range(order):
|
||||||
|
t[j] = a[j]
|
||||||
|
|
||||||
|
for j in range(1, i):
|
||||||
|
a[j] += k[i - 1] * np.conj(t[i - j])
|
||||||
|
|
||||||
|
e *= 1 - k[i - 1] * np.conj(k[i - 1])
|
||||||
|
|
||||||
|
return a, e, k
|
||||||
|
|
||||||
|
|
||||||
|
# @jit
|
||||||
|
def _acorr_last_axis(x, nfft, maxlag):
|
||||||
|
a = np.real(ifft(np.abs(fft(x, n=nfft) ** 2)))
|
||||||
|
return a[..., :maxlag + 1] / x.shape[-1]
|
||||||
|
|
||||||
|
|
||||||
|
# @jit
|
||||||
|
def acorr_lpc(x, axis=-1):
|
||||||
|
"""Compute autocorrelation of x along the given axis.
|
||||||
|
|
||||||
|
This compute the biased autocorrelation estimator (divided by the size of
|
||||||
|
input signal)
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
The reason why we do not use acorr directly is for speed issue."""
|
||||||
|
if not np.isrealobj(x):
|
||||||
|
raise ValueError("Complex input not supported yet")
|
||||||
|
|
||||||
|
maxlag = x.shape[axis]
|
||||||
|
nfft = int(2 ** nextpow2(2 * maxlag - 1))
|
||||||
|
|
||||||
|
if axis != -1:
|
||||||
|
x = np.swapaxes(x, -1, axis)
|
||||||
|
a = _acorr_last_axis(x, nfft, maxlag)
|
||||||
|
if axis != -1:
|
||||||
|
a = np.swapaxes(a, -1, axis)
|
||||||
|
return a
|
||||||
|
|
||||||
|
|
||||||
|
# @jit
|
||||||
|
def lpc(signal, order, axis=-1):
|
||||||
|
"""Compute the Linear Prediction Coefficients.
|
||||||
|
|
||||||
|
Return the order + 1 LPC coefficients for the signal. c = lpc(x, k) will
|
||||||
|
find the k+1 coefficients of a k order linear filter:
|
||||||
|
|
||||||
|
xp[n] = -c[1] * x[n-2] - ... - c[k-1] * x[n-k-1]
|
||||||
|
|
||||||
|
Such as the sum of the squared-error e[i] = xp[i] - x[i] is minimized.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
signal: array_like
|
||||||
|
input signal
|
||||||
|
order : int
|
||||||
|
LPC order (the output will have order + 1 items)
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
a : array-like
|
||||||
|
the solution of the inversion.
|
||||||
|
e : array-like
|
||||||
|
the prediction error.
|
||||||
|
k : array-like
|
||||||
|
reflection coefficients.
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
This uses Levinson-Durbin recursion for the autocorrelation matrix
|
||||||
|
inversion, and fft for the autocorrelation computation.
|
||||||
|
|
||||||
|
For small order, particularly if order << signal size, direct computation
|
||||||
|
of the autocorrelation is faster: use levinson and correlate in this case."""
|
||||||
|
n = signal.shape[axis]
|
||||||
|
if order > n:
|
||||||
|
raise ValueError("Input signal must have length >= order")
|
||||||
|
|
||||||
|
r = acorr_lpc(signal, axis)
|
||||||
|
return levinson_1d(r, order)
|
||||||
|
|
||||||
|
|
||||||
|
def process_frame(X, window, num_formants, new_sr):
|
||||||
|
X = X * window
|
||||||
|
A, e, k = lpc(X, num_formants * 2)
|
||||||
|
|
||||||
|
rts = np.roots(A)
|
||||||
|
rts = rts[np.where(np.imag(rts) >= 0)]
|
||||||
|
angz = np.arctan2(np.imag(rts), np.real(rts))
|
||||||
|
frqs = angz * (new_sr / (2 * np.pi))
|
||||||
|
frq_inds = np.argsort(frqs)
|
||||||
|
frqs = frqs[frq_inds]
|
||||||
|
bw = -1 / 2 * (new_sr / (2 * np.pi)) * np.log(np.abs(rts[frq_inds]))
|
||||||
|
return frqs, bw
|
||||||
|
|
||||||
|
|
||||||
|
def lpc_formants(signal, sr, num_formants, max_freq, time_step,
|
||||||
|
win_len, window_shape='gaussian'):
|
||||||
|
output = {}
|
||||||
|
new_sr = 2 * max_freq
|
||||||
|
alpha = np.exp(-2 * np.pi * 50 * (1 / new_sr))
|
||||||
|
proc = lfilter([1., -alpha], 1, signal)
|
||||||
|
if sr > new_sr:
|
||||||
|
proc = librosa.resample(proc, sr, new_sr)
|
||||||
|
nperseg = int(win_len * new_sr)
|
||||||
|
nperstep = int(time_step * new_sr)
|
||||||
|
if window_shape == 'gaussian':
|
||||||
|
window = gaussian(nperseg + 2, 0.45 * (nperseg - 1) / 2)[1:nperseg + 1]
|
||||||
|
else:
|
||||||
|
window = np.hanning(nperseg + 2)[1:nperseg + 1]
|
||||||
|
indices = np.arange(int(nperseg / 2), proc.shape[0] - int(nperseg / 2) + 1, nperstep)
|
||||||
|
num_frames = len(indices)
|
||||||
|
for i in range(num_frames):
|
||||||
|
if nperseg % 2 != 0:
|
||||||
|
X = proc[indices[i] - int(nperseg / 2):indices[i] + int(nperseg / 2) + 1]
|
||||||
|
else:
|
||||||
|
X = proc[indices[i] - int(nperseg / 2):indices[i] + int(nperseg / 2)]
|
||||||
|
frqs, bw = process_frame(X, window, num_formants, new_sr)
|
||||||
|
formants = []
|
||||||
|
for j, f in enumerate(frqs):
|
||||||
|
if f < 50:
|
||||||
|
continue
|
||||||
|
if f > max_freq - 50:
|
||||||
|
continue
|
||||||
|
formants.append((np.asscalar(f), np.asscalar(bw[j])))
|
||||||
|
missing = num_formants - len(formants)
|
||||||
|
if missing:
|
||||||
|
formants += [(None, None)] * missing
|
||||||
|
output[indices[i] / new_sr] = formants
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
#class FormantTrackFunction(BaseAnalysisFunction):
|
||||||
|
# def __init__(self, num_formants=5, max_frequency=5000,
|
||||||
|
# time_step=0.01, window_length=0.025, window_shape='gaussian'):
|
||||||
|
# super(FormantTrackFunction, self).__init__()
|
||||||
|
# self.arguments = [num_formants, max_frequency, time_step, window_length, window_shape]
|
||||||
|
# self._function = lpc_formants
|
||||||
|
# self.requires_file = False
|
||||||
Reference in New Issue
Block a user