Add files via upload
This commit is contained in:
@@ -0,0 +1,273 @@
|
|||||||
|
__author__ = 'shua'
|
||||||
|
import argparse
|
||||||
|
import numpy as np
|
||||||
|
import wave
|
||||||
|
import os
|
||||||
|
from os import listdir
|
||||||
|
from os.path import isfile, join
|
||||||
|
import math
|
||||||
|
from scipy.fftpack.realtransforms import dct
|
||||||
|
from scipy.signal import lfilter, hamming
|
||||||
|
from copy import deepcopy
|
||||||
|
from scipy.fftpack import fft, ifft
|
||||||
|
from scikits.talkbox.linpred import lpc
|
||||||
|
import shutil
|
||||||
|
epsilon = 0.0000000001
|
||||||
|
prefac = .97
|
||||||
|
|
||||||
|
def build_data(wav,begin=None,end=None):
|
||||||
|
wav_in_file = wave.Wave_read(wav)
|
||||||
|
wav_in_num_samples = wav_in_file.getnframes()
|
||||||
|
N = wav_in_file.getnframes()
|
||||||
|
dstr = wav_in_file.readframes(N)
|
||||||
|
data = np.fromstring(dstr, np.int16)
|
||||||
|
if begin is not None and end is not None:
|
||||||
|
return data[begin*16000:end*16000]
|
||||||
|
X = []
|
||||||
|
l = len(data)
|
||||||
|
for i in range(0, l-100, 160):
|
||||||
|
X.append(data[i:i + 480])
|
||||||
|
return X
|
||||||
|
|
||||||
|
def periodogram(x, nfft=None, fs=1):
|
||||||
|
"""Compute the periodogram of the given signal, with the given fft size.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
x : array-like
|
||||||
|
input signal
|
||||||
|
nfft : int
|
||||||
|
size of the fft to compute the periodogram. If None (default), the
|
||||||
|
length of the signal is used. if nfft > n, the signal is 0 padded.
|
||||||
|
fs : float
|
||||||
|
Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
|
||||||
|
Nyquist limit).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pxx : array-like
|
||||||
|
The psd estimate.
|
||||||
|
fgrid : array-like
|
||||||
|
Frequency grid over which the periodogram was estimated.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
Generate a signal with two sinusoids, and compute its periodogram:
|
||||||
|
|
||||||
|
>>> fs = 1000
|
||||||
|
>>> x = np.sin(2 * np.pi * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
|
||||||
|
>>> x += np.sin(2 * np.pi * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
|
||||||
|
>>> px, fx = periodogram(x, 512, fs)
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
Only real signals supported for now.
|
||||||
|
|
||||||
|
Returns the one-sided version of the periodogram.
|
||||||
|
|
||||||
|
Discrepency with matlab: matlab compute the psd in unit of power / radian /
|
||||||
|
sample, and we compute the psd in unit of power / sample: to get the same
|
||||||
|
result as matlab, just multiply the result from talkbox by 2pi"""
|
||||||
|
x = np.atleast_1d(x)
|
||||||
|
n = x.size
|
||||||
|
|
||||||
|
if x.ndim > 1:
|
||||||
|
raise ValueError("Only rank 1 input supported for now.")
|
||||||
|
if not np.isrealobj(x):
|
||||||
|
raise ValueError("Only real input supported for now.")
|
||||||
|
if not nfft:
|
||||||
|
nfft = n
|
||||||
|
if nfft < n:
|
||||||
|
raise ValueError("nfft < signal size not supported yet")
|
||||||
|
|
||||||
|
pxx = np.abs(fft(x, nfft)) ** 2
|
||||||
|
if nfft % 2 == 0:
|
||||||
|
pn = nfft / 2 + 1
|
||||||
|
else:
|
||||||
|
pn = (nfft + 1 )/ 2
|
||||||
|
|
||||||
|
fgrid = np.linspace(0, fs * 0.5, pn)
|
||||||
|
return pxx[:pn] / (n * fs), fgrid
|
||||||
|
|
||||||
|
def arspec(x, order, nfft=None, fs=1):
|
||||||
|
"""Compute the spectral density using an AR model.
|
||||||
|
|
||||||
|
An AR model of the signal is estimated through the Yule-Walker equations;
|
||||||
|
the estimated AR coefficient are then used to compute the spectrum, which
|
||||||
|
can be computed explicitely for AR models.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
x : array-like
|
||||||
|
input signal
|
||||||
|
order : int
|
||||||
|
Order of the LPC computation.
|
||||||
|
nfft : int
|
||||||
|
size of the fft to compute the periodogram. If None (default), the
|
||||||
|
length of the signal is used. if nfft > n, the signal is 0 padded.
|
||||||
|
fs : float
|
||||||
|
Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
|
||||||
|
Nyquist limit).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pxx : array-like
|
||||||
|
The psd estimate.
|
||||||
|
fgrid : array-like
|
||||||
|
Frequency grid over which the periodogram was estimated.
|
||||||
|
"""
|
||||||
|
|
||||||
|
x = np.atleast_1d(x)
|
||||||
|
n = x.size
|
||||||
|
|
||||||
|
if x.ndim > 1:
|
||||||
|
raise ValueError("Only rank 1 input supported for now.")
|
||||||
|
if not np.isrealobj(x):
|
||||||
|
raise ValueError("Only real input supported for now.")
|
||||||
|
if not nfft:
|
||||||
|
nfft = n
|
||||||
|
a, e, k = lpc(x, order)
|
||||||
|
|
||||||
|
# This is not enough to deal correctly with even/odd size
|
||||||
|
if nfft % 2 == 0:
|
||||||
|
pn = nfft / 2 + 1
|
||||||
|
else:
|
||||||
|
pn = (nfft + 1 )/ 2
|
||||||
|
|
||||||
|
px = 1 / np.fft.fft(a, nfft)[:pn]
|
||||||
|
pxx = np.real(np.conj(px) * px)
|
||||||
|
pxx /= fs / e
|
||||||
|
fx = np.linspace(0, fs * 0.5, pxx.size)
|
||||||
|
return pxx, fx
|
||||||
|
|
||||||
|
def taper(n, p=0.1):
|
||||||
|
"""Return a split cosine bell taper (or window)
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
n: int
|
||||||
|
number of samples of the taper
|
||||||
|
p: float
|
||||||
|
proportion of taper (0 <= p <= 1.)
|
||||||
|
|
||||||
|
Note
|
||||||
|
----
|
||||||
|
p represents the proportion of tapered (or "smoothed") data compared to a
|
||||||
|
boxcar.
|
||||||
|
"""
|
||||||
|
if p > 1. or p < 0:
|
||||||
|
raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
|
||||||
|
% p)
|
||||||
|
w = np.ones(n)
|
||||||
|
ntp = np.floor(0.5 * n * p)
|
||||||
|
w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
|
||||||
|
w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
|
||||||
|
|
||||||
|
return w
|
||||||
|
|
||||||
|
def atal(x, order, num_coefs):
|
||||||
|
x = np.atleast_1d(x)
|
||||||
|
n = x.size
|
||||||
|
if x.ndim > 1:
|
||||||
|
raise ValueError("Only rank 1 input supported for now.")
|
||||||
|
if not np.isrealobj(x):
|
||||||
|
raise ValueError("Only real input supported for now.")
|
||||||
|
a, e, kk = lpc(x, order)
|
||||||
|
c = np.zeros(num_coefs)
|
||||||
|
c[0] = a[0]
|
||||||
|
for m in range(1, order+1):
|
||||||
|
c[m] = - a[m]
|
||||||
|
for k in range(1, m):
|
||||||
|
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
|
||||||
|
for m in range(order+1, num_coefs):
|
||||||
|
for k in range(1, order+1):
|
||||||
|
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
|
||||||
|
return c
|
||||||
|
|
||||||
|
def preemp(input, p):
|
||||||
|
"""Pre-emphasis filter."""
|
||||||
|
return lfilter([1., -p], 1, input)
|
||||||
|
|
||||||
|
def arspecs(input_wav,order,Atal=False):
|
||||||
|
epsilon = 0.0000000001
|
||||||
|
data = input_wav
|
||||||
|
if Atal:
|
||||||
|
ar = atal(data, order, 30)
|
||||||
|
return ar
|
||||||
|
else:
|
||||||
|
ar = []
|
||||||
|
ars = arspec(data, order, 4096)
|
||||||
|
for k, l in zip(ars[0], ars[1]):
|
||||||
|
ar.append(math.log(math.sqrt((k**2)+(l**2))))
|
||||||
|
for val in range(0,len(ar)):
|
||||||
|
if ar[val] == 0.0:
|
||||||
|
ar[val] = deepcopy(epsilon)
|
||||||
|
mspec1 = np.log10(ar)
|
||||||
|
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
||||||
|
ar = dct(mspec1, type=2, norm='ortho', axis=-1)
|
||||||
|
return ar[:30]
|
||||||
|
|
||||||
|
def specPS(input_wav,pitch):
|
||||||
|
N = len(input_wav)
|
||||||
|
samps = N/pitch
|
||||||
|
if samps == 0:
|
||||||
|
samps = 1
|
||||||
|
frames = N/samps
|
||||||
|
data = input_wav[0:frames]
|
||||||
|
specs = periodogram(data,nfft=4096)
|
||||||
|
for i in range(1,int(samps)):
|
||||||
|
data = input_wav[frames*i:frames*(i+1)]
|
||||||
|
peri = periodogram(data,nfft=4096)
|
||||||
|
for sp in range(len(peri[0])):
|
||||||
|
specs[0][sp] += peri[0][sp]
|
||||||
|
for s in range(len(specs[0])):
|
||||||
|
specs[0][s] /= float(samps)
|
||||||
|
peri = []
|
||||||
|
for k, l in zip(specs[0], specs[1]):
|
||||||
|
if k == 0 and l == 0:
|
||||||
|
peri.append(epsilon)
|
||||||
|
else:
|
||||||
|
peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
|
||||||
|
# Filter the spectrum through the triangle filterbank
|
||||||
|
mspec = np.log10(peri)
|
||||||
|
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
||||||
|
ceps = dct(mspec, type=2, norm='ortho', axis=-1)
|
||||||
|
return ceps[:50]
|
||||||
|
def build_single_feature_row(data,Atal):
|
||||||
|
lpcs = [8,9,10,11,12,13,14,15,16,17]
|
||||||
|
arr = []
|
||||||
|
periodo = specPS(data,50)
|
||||||
|
arr.extend(periodo)
|
||||||
|
for j in lpcs:
|
||||||
|
if Atal:
|
||||||
|
ars = arspecs(data, j, Atal=True)
|
||||||
|
else:
|
||||||
|
ars = arspecs(data, j)
|
||||||
|
arr.extend(ars)
|
||||||
|
for i in range(len(arr)):
|
||||||
|
if np.isnan(np.float(arr[i])):
|
||||||
|
arr[i] = 0.0
|
||||||
|
return arr
|
||||||
|
|
||||||
|
def Create_features(input_wav,feature_file_name, begin=None,end=None,Atal=False):
|
||||||
|
X = build_data(input_wav,begin,end)
|
||||||
|
full_path = os.path.realpath(__file__)
|
||||||
|
output_directory = os.path.dirname(full_path)+'/Features/'
|
||||||
|
if Atal:
|
||||||
|
feature_file = output_directory+"ATAL_features_"+feature_file_name+'.txt'
|
||||||
|
else:
|
||||||
|
feature_file = output_directory+"features_"+feature_file_name+'.txt'
|
||||||
|
if begin is not None and end is not None:
|
||||||
|
arr = [input_wav.replace('.wav','')]
|
||||||
|
arr.extend(build_single_feature_row(X,Atal))
|
||||||
|
np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
|
||||||
|
return arr
|
||||||
|
arcep_mat = []
|
||||||
|
for i in range(len(X)):
|
||||||
|
arr = [input_wav.replace('.wav','_PART_')+str(i)]
|
||||||
|
arr.extend(build_single_feature_row(X[i], Atal))
|
||||||
|
arcep_mat.append(arr)
|
||||||
|
np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
|
||||||
|
return arcep_mat
|
||||||
|
|
||||||
|
|
||||||
+47
@@ -0,0 +1,47 @@
|
|||||||
|
import Extract_Features as features
|
||||||
|
from subprocess import call
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import shlex
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def easy_call(command, debug_mode=False):
|
||||||
|
try:
|
||||||
|
#command = "time " + command
|
||||||
|
if debug_mode:
|
||||||
|
print >>sys.stderr, command
|
||||||
|
call(command, shell=True)
|
||||||
|
except Exception as exception:
|
||||||
|
print "Error: could not execute the following"
|
||||||
|
print ">>", command
|
||||||
|
print type(exception) # the exception instance
|
||||||
|
print exception.args # arguments stored in .args
|
||||||
|
exit(-1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# parse arguments
|
||||||
|
parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
|
||||||
|
parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
|
||||||
|
parser.add_argument('formants_file', default='', help="output formant text file")
|
||||||
|
parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
|
||||||
|
parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
|
||||||
|
args = parser.parse_args()
|
||||||
|
full_path = os.path.realpath(__file__)
|
||||||
|
if not os.path.exists(os.path.dirname(full_path)+'/Features/'):
|
||||||
|
os.makedirs(os.path.dirname(full_path)+'/Features/')
|
||||||
|
if not os.path.exists(os.path.dirname(full_path)+'/Predictions/'):
|
||||||
|
os.makedirs(os.path.dirname(full_path)+'/Predictions/')
|
||||||
|
|
||||||
|
if args.begin > 0.0 or args.end > 0.0:
|
||||||
|
Data = features.Create_features(args.wav_file, args.formants_file, args.begin, args.end)
|
||||||
|
ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
|
||||||
|
pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
|
||||||
|
easy_call("th load_estimation_model.lua " + ff + ' ' + pf)
|
||||||
|
else:
|
||||||
|
Data = features.Create_features(args.wav_file, args.formants_file)
|
||||||
|
ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
|
||||||
|
pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
|
||||||
|
easy_call("th load_tracking_model.lua " + ff + ' ' + pf)
|
||||||
|
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
require 'torch' -- torch
|
||||||
|
require 'optim'
|
||||||
|
require 'nn' -- provides a normalization operator
|
||||||
|
|
||||||
|
function string:split(sep)
|
||||||
|
local sep, fields = sep, {}
|
||||||
|
local pattern = string.format("([^%s]+)", sep)
|
||||||
|
self:gsub(pattern, function(substr) fields[#fields + 1] = substr end)
|
||||||
|
return fields
|
||||||
|
end
|
||||||
|
|
||||||
|
local f_file = io.open(arg[1], 'r')
|
||||||
|
local p_file = io.open(arg[2], 'w')
|
||||||
|
local data = torch.Tensor(1, 351)
|
||||||
|
local name = ''
|
||||||
|
for line in f_file:lines('*l') do
|
||||||
|
local l = line:split(',')
|
||||||
|
first = true
|
||||||
|
for key, val in ipairs(l) do
|
||||||
|
if first == false then
|
||||||
|
data[1][key] = val
|
||||||
|
else data[1][key] = 0
|
||||||
|
first = false
|
||||||
|
name = val
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local X = data[{{},{2,-1}}]
|
||||||
|
model = torch.load('estimation_model.dat')
|
||||||
|
local myPrediction = model:forward(X)
|
||||||
|
p_file:write('NAME,F1,F2,F3,F4\n')
|
||||||
|
p_file:write(name..','..tostring(myPrediction[1][1])..','..tostring(myPrediction[1][2])..','..tostring(myPrediction[1][3])..','..tostring(myPrediction[1][4])..'\n')
|
||||||
|
|
||||||
Reference in New Issue
Block a user