From a0c1265dca467395a73086eb72e6655e993df0d7 Mon Sep 17 00:00:00 2001
From: Shuadissen
Date: Wed, 22 Jun 2016 19:24:16 +0300
Subject: [PATCH] Add files via upload

---
 Extract_Features.py       | 273 ++++++++++++++++++++++++++++++++++++++
 formants.py               |  47 +++++++
 load_estimation_model.lua |  34 +++++
 3 files changed, 354 insertions(+)
 create mode 100644 Extract_Features.py
 create mode 100644 formants.py
 create mode 100644 load_estimation_model.lua

diff --git a/Extract_Features.py b/Extract_Features.py
new file mode 100644
index 0000000..039f388
--- /dev/null
+++ b/Extract_Features.py
@@ -0,0 +1,273 @@
+__author__ = 'shua'
+import argparse
+import numpy as np
+import wave
+import os
+from os import listdir
+from os.path import isfile, join
+import math
+from scipy.fftpack.realtransforms import dct
+from scipy.signal import lfilter, hamming
+from copy import deepcopy
+from scipy.fftpack import fft, ifft
+from scikits.talkbox.linpred import lpc
+import shutil
+epsilon = 0.0000000001
+prefac = .97
+
+def build_data(wav,begin=None,end=None):
+    wav_in_file = wave.Wave_read(wav)
+    wav_in_num_samples = wav_in_file.getnframes()
+    N = wav_in_file.getnframes()
+    dstr = wav_in_file.readframes(N)
+    data = np.fromstring(dstr, np.int16)
+    if begin is not None and end is not None:
+        return data[begin*16000:end*16000]
+    X = []
+    l = len(data)
+    for i in range(0, l-100, 160):
+        X.append(data[i:i + 480])
+    return X
+
+def periodogram(x, nfft=None, fs=1):
+    """Compute the periodogram of the given signal, with the given fft size.
+
+    Parameters
+    ----------
+    x : array-like
+        input signal
+    nfft : int
+        size of the fft to compute the periodogram. If None (default), the
+        length of the signal is used. if nfft > n, the signal is 0 padded.
+    fs : float
+        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
+        Nyquist limit).
+
+    Returns
+    -------
+    pxx : array-like
+        The psd estimate.
+    fgrid : array-like
+        Frequency grid over which the periodogram was estimated.
+
+    Examples
+    --------
+    Generate a signal with two sinusoids, and compute its periodogram:
+
+    >>> fs = 1000
+    >>> x = np.sin(2 * np.pi * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
+    >>> x += np.sin(2 * np.pi * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
+    >>> px, fx = periodogram(x, 512, fs)
+
+    Notes
+    -----
+    Only real signals supported for now.
+
+    Returns the one-sided version of the periodogram.
+
+    Discrepancy with matlab: matlab computes the psd in unit of power / radian /
+    sample, and we compute the psd in unit of power / sample: to get the same
+    result as matlab, just multiply the result from talkbox by 2pi"""
+    x = np.atleast_1d(x)
+    n = x.size
+
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    if not nfft:
+        nfft = n
+    if nfft < n:
+        raise ValueError("nfft < signal size not supported yet")
+
+    pxx = np.abs(fft(x, nfft)) ** 2
+    if nfft % 2 == 0:
+        pn = nfft / 2 + 1
+    else:
+        pn = (nfft + 1 )/ 2
+
+    fgrid = np.linspace(0, fs * 0.5, pn)
+    return pxx[:pn] / (n * fs), fgrid
+
+def arspec(x, order, nfft=None, fs=1):
+    """Compute the spectral density using an AR model.
+
+    An AR model of the signal is estimated through the Yule-Walker equations;
+    the estimated AR coefficients are then used to compute the spectrum, which
+    can be computed explicitly for AR models.
+
+    Parameters
+    ----------
+    x : array-like
+        input signal
+    order : int
+        Order of the LPC computation.
+    nfft : int
+        size of the fft to compute the periodogram. If None (default), the
+        length of the signal is used. if nfft > n, the signal is 0 padded.
+    fs : float
+        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
+        Nyquist limit).
+
+    Returns
+    -------
+    pxx : array-like
+        The psd estimate.
+    fgrid : array-like
+        Frequency grid over which the periodogram was estimated.
+    """
+
+    x = np.atleast_1d(x)
+    n = x.size
+
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    if not nfft:
+        nfft = n
+    a, e, k = lpc(x, order)
+
+    # This is not enough to deal correctly with even/odd size
+    if nfft % 2 == 0:
+        pn = nfft / 2 + 1
+    else:
+        pn = (nfft + 1 )/ 2
+
+    px = 1 / np.fft.fft(a, nfft)[:pn]
+    pxx = np.real(np.conj(px) * px)
+    pxx /= fs / e
+    fx = np.linspace(0, fs * 0.5, pxx.size)
+    return pxx, fx
+
+def taper(n, p=0.1):
+    """Return a split cosine bell taper (or window)
+
+    Parameters
+    ----------
+    n: int
+        number of samples of the taper
+    p: float
+        proportion of taper (0 <= p <= 1.)
+
+    Note
+    ----
+    p represents the proportion of tapered (or "smoothed") data compared to a
+    boxcar.
+    """
+    if p > 1. or p < 0:
+        raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
+                         % p)
+    w = np.ones(n)
+    ntp = np.floor(0.5 * n * p)
+    w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
+    w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
+
+    return w
+
+def atal(x, order, num_coefs):
+    x = np.atleast_1d(x)
+    n = x.size
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    a, e, kk = lpc(x, order)
+    c = np.zeros(num_coefs)
+    c[0] = a[0]
+    for m in range(1, order+1):
+        c[m] = - a[m]
+        for k in range(1, m):
+            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
+    for m in range(order+1, num_coefs):
+        for k in range(1, order+1):
+            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
+    return c
+
+def preemp(input, p):
+    """Pre-emphasis filter."""
+    return lfilter([1., -p], 1, input)
+
+def arspecs(input_wav,order,Atal=False):
+    epsilon = 0.0000000001
+    data = input_wav
+    if Atal:
+        ar = atal(data, order, 30)
+        return ar
+    else:
+        ar = []
+        ars = arspec(data, order, 4096)
+        for k, l in zip(ars[0], ars[1]):
+            ar.append(math.log(math.sqrt((k**2)+(l**2))))
+        for val in range(0,len(ar)):
+            if ar[val] == 0.0:
+                ar[val] = deepcopy(epsilon)
+        mspec1 = np.log10(ar)
+        # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+        ar = dct(mspec1, type=2, norm='ortho', axis=-1)
+    return ar[:30]
+
+def specPS(input_wav,pitch):
+    N = len(input_wav)
+    samps = N/pitch
+    if samps == 0:
+        samps = 1
+    frames = N/samps
+    data = input_wav[0:frames]
+    specs = periodogram(data,nfft=4096)
+    for i in range(1,int(samps)):
+        data = input_wav[frames*i:frames*(i+1)]
+        peri = periodogram(data,nfft=4096)
+        for sp in range(len(peri[0])):
+            specs[0][sp] += peri[0][sp]
+    for s in range(len(specs[0])):
+        specs[0][s] /= float(samps)
+    peri = []
+    for k, l in zip(specs[0], specs[1]):
+        if k == 0 and l == 0:
+            peri.append(epsilon)
+        else:
+            peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
+    # Filter the spectrum through the triangle filterbank
+    mspec = np.log10(peri)
+    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+    ceps = dct(mspec, type=2, norm='ortho', axis=-1)
+    return ceps[:50]
+def build_single_feature_row(data,Atal):
+    lpcs = [8,9,10,11,12,13,14,15,16,17]
+    arr = []
+    periodo = specPS(data,50)
+    arr.extend(periodo)
+    for j in lpcs:
+        if Atal:
+            ars = arspecs(data, j, Atal=True)
+        else:
+            ars = arspecs(data, j)
+        arr.extend(ars)
+    for i in range(len(arr)):
+        if np.isnan(np.float(arr[i])):
+            arr[i] = 0.0
+    return arr
+
+def Create_features(input_wav,feature_file_name, begin=None,end=None,Atal=False):
+    X = build_data(input_wav,begin,end)
+    full_path = os.path.realpath(__file__)
+    output_directory = os.path.dirname(full_path)+'/Features/'
+    if Atal:
+        feature_file = output_directory+"ATAL_features_"+feature_file_name+'.txt'
+    else:
+        feature_file = output_directory+"features_"+feature_file_name+'.txt'
+    if begin is not None and end is not None:
+        arr = [input_wav.replace('.wav','')]
+        arr.extend(build_single_feature_row(X,Atal))
+        np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
+        return arr
+    arcep_mat = []
+    for i in range(len(X)):
+        arr = [input_wav.replace('.wav','_PART_')+str(i)]
+        arr.extend(build_single_feature_row(X[i], Atal))
+        arcep_mat.append(arr)
+    np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
+    return arcep_mat
+
+
diff --git a/formants.py b/formants.py
new file mode 100644
index 0000000..0d496b4
--- /dev/null
+++ b/formants.py
@@ -0,0 +1,47 @@
+import Extract_Features as features
+from subprocess import call
+import os
+import sys
+import shlex
+import argparse
+
+
+def easy_call(command, debug_mode=False):
+    try:
+        #command = "time " + command
+        if debug_mode:
+            print >>sys.stderr, command
+        call(command, shell=True)
+    except Exception as exception:
+        print "Error: could not execute the following"
+        print ">>", command
+        print type(exception)  # the exception instance
+        print exception.args  # arguments stored in .args
+        exit(-1)
+
+
+if __name__ == "__main__":
+    # parse arguments
+    parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
+    parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
+    parser.add_argument('formants_file', default='', help="output formant text file")
+    parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
+    parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
+    args = parser.parse_args()
+    full_path = os.path.realpath(__file__)
+    if not os.path.exists(os.path.dirname(full_path)+'/Features/'):
+        os.makedirs(os.path.dirname(full_path)+'/Features/')
+    if not os.path.exists(os.path.dirname(full_path)+'/Predictions/'):
+        os.makedirs(os.path.dirname(full_path)+'/Predictions/')
+
+    if args.begin > 0.0 or args.end > 0.0:
+        Data = features.Create_features(args.wav_file, args.formants_file, args.begin, args.end)
+        ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
+        pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
+        easy_call("th load_estimation_model.lua " + ff + ' ' + pf)
+    else:
+        Data = features.Create_features(args.wav_file, args.formants_file)
+        ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
+        pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
+        easy_call("th load_tracking_model.lua " + ff + ' ' + pf)
+
diff --git a/load_estimation_model.lua b/load_estimation_model.lua
new file mode 100644
index 0000000..423e587
--- /dev/null
+++ b/load_estimation_model.lua
@@ -0,0 +1,34 @@
+require 'torch'   -- torch
+require 'optim'
+require 'nn'      -- provides a normalization operator
+
+function string:split(sep)
+    local sep, fields = sep, {}
+    local pattern = string.format("([^%s]+)", sep)
+    self:gsub(pattern, function(substr) fields[#fields + 1] = substr end)
+    return fields
+end
+
+local f_file = io.open(arg[1], 'r')
+local p_file = io.open(arg[2], 'w')
+local data = torch.Tensor(1, 351)
+local name = ''
+for line in f_file:lines('*l') do
+    local l = line:split(',')
+    first = true
+    for key, val in ipairs(l) do
+        if first == false then
+            data[1][key] = val
+        else data[1][key] = 0
+            first = false
+            name = val
+        end
+    end
+end
+
+local X = data[{{},{2,-1}}]
+model = torch.load('estimation_model.dat')
+local myPrediction = model:forward(X)
+p_file:write('NAME,F1,F2,F3,F4\n')
+p_file:write(name..','..tostring(myPrediction[1][1])..','..tostring(myPrediction[1][2])..','..tostring(myPrediction[1][3])..','..tostring(myPrediction[1][4])..'\n')
+