Add files via upload

2016-06-22 19:24:16 +03:00
parent d27f7aa59e
commit a0c1265dca
3 changed files with 354 additions and 0 deletions
@@ -0,0 +1,273 @@
+__author__ = 'shua'
+import argparse
+import numpy as np
+import wave
+import os
+from os import listdir
+from os.path import isfile, join
+import math
+from scipy.fftpack.realtransforms import dct
+from scipy.signal import lfilter, hamming
+from copy import deepcopy
+from scipy.fftpack import fft, ifft
+from scikits.talkbox.linpred import lpc
+import shutil
+epsilon = 0.0000000001
+prefac = .97
+
+def build_data(wav,begin=None,end=None):
+    wav_in_file = wave.Wave_read(wav)
+    wav_in_num_samples = wav_in_file.getnframes()
+    N = wav_in_file.getnframes()
+    dstr = wav_in_file.readframes(N)
+    data = np.fromstring(dstr, np.int16)
+    if begin is not None and end is not None:
+        return data[begin*16000:end*16000]
+    X = []
+    l = len(data)
+    for i in range(0, l-100, 160):
+        X.append(data[i:i + 480])
+    return X
+
+def periodogram(x, nfft=None, fs=1):
+    """Compute the periodogram of the given signal, with the given fft size.
+
+    Parameters
+    ----------
+    x : array-like
+        input signal
+    nfft : int
+        size of the fft to compute the periodogram. If None (default), the
+        length of the signal is used. if nfft > n, the signal is 0 padded.
+    fs : float
+        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
+        Nyquist limit).
+
+    Returns
+    -------
+    pxx : array-like
+        The psd estimate.
+    fgrid : array-like
+        Frequency grid over which the periodogram was estimated.
+
+    Examples
+    --------
+    Generate a signal with two sinusoids, and compute its periodogram:
+
+    >>> fs = 1000
+    >>> x = np.sin(2 * np.pi  * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
+    >>> x += np.sin(2 * np.pi  * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
+    >>> px, fx = periodogram(x, 512, fs)
+
+    Notes
+    -----
+    Only real signals supported for now.
+
+    Returns the one-sided version of the periodogram.
+
+    Discrepency with matlab: matlab compute the psd in unit of power / radian /
+    sample, and we compute the psd in unit of power / sample: to get the same
+    result as matlab, just multiply the result from talkbox by 2pi"""
+    x = np.atleast_1d(x)
+    n = x.size
+
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    if not nfft:
+        nfft = n
+    if nfft < n:
+        raise ValueError("nfft < signal size not supported yet")
+
+    pxx = np.abs(fft(x, nfft)) ** 2
+    if nfft % 2 == 0:
+        pn = nfft / 2 + 1
+    else:
+        pn = (nfft + 1 )/ 2
+
+    fgrid = np.linspace(0, fs * 0.5, pn)
+    return pxx[:pn] / (n * fs), fgrid
+
+def arspec(x, order, nfft=None, fs=1):
+    """Compute the spectral density using an AR model.
+
+    An AR model of the signal is estimated through the Yule-Walker equations;
+    the estimated AR coefficient are then used to compute the spectrum, which
+    can be computed explicitely for AR models.
+
+    Parameters
+    ----------
+    x : array-like
+        input signal
+    order : int
+        Order of the LPC computation.
+    nfft : int
+        size of the fft to compute the periodogram. If None (default), the
+        length of the signal is used. if nfft > n, the signal is 0 padded.
+    fs : float
+        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
+        Nyquist limit).
+
+    Returns
+    -------
+    pxx : array-like
+        The psd estimate.
+    fgrid : array-like
+        Frequency grid over which the periodogram was estimated.
+    """
+
+    x = np.atleast_1d(x)
+    n = x.size
+
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    if not nfft:
+        nfft = n
+    a, e, k = lpc(x, order)
+
+    # This is not enough to deal correctly with even/odd size
+    if nfft % 2 == 0:
+        pn = nfft / 2 + 1
+    else:
+        pn = (nfft + 1 )/ 2
+
+    px = 1 / np.fft.fft(a, nfft)[:pn]
+    pxx = np.real(np.conj(px) * px)
+    pxx /= fs / e
+    fx = np.linspace(0, fs * 0.5, pxx.size)
+    return pxx, fx
+
+def taper(n, p=0.1):
+    """Return a split cosine bell taper (or window)
+
+    Parameters
+    ----------
+        n: int
+            number of samples of the taper
+        p: float
+            proportion of taper (0 <= p <= 1.)
+
+    Note
+    ----
+    p represents the proportion of tapered (or "smoothed") data compared to a
+    boxcar.
+    """
+    if p > 1. or p < 0:
+        raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
+                         % p)
+    w = np.ones(n)
+    ntp = np.floor(0.5 * n * p)
+    w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
+    w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
+
+    return w
+
+def atal(x, order, num_coefs):
+    x = np.atleast_1d(x)
+    n = x.size
+    if x.ndim > 1:
+        raise ValueError("Only rank 1 input supported for now.")
+    if not np.isrealobj(x):
+        raise ValueError("Only real input supported for now.")
+    a, e, kk = lpc(x, order)
+    c = np.zeros(num_coefs)
+    c[0] = a[0]
+    for m in range(1, order+1):
+        c[m] = - a[m]
+        for k in range(1, m):
+            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
+    for m in range(order+1, num_coefs):
+        for k in range(1, order+1):
+            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
+    return c
+
+def preemp(input, p):
+    """Pre-emphasis filter."""
+    return lfilter([1., -p], 1, input)
+
+def arspecs(input_wav,order,Atal=False):
+    epsilon = 0.0000000001
+    data = input_wav
+    if Atal:
+        ar = atal(data, order, 30)
+        return ar
+    else:
+        ar = []
+        ars = arspec(data, order, 4096)
+        for k, l in zip(ars[0], ars[1]):
+            ar.append(math.log(math.sqrt((k**2)+(l**2))))
+        for val in range(0,len(ar)):
+            if ar[val] == 0.0:
+                ar[val] = deepcopy(epsilon)
+        mspec1 = np.log10(ar)
+        # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+        ar = dct(mspec1, type=2, norm='ortho', axis=-1)
+        return ar[:30]
+
+def specPS(input_wav,pitch):
+        N = len(input_wav)
+        samps = N/pitch
+        if samps == 0:
+            samps = 1
+        frames = N/samps
+        data = input_wav[0:frames]
+        specs = periodogram(data,nfft=4096)
+        for i in range(1,int(samps)):
+            data = input_wav[frames*i:frames*(i+1)]
+            peri = periodogram(data,nfft=4096)
+            for sp in range(len(peri[0])):
+                specs[0][sp] += peri[0][sp]
+        for s in range(len(specs[0])):
+            specs[0][s] /= float(samps)
+        peri = []
+        for k, l in zip(specs[0], specs[1]):
+            if k == 0 and l == 0:
+                peri.append(epsilon)
+            else:
+                peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
+        # Filter the spectrum through the triangle filterbank
+        mspec = np.log10(peri)
+        # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
+        ceps = dct(mspec, type=2, norm='ortho', axis=-1)
+        return ceps[:50]
+def build_single_feature_row(data,Atal):
+	lpcs = [8,9,10,11,12,13,14,15,16,17]
+        arr = []
+	periodo = specPS(data,50)
+	arr.extend(periodo)
+	for j in lpcs:
+	    if Atal:
+		ars = arspecs(data, j, Atal=True)
+	    else:
+	       ars = arspecs(data, j)
+	    arr.extend(ars)
+	for i in range(len(arr)):
+		if np.isnan(np.float(arr[i])):
+			arr[i] = 0.0
+	return arr
+
+def Create_features(input_wav,feature_file_name, begin=None,end=None,Atal=False):
+    X = build_data(input_wav,begin,end)
+    full_path = os.path.realpath(__file__)
+    output_directory = os.path.dirname(full_path)+'/Features/'
+    if Atal:
+        feature_file = output_directory+"ATAL_features_"+feature_file_name+'.txt'
+    else:
+        feature_file = output_directory+"features_"+feature_file_name+'.txt'
+    if begin is not None and end is not None:
+	arr = [input_wav.replace('.wav','')]
+	arr.extend(build_single_feature_row(X,Atal))
+	np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
+	return arr
+    arcep_mat = []
+    for i in range(len(X)):
+	arr = [input_wav.replace('.wav','_PART_')+str(i)]
+	arr.extend(build_single_feature_row(X[i], Atal))
+	arcep_mat.append(arr)
+    np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
+    return arcep_mat
+
+
@@ -0,0 +1,47 @@
+import Extract_Features as features
+from subprocess import call
+import os
+import sys
+import shlex
+import argparse
+
+
+def easy_call(command, debug_mode=False):
+    try:
+        #command = "time " + command
+        if debug_mode:
+            print >>sys.stderr, command
+        call(command, shell=True)
+    except Exception as exception:
+        print "Error: could not execute the following"
+        print ">>", command
+        print type(exception)     # the exception instance
+        print exception.args      # arguments stored in .args
+        exit(-1)
+
+
+if __name__ == "__main__":
+    # parse arguments
+    parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
+    parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
+    parser.add_argument('formants_file', default='', help="output formant text file")
+    parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
+    parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
+    args = parser.parse_args()
+    full_path = os.path.realpath(__file__)
+    if not os.path.exists(os.path.dirname(full_path)+'/Features/'):
+        os.makedirs(os.path.dirname(full_path)+'/Features/')
+    if not os.path.exists(os.path.dirname(full_path)+'/Predictions/'):
+        os.makedirs(os.path.dirname(full_path)+'/Predictions/')
+
+    if args.begin > 0.0 or args.end > 0.0:
+    	Data = features.Create_features(args.wav_file, args.formants_file, args.begin, args.end)
+    	ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
+	pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
+	easy_call("th load_estimation_model.lua " + ff + ' ' + pf)
+    else:
+    	Data = features.Create_features(args.wav_file, args.formants_file)
+    	ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
+	pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
+	easy_call("th load_tracking_model.lua " + ff + ' ' + pf)
+
@@ -0,0 +1,34 @@
+require 'torch'   -- torch
+require 'optim'
+require 'nn'      -- provides a normalization operator
+
+function string:split(sep)
+  local sep, fields = sep, {}
+  local pattern = string.format("([^%s]+)", sep)
+  self:gsub(pattern, function(substr) fields[#fields + 1] = substr end)
+  return fields
+end
+
+local f_file = io.open(arg[1], 'r')
+local p_file = io.open(arg[2], 'w')
+local data = torch.Tensor(1, 351)
+local name = ''
+for line in f_file:lines('*l') do
+local l = line:split(',')
+first = true
+	for key, val in ipairs(l) do
+	  if first == false then
+	  data[1][key] = val
+	  else data[1][key] = 0
+	  first = false
+	  name = val
+	  end
+	end
+end
+
+local X = data[{{},{2,-1}}]
+model = torch.load('estimation_model.dat')
+local myPrediction = model:forward(X)
+p_file:write('NAME,F1,F2,F3,F4\n')
+p_file:write(name..','..tostring(myPrediction[1][1])..','..tostring(myPrediction[1][2])..','..tostring(myPrediction[1][3])..','..tostring(myPrediction[1][4])..'\n')
+