Add files via upload

2016-06-22 19:24:16 +03:00
parent d27f7aa59e
commit a0c1265dca
3 changed files with 354 additions and 0 deletions
@@ -0,0 +1,273 @@
 __author__ = 'shua'
 import argparse
 import numpy as np
 import wave
 import os
 from os import listdir
 from os.path import isfile, join
 import math
 from scipy.fftpack.realtransforms import dct
 from scipy.signal import lfilter, hamming
 from copy import deepcopy
 from scipy.fftpack import fft, ifft
 from scikits.talkbox.linpred import lpc
 import shutil
 # Tiny positive constant; specPS() substitutes it for spectrum entries whose
 # power and frequency are both exactly zero, before the log10 is taken.
 epsilon = 0.0000000001
 # NOTE(review): presumably the pre-emphasis coefficient meant for preemp();
 # it is not referenced anywhere in the visible code -- confirm before use.
 prefac = .97
 def build_data(wav,begin=None,end=None):
    """Read the samples of a WAV file and either crop or frame them.

    The raw frame bytes are interpreted as signed 16-bit integers; the
    sampling rate is assumed to be 16000 Hz (hard-coded) and the channel
    count is not inspected.

    Parameters
    ----------
    wav : str or file-like
        WAV file to read.
    begin, end : float or int, optional
        Start and end time in seconds.  Only when *both* are given is the
        signal cropped; a negative value counts from the end of the signal,
        as with ordinary slicing.

    Returns
    -------
    numpy.ndarray
        The cropped samples, when ``begin`` and ``end`` are both given.
    list of numpy.ndarray
        Otherwise, frames of up to 480 samples taken every 160 samples
        (30 ms windows with a 10 ms hop at 16 kHz).  Frame starts stop 100
        samples before the end, so the last frames may be shorter than 480.
    """
    wav_in_file = wave.open(wav, 'rb')
    try:
        dstr = wav_in_file.readframes(wav_in_file.getnframes())
    finally:
        # Always release the file handle, even if reading fails.
        wav_in_file.close()
    # frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy keeps the result writable, as it was before.
    data = np.frombuffer(dstr, np.int16).copy()
    if begin is not None and end is not None:
        # Times arrive as floats from the command line; slice bounds must be
        # integers.
        first = int(round(begin*16000))
        last = int(round(end*16000))
        return data[first:last]
    return [data[i:i + 480] for i in range(0, len(data)-100, 160)]
 def periodogram(x, nfft=None, fs=1):
    """Compute the periodogram of the given signal, with the given fft size.
    Parameters
    ----------
    x : array-like
        input signal
    nfft : int
        size of the fft to compute the periodogram. If None (default), the
        length of the signal is used. if nfft > n, the signal is 0 padded.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
        Nyquist limit).
    Returns
    -------
    pxx : array-like
        The psd estimate.
    fgrid : array-like
        Frequency grid over which the periodogram was estimated.
    Raises
    ------
    ValueError
        If x has more than one dimension, is not real, or nfft is smaller
        than the signal length.
    Examples
    --------
    Generate a signal with two sinusoids, and compute its periodogram:
    >>> fs = 1000
    >>> x = np.sin(2 * np.pi  * 0.1 * fs * np.linspace(0, 0.5, int(0.5*fs)))
    >>> x += np.sin(2 * np.pi  * 0.2 * fs * np.linspace(0, 0.5, int(0.5*fs)))
    >>> px, fx = periodogram(x, 512, fs)
    Notes
    -----
    Only real signals supported for now.
    Returns the one-sided version of the periodogram.
    Discrepancy with matlab: matlab compute the psd in unit of power / radian /
    sample, and we compute the psd in unit of power / sample: to get the same
    result as matlab, just multiply the result from talkbox by 2pi"""
    x = np.atleast_1d(x)
    n = x.size
    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    if not nfft:
        nfft = n
    if nfft < n:
        raise ValueError("nfft < signal size not supported yet")
    pxx = np.abs(fft(x, nfft)) ** 2
    # Number of one-sided bins.  Floor division keeps pn an integer under
    # both Python 2 and Python 3; a float here breaks the slice and linspace.
    if nfft % 2 == 0:
        pn = nfft // 2 + 1
    else:
        pn = (nfft + 1) // 2
    fgrid = np.linspace(0, fs * 0.5, pn)
    return pxx[:pn] / (n * fs), fgrid
 def arspec(x, order, nfft=None, fs=1):
    """Compute the spectral density using an AR model.
    An AR model of the signal is estimated through the Yule-Walker equations;
    the estimated AR coefficient are then used to compute the spectrum, which
    can be computed explicitly for AR models.
    Parameters
    ----------
    x : array-like
        input signal
    order : int
        Order of the LPC computation.
    nfft : int
        size of the fft to compute the periodogram. If None (default), the
        length of the signal is used. if nfft > n, the signal is 0 padded.
    fs : float
        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
        Nyquist limit).
    Returns
    -------
    pxx : array-like
        The psd estimate.
    fgrid : array-like
        Frequency grid over which the periodogram was estimated.
    Raises
    ------
    ValueError
        If x has more than one dimension or is not real.
    """
    x = np.atleast_1d(x)
    n = x.size
    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    if not nfft:
        nfft = n
    # lpc's result is unpacked as (coefficients a, error term e, k); only a
    # and e are used below.
    a, e, k = lpc(x, order)
    # This is not enough to deal correctly with even/odd size
    # Floor division keeps pn an integer under both Python 2 and Python 3;
    # a float here breaks the slice below.
    if nfft % 2 == 0:
        pn = nfft // 2 + 1
    else:
        pn = (nfft + 1) // 2
    # Squared magnitude of 1 / A(f) on the one-sided grid.
    px = 1 / np.fft.fft(a, nfft)[:pn]
    pxx = np.real(np.conj(px) * px)
    # Equivalent to multiplying by e / fs.
    pxx /= fs / e
    fx = np.linspace(0, fs * 0.5, pxx.size)
    return pxx, fx
 def taper(n, p=0.1):
    """Return a split cosine bell taper (or window)
    Parameters
    ----------
        n: int
            number of samples of the taper
        p: float
            proportion of taper (0 <= p <= 1.)
    Returns
    -------
        w: ndarray
            window of n samples; the first and last floor(0.5 * n * p)
            samples are tapered, the remaining samples are 1.
    Raises
    ------
        ValueError
            if p is outside [0, 1].
    Note
    ----
    p represents the proportion of tapered (or "smoothed") data compared to a
    boxcar.
    """
    if p > 1. or p < 0:
        raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
                         % p)
    w = np.ones(n)
    # np.floor returns a float; slice bounds and linspace counts must be ints.
    ntp = int(np.floor(0.5 * n * p))
    # With nothing to taper, w[-0:] would address the whole window and the
    # assignment of an empty ramp would fail, so leave the boxcar untouched.
    if ntp > 0:
        w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
        w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
    return w
 def atal(x, order, num_coefs):
    """Derive ``num_coefs`` recursive coefficients from an LPC fit of ``x``.

    The signal is fitted with ``lpc(x, order)`` and the returned coefficients
    ``a`` are expanded with the recursion
    ``c[m] = -a[m] + sum_k (k/m - 1) * a[k] * c[m-k]`` (the ``-a[m]`` term is
    only present for ``m <= order``).
    NOTE(review): presumably Atal's LPC-to-cepstrum recursion, going by the
    function name -- confirm against the reference the model was trained with.

    Parameters
    ----------
    x : array-like
        Real, one-dimensional input signal.
    order : int
        Order of the LPC computation.
    num_coefs : int
        Number of coefficients to return.

    Returns
    -------
    c : ndarray
        Array of ``num_coefs`` coefficients; ``c[0]`` is ``a[0]``.

    Raises
    ------
    ValueError
        If x has more than one dimension or is not real.
    """
    x = np.atleast_1d(x)
    if x.ndim > 1:
        raise ValueError("Only rank 1 input supported for now.")
    if not np.isrealobj(x):
        raise ValueError("Only real input supported for now.")
    a, e, kk = lpc(x, order)
    c = np.zeros(num_coefs)
    if num_coefs < 1:
        return c
    c[0] = a[0]
    # Stop at the end of c so that asking for fewer coefficients than the LPC
    # order truncates the recursion instead of raising IndexError.
    for m in range(1, min(order, num_coefs - 1) + 1):
        c[m] = - a[m]
        for k in range(1, m):
            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
    # Beyond the LPC order there is no a[m] term; only the sum remains.
    for m in range(order+1, num_coefs):
        for k in range(1, order+1):
            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
    return c
 def preemp(input, p):
    """Filter ``input`` with the FIR filter ``y[n] = x[n] - p * x[n-1]``."""
    numerator = [1., -p]
    denominator = 1
    return lfilter(numerator, denominator, input)
 def arspecs(input_wav,order,Atal=False):
    """Return 30 AR-based features for one signal.

    With ``Atal`` true the result is ``atal(input_wav, order, 30)``.
    Otherwise the 4096-point AR spectrum from ``arspec`` is reduced as
    follows: each (power, frequency) pair is mapped to the natural log of
    its Euclidean norm, exact zeros are replaced by a tiny constant, log10 is
    applied, and the first 30 coefficients of an orthonormal type-2 DCT are
    returned.
    """
    if Atal:
        return atal(input_wav, order, 30)
    floor_value = 0.0000000001
    power, freqs = arspec(input_wav, order, 4096)
    log_norms = [math.log(math.sqrt((pw**2)+(fr**2)))
                 for pw, fr in zip(power, freqs)]
    # A log of exactly zero is nudged to a tiny positive value before log10.
    log_norms = [floor_value if value == 0.0 else value
                 for value in log_norms]
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    cepstrum = dct(np.log10(log_norms), type=2, norm='ortho', axis=-1)
    return cepstrum[:30]
 def specPS(input_wav,pitch):
    """Return 50 cepstrum-like features from an averaged periodogram.

    The signal is cut into ``len(input_wav) // pitch`` consecutive,
    non-overlapping segments (at least one); the 4096-point periodograms of
    the segments are averaged.  Each averaged (power, frequency) pair is then
    mapped to the natural log of its Euclidean norm, log10 is applied, and
    the first 50 coefficients of an orthonormal type-2 DCT are returned.

    Parameters
    ----------
    input_wav : array-like
        Real, one-dimensional signal.
    pitch : int
        Nominal segment length in samples.

    Returns
    -------
    ndarray
        The first 50 DCT coefficients.
    """
    N = len(input_wav)
    # Floor division keeps segment counts and slice bounds integral under
    # both Python 2 and Python 3.
    samps = int(N // pitch)
    if samps == 0:
        samps = 1
    frames = N // samps
    power, fgrid = periodogram(input_wav[0:frames], nfft=4096)
    for i in range(1, samps):
        power += periodogram(input_wav[frames*i:frames*(i+1)], nfft=4096)[0]
    power /= float(samps)
    peri = []
    for k, l in zip(power, fgrid):
        if k == 0 and l == 0:
            # log(0) is undefined; substitute the module-level epsilon.
            peri.append(epsilon)
        else:
            peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
    # Second logarithm; entries that are not positive become NaN or -inf.
    mspec = np.log10(peri)
    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)
    return ceps[:50]
 def build_single_feature_row(data,Atal):
    """Build the flat feature list for one signal segment.

    The row is the 50 values of ``specPS(data, 50)`` followed by the 30
    values of ``arspecs`` for every LPC order from 8 to 17, i.e. 350 values.
    NaN entries are replaced by 0.0.

    Parameters
    ----------
    data : array-like
        Samples of the segment.
    Atal : bool
        Forwarded to ``arspecs`` to select its ``atal`` branch.

    Returns
    -------
    list
        The feature values.
    """
    arr = []
    arr.extend(specPS(data,50))
    for j in range(8, 18):
        arr.extend(arspecs(data, j, Atal=bool(Atal)))
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    return [0.0 if np.isnan(float(value)) else value for value in arr]
 def Create_features(input_wav,feature_file_name, begin=None,end=None,Atal=False):
    """Extract features from a WAV file and save them as comma-separated text.

    The output goes to ``Features/features_<feature_file_name>.txt`` (or
    ``Features/ATAL_features_<feature_file_name>.txt`` when ``Atal`` is true)
    next to this module; the directory is created when missing.

    Parameters
    ----------
    input_wav : str
        Path of the WAV file; it is also used, without ``.wav``, as the label
        in the first column of every row.
    feature_file_name : str
        Base name of the output file.
    begin, end : float, optional
        When both are given, a single row is computed for that time span
        (seconds); otherwise one row is computed per frame of the file.
    Atal : bool
        Forwarded to ``build_single_feature_row``.

    Returns
    -------
    list
        The single row (label followed by features) when ``begin`` and
        ``end`` are given, otherwise the list of per-frame rows.
    """
    X = build_data(input_wav,begin,end)
    full_path = os.path.realpath(__file__)
    output_directory = os.path.join(os.path.dirname(full_path), 'Features')
    # Do not rely on the calling script having created the directory.
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
    prefix = "ATAL_features_" if Atal else "features_"
    feature_file = os.path.join(output_directory, prefix+feature_file_name+'.txt')
    if begin is not None and end is not None:
        # X is one cropped segment: emit a single labelled row.
        arr = [input_wav.replace('.wav','')]
        arr.extend(build_single_feature_row(X,Atal))
        np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
        return arr
    # X is a list of frames: emit one labelled row per frame.
    arcep_mat = []
    for i, frame in enumerate(X):
        arr = [input_wav.replace('.wav','_PART_')+str(i)]
        arr.extend(build_single_feature_row(frame, Atal))
        arcep_mat.append(arr)
    np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
    return arcep_mat
@@ -0,0 +1,47 @@
 import Extract_Features as features
 from subprocess import call
 import os
 import sys
 import shlex
 import argparse
 def easy_call(command, debug_mode=False):
    """Run ``command`` through the shell and return its exit status.

    Parameters
    ----------
    command : str
        Command line handed to the shell as-is; callers are responsible for
        quoting any arguments.
    debug_mode : bool
        When true, the command is echoed to stderr before it runs.

    Returns
    -------
    int
        Exit status of the command.  A non-zero status is reported on stderr
        rather than silently discarded.

    If the command cannot be launched at all, the error is printed and the
    interpreter exits with status -1.
    """
    try:
        if debug_mode:
            sys.stderr.write("%s\n" % command)
        status = call(command, shell=True)
    except Exception as exception:
        # Explicit writes behave the same under Python 2 and Python 3.
        sys.stdout.write("Error: could not execute the following\n")
        sys.stdout.write(">> %s\n" % command)
        sys.stdout.write("%s\n" % type(exception))     # the exception type
        sys.stdout.write("%s\n" % (exception.args,))   # arguments stored in .args
        sys.exit(-1)
    if status != 0:
        sys.stderr.write("Warning: command exited with status %d: %s\n"
                         % (status, command))
    return status
 if __name__ == "__main__":
    # Command-line driver: extract features from a WAV file, then run the
    # matching Torch model on them.
    # parse arguments
    parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
    parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
    parser.add_argument('formants_file', default='', help="output formant text file")
    parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
    parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
    args = parser.parse_args()
    script_dir = os.path.dirname(os.path.realpath(__file__))
    # Make sure both output directories exist next to this script.
    for sub_dir in ('/Features/', '/Predictions/'):
        if not os.path.exists(script_dir+sub_dir):
            os.makedirs(script_dir+sub_dir)
    # Feature file written by Create_features and prediction file written by
    # the Lua model script.
    ff = str(script_dir+'/Features/features_' + args.formants_file+'.txt')
    pf = str(script_dir+'/Predictions/' +args.formants_file+'.csv')
    if args.begin > 0.0 or args.end > 0.0:
        # A time span was requested: one feature row, estimation model.
        features.Create_features(args.wav_file, args.formants_file, args.begin, args.end)
        easy_call("th load_estimation_model.lua " + ff + ' ' + pf)
    else:
        # No time span: one feature row per frame, tracking model.
        features.Create_features(args.wav_file, args.formants_file)
        easy_call("th load_tracking_model.lua " + ff + ' ' + pf)
@@ -0,0 +1,34 @@
 require 'torch'   -- torch
 require 'optim'
 require 'nn'      -- provides a normalization operator
 -- Split the string on any character contained in `sep` and return the
 -- non-empty pieces as an array. `sep` is inserted into a character class
 -- unescaped, so pattern-magic characters in it are not treated literally.
 function string:split(sep)
  local pieces = {}
  local pattern = string.format("([^%s]+)", sep)
  for piece in self:gmatch(pattern) do
    pieces[#pieces + 1] = piece
  end
  return pieces
 end
 -- Usage: th <this script> <features file> <predictions file>
 -- Reads comma-separated feature rows (first column is a name), runs the
 -- saved estimation model on the last row read, and writes one CSV line with
 -- the four predicted values.
 -- assert() turns a failed io.open into an error carrying the OS message.
 local f_file = assert(io.open(arg[1], 'r'))
 local p_file = assert(io.open(arg[2], 'w'))
 -- One row of 351 columns: column 1 is the name slot, the rest are features.
 local data = torch.Tensor(1, 351)
 local name = ''
 for line in f_file:lines('*l') do
   -- Every line overwrites the same tensor row, so only the last line of the
   -- file is kept.
   for key, val in ipairs(line:split(',')) do
     if key == 1 then
       -- The first field is the row name, not a number.
       data[1][key] = 0
       name = val
     else
       data[1][key] = val
     end
   end
 end
 f_file:close()
 -- Drop the name column before feeding the model.
 local X = data[{{},{2,-1}}]
 local model = torch.load('estimation_model.dat')
 local myPrediction = model:forward(X)
 p_file:write('NAME,F1,F2,F3,F4\n')
 p_file:write(name..','..tostring(myPrediction[1][1])..','..tostring(myPrediction[1][2])..','..tostring(myPrediction[1][3])..','..tostring(myPrediction[1][4])..'\n')
 p_file:close()