Add files via upload

This commit is contained in:
Shuadissen
2016-06-22 19:24:16 +03:00
committed by GitHub
parent d27f7aa59e
commit a0c1265dca
3 changed files with 354 additions and 0 deletions
+273
View File
@@ -0,0 +1,273 @@
__author__ = 'shua'
import argparse
import numpy as np
import wave
import os
from os import listdir
from os.path import isfile, join
import math
from scipy.fftpack.realtransforms import dct
from scipy.signal import lfilter, hamming
from copy import deepcopy
from scipy.fftpack import fft, ifft
from scikits.talkbox.linpred import lpc
import shutil
epsilon = 0.0000000001
prefac = .97
def build_data(wav,begin=None,end=None):
wav_in_file = wave.Wave_read(wav)
wav_in_num_samples = wav_in_file.getnframes()
N = wav_in_file.getnframes()
dstr = wav_in_file.readframes(N)
data = np.fromstring(dstr, np.int16)
if begin is not None and end is not None:
return data[begin*16000:end*16000]
X = []
l = len(data)
for i in range(0, l-100, 160):
X.append(data[i:i + 480])
return X
def periodogram(x, nfft=None, fs=1):
"""Compute the periodogram of the given signal, with the given fft size.
Parameters
----------
x : array-like
input signal
nfft : int
size of the fft to compute the periodogram. If None (default), the
length of the signal is used. if nfft > n, the signal is 0 padded.
fs : float
Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
Nyquist limit).
Returns
-------
pxx : array-like
The psd estimate.
fgrid : array-like
Frequency grid over which the periodogram was estimated.
Examples
--------
Generate a signal with two sinusoids, and compute its periodogram:
>>> fs = 1000
>>> x = np.sin(2 * np.pi * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
>>> x += np.sin(2 * np.pi * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
>>> px, fx = periodogram(x, 512, fs)
Notes
-----
Only real signals supported for now.
Returns the one-sided version of the periodogram.
Discrepency with matlab: matlab compute the psd in unit of power / radian /
sample, and we compute the psd in unit of power / sample: to get the same
result as matlab, just multiply the result from talkbox by 2pi"""
x = np.atleast_1d(x)
n = x.size
if x.ndim > 1:
raise ValueError("Only rank 1 input supported for now.")
if not np.isrealobj(x):
raise ValueError("Only real input supported for now.")
if not nfft:
nfft = n
if nfft < n:
raise ValueError("nfft < signal size not supported yet")
pxx = np.abs(fft(x, nfft)) ** 2
if nfft % 2 == 0:
pn = nfft / 2 + 1
else:
pn = (nfft + 1 )/ 2
fgrid = np.linspace(0, fs * 0.5, pn)
return pxx[:pn] / (n * fs), fgrid
def arspec(x, order, nfft=None, fs=1):
"""Compute the spectral density using an AR model.
An AR model of the signal is estimated through the Yule-Walker equations;
the estimated AR coefficient are then used to compute the spectrum, which
can be computed explicitely for AR models.
Parameters
----------
x : array-like
input signal
order : int
Order of the LPC computation.
nfft : int
size of the fft to compute the periodogram. If None (default), the
length of the signal is used. if nfft > n, the signal is 0 padded.
fs : float
Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
Nyquist limit).
Returns
-------
pxx : array-like
The psd estimate.
fgrid : array-like
Frequency grid over which the periodogram was estimated.
"""
x = np.atleast_1d(x)
n = x.size
if x.ndim > 1:
raise ValueError("Only rank 1 input supported for now.")
if not np.isrealobj(x):
raise ValueError("Only real input supported for now.")
if not nfft:
nfft = n
a, e, k = lpc(x, order)
# This is not enough to deal correctly with even/odd size
if nfft % 2 == 0:
pn = nfft / 2 + 1
else:
pn = (nfft + 1 )/ 2
px = 1 / np.fft.fft(a, nfft)[:pn]
pxx = np.real(np.conj(px) * px)
pxx /= fs / e
fx = np.linspace(0, fs * 0.5, pxx.size)
return pxx, fx
def taper(n, p=0.1):
"""Return a split cosine bell taper (or window)
Parameters
----------
n: int
number of samples of the taper
p: float
proportion of taper (0 <= p <= 1.)
Note
----
p represents the proportion of tapered (or "smoothed") data compared to a
boxcar.
"""
if p > 1. or p < 0:
raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
% p)
w = np.ones(n)
ntp = np.floor(0.5 * n * p)
w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
return w
def atal(x, order, num_coefs):
x = np.atleast_1d(x)
n = x.size
if x.ndim > 1:
raise ValueError("Only rank 1 input supported for now.")
if not np.isrealobj(x):
raise ValueError("Only real input supported for now.")
a, e, kk = lpc(x, order)
c = np.zeros(num_coefs)
c[0] = a[0]
for m in range(1, order+1):
c[m] = - a[m]
for k in range(1, m):
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
for m in range(order+1, num_coefs):
for k in range(1, order+1):
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
return c
def preemp(input, p):
"""Pre-emphasis filter."""
return lfilter([1., -p], 1, input)
def arspecs(input_wav,order,Atal=False):
epsilon = 0.0000000001
data = input_wav
if Atal:
ar = atal(data, order, 30)
return ar
else:
ar = []
ars = arspec(data, order, 4096)
for k, l in zip(ars[0], ars[1]):
ar.append(math.log(math.sqrt((k**2)+(l**2))))
for val in range(0,len(ar)):
if ar[val] == 0.0:
ar[val] = deepcopy(epsilon)
mspec1 = np.log10(ar)
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
ar = dct(mspec1, type=2, norm='ortho', axis=-1)
return ar[:30]
def specPS(input_wav,pitch):
N = len(input_wav)
samps = N/pitch
if samps == 0:
samps = 1
frames = N/samps
data = input_wav[0:frames]
specs = periodogram(data,nfft=4096)
for i in range(1,int(samps)):
data = input_wav[frames*i:frames*(i+1)]
peri = periodogram(data,nfft=4096)
for sp in range(len(peri[0])):
specs[0][sp] += peri[0][sp]
for s in range(len(specs[0])):
specs[0][s] /= float(samps)
peri = []
for k, l in zip(specs[0], specs[1]):
if k == 0 and l == 0:
peri.append(epsilon)
else:
peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
# Filter the spectrum through the triangle filterbank
mspec = np.log10(peri)
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
ceps = dct(mspec, type=2, norm='ortho', axis=-1)
return ceps[:50]
def build_single_feature_row(data,Atal):
lpcs = [8,9,10,11,12,13,14,15,16,17]
arr = []
periodo = specPS(data,50)
arr.extend(periodo)
for j in lpcs:
if Atal:
ars = arspecs(data, j, Atal=True)
else:
ars = arspecs(data, j)
arr.extend(ars)
for i in range(len(arr)):
if np.isnan(np.float(arr[i])):
arr[i] = 0.0
return arr
def Create_features(input_wav,feature_file_name, begin=None,end=None,Atal=False):
X = build_data(input_wav,begin,end)
full_path = os.path.realpath(__file__)
output_directory = os.path.dirname(full_path)+'/Features/'
if Atal:
feature_file = output_directory+"ATAL_features_"+feature_file_name+'.txt'
else:
feature_file = output_directory+"features_"+feature_file_name+'.txt'
if begin is not None and end is not None:
arr = [input_wav.replace('.wav','')]
arr.extend(build_single_feature_row(X,Atal))
np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
return arr
arcep_mat = []
for i in range(len(X)):
arr = [input_wav.replace('.wav','_PART_')+str(i)]
arr.extend(build_single_feature_row(X[i], Atal))
arcep_mat.append(arr)
np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
return arcep_mat
+47
View File
@@ -0,0 +1,47 @@
import Extract_Features as features
from subprocess import call
import os
import sys
import shlex
import argparse
def easy_call(command, debug_mode=False):
try:
#command = "time " + command
if debug_mode:
print >>sys.stderr, command
call(command, shell=True)
except Exception as exception:
print "Error: could not execute the following"
print ">>", command
print type(exception) # the exception instance
print exception.args # arguments stored in .args
exit(-1)
if __name__ == "__main__":
# parse arguments
parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
parser.add_argument('formants_file', default='', help="output formant text file")
parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
args = parser.parse_args()
full_path = os.path.realpath(__file__)
if not os.path.exists(os.path.dirname(full_path)+'/Features/'):
os.makedirs(os.path.dirname(full_path)+'/Features/')
if not os.path.exists(os.path.dirname(full_path)+'/Predictions/'):
os.makedirs(os.path.dirname(full_path)+'/Predictions/')
if args.begin > 0.0 or args.end > 0.0:
Data = features.Create_features(args.wav_file, args.formants_file, args.begin, args.end)
ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
easy_call("th load_estimation_model.lua " + ff + ' ' + pf)
else:
Data = features.Create_features(args.wav_file, args.formants_file)
ff = str(os.path.dirname(os.path.realpath(__file__))+'/Features/features_' + args.formants_file+'.txt')
pf = str(os.path.dirname(os.path.realpath(__file__))+'/Predictions/' +args.formants_file+'.csv')
easy_call("th load_tracking_model.lua " + ff + ' ' + pf)
+34
View File
@@ -0,0 +1,34 @@
require 'torch' -- torch
require 'optim'
require 'nn' -- provides a normalization operator
function string:split(sep)
local sep, fields = sep, {}
local pattern = string.format("([^%s]+)", sep)
self:gsub(pattern, function(substr) fields[#fields + 1] = substr end)
return fields
end
local f_file = io.open(arg[1], 'r')
local p_file = io.open(arg[2], 'w')
local data = torch.Tensor(1, 351)
local name = ''
for line in f_file:lines('*l') do
local l = line:split(',')
first = true
for key, val in ipairs(l) do
if first == false then
data[1][key] = val
else data[1][key] = 0
first = false
name = val
end
end
end
local X = data[{{},{2,-1}}]
model = torch.load('estimation_model.dat')
local myPrediction = model:forward(X)
p_file:write('NAME,F1,F2,F3,F4\n')
p_file:write(name..','..tostring(myPrediction[1][1])..','..tostring(myPrediction[1][2])..','..tostring(myPrediction[1][3])..','..tostring(myPrediction[1][4])..'\n')