From 395878acbc94712d9c8565e727da93da1a73b685 Mon Sep 17 00:00:00 2001
From: Shuadissen <shua.dissen@gmail.com>
Date: Wed, 22 Jun 2016 19:23:04 +0300
Subject: [PATCH] Delete Extract_Features.py

---
 Extract_Features.py | 276 --------------------------------------------
 1 file changed, 276 deletions(-)
 delete mode 100644 Extract_Features.py

diff --git a/Extract_Features.py b/Extract_Features.py
deleted file mode 100644
index bef5fd8..0000000
--- a/Extract_Features.py
+++ /dev/null
@@ -1,276 +0,0 @@
-__author__ = 'shua'
-import argparse
-import numpy as np
-import tempfile
-import wave
-import os
-from os import listdir
-from os.path import isfile, join
-import math
-from scipy.fftpack.realtransforms import dct
-from scipy.signal import lfilter, hamming
-from copy import deepcopy
-from scipy.fftpack import fft, ifft
-from scikits.talkbox.linpred import lpc
-import shutil
-epsilon = 0.0000000001
-prefac = .97
-
-def build_data(wav,begin=None,end=None):
-    wav_in_file = wave.Wave_read(wav)
-    wav_in_num_samples = wav_in_file.getnframes()
-    N = wav_in_file.getnframes()
-    dstr = wav_in_file.readframes(N)
-    data = np.fromstring(dstr, np.int16)
-    if begin is not None and end is not None:
-        return data[begin*16000:end*16000]
-    X = []
-    l = len(data)
-    for i in range(0, l-100, 160):
-        X.append(data[i:i + 480])
-    return X
-
-def periodogram(x, nfft=None, fs=1):
-    """Compute the periodogram of the given signal, with the given fft size.
-
-    Parameters
-    ----------
-    x : array-like
-        input signal
-    nfft : int
-        size of the fft to compute the periodogram. If None (default), the
-        length of the signal is used. if nfft > n, the signal is 0 padded.
-    fs : float
-        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
-        Nyquist limit).
-
-    Returns
-    -------
-    pxx : array-like
-        The psd estimate.
-    fgrid : array-like
-        Frequency grid over which the periodogram was estimated.
-
-    Examples
-    --------
-    Generate a signal with two sinusoids, and compute its periodogram:
-
-    >>> fs = 1000
-    >>> x = np.sin(2 * np.pi  * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
-    >>> x += np.sin(2 * np.pi  * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
-    >>> px, fx = periodogram(x, 512, fs)
-
-    Notes
-    -----
-    Only real signals supported for now.
-
-    Returns the one-sided version of the periodogram.
-
-    Discrepency with matlab: matlab compute the psd in unit of power / radian /
-    sample, and we compute the psd in unit of power / sample: to get the same
-    result as matlab, just multiply the result from talkbox by 2pi"""
-    x = np.atleast_1d(x)
-    n = x.size
-
-    if x.ndim > 1:
-        raise ValueError("Only rank 1 input supported for now.")
-    if not np.isrealobj(x):
-        raise ValueError("Only real input supported for now.")
-    if not nfft:
-        nfft = n
-    if nfft < n:
-        raise ValueError("nfft < signal size not supported yet")
-
-    pxx = np.abs(fft(x, nfft)) ** 2
-    if nfft % 2 == 0:
-        pn = nfft / 2 + 1
-    else:
-        pn = (nfft + 1 )/ 2
-
-    fgrid = np.linspace(0, fs * 0.5, pn)
-    return pxx[:pn] / (n * fs), fgrid
-
-def arspec(x, order, nfft=None, fs=1):
-    """Compute the spectral density using an AR model.
-
-    An AR model of the signal is estimated through the Yule-Walker equations;
-    the estimated AR coefficient are then used to compute the spectrum, which
-    can be computed explicitely for AR models.
-
-    Parameters
-    ----------
-    x : array-like
-        input signal
-    order : int
-        Order of the LPC computation.
-    nfft : int
-        size of the fft to compute the periodogram. If None (default), the
-        length of the signal is used. if nfft > n, the signal is 0 padded.
-    fs : float
-        Sampling rate. By default, is 1 (normalized frequency. e.g. 0.5 is the
-        Nyquist limit).
-
-    Returns
-    -------
-    pxx : array-like
-        The psd estimate.
-    fgrid : array-like
-        Frequency grid over which the periodogram was estimated.
-    """
-
-    x = np.atleast_1d(x)
-    n = x.size
-
-    if x.ndim > 1:
-        raise ValueError("Only rank 1 input supported for now.")
-    if not np.isrealobj(x):
-        raise ValueError("Only real input supported for now.")
-    if not nfft:
-        nfft = n
-    a, e, k = lpc(x, order)
-
-    # This is not enough to deal correctly with even/odd size
-    if nfft % 2 == 0:
-        pn = nfft / 2 + 1
-    else:
-        pn = (nfft + 1 )/ 2
-
-    px = 1 / np.fft.fft(a, nfft)[:pn]
-    pxx = np.real(np.conj(px) * px)
-    pxx /= fs / e
-    fx = np.linspace(0, fs * 0.5, pxx.size)
-    return pxx, fx
-
-def taper(n, p=0.1):
-    """Return a split cosine bell taper (or window)
-
-    Parameters
-    ----------
-        n: int
-            number of samples of the taper
-        p: float
-            proportion of taper (0 <= p <= 1.)
-
-    Note
-    ----
-    p represents the proportion of tapered (or "smoothed") data compared to a
-    boxcar.
-    """
-    if p > 1. or p < 0:
-        raise ValueError("taper proportion should be betwen 0 and 1 (was %f)"
-                         % p)
-    w = np.ones(n)
-    ntp = np.floor(0.5 * n * p)
-    w[:ntp] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0, 0.5, ntp)))
-    w[-ntp:] = 0.5 * (1 - np.cos(np.pi * 2 * np.linspace(0.5, 0, ntp)))
-
-    return w
-
-def atal(x, order, num_coefs):
-    x = np.atleast_1d(x)
-    n = x.size
-    if x.ndim > 1:
-        raise ValueError("Only rank 1 input supported for now.")
-    if not np.isrealobj(x):
-        raise ValueError("Only real input supported for now.")
-    a, e, kk = lpc(x, order)
-    c = np.zeros(num_coefs)
-    c[0] = a[0]
-    for m in range(1, order+1):
-        c[m] = - a[m]
-        for k in range(1, m):
-            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
-    for m in range(order+1, num_coefs):
-        for k in range(1, order+1):
-            c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
-    return c
-
-def preemp(input, p):
-    """Pre-emphasis filter."""
-    return lfilter([1., -p], 1, input)
-
-def arspecs(input_wav,order,Atal=False):
-    epsilon = 0.0000000001
-    data = input_wav
-    if Atal:
-        ar = atal(data, order, 30)
-        return ar
-    else:
-        ar = []
-        ars = arspec(data, order, 4096)
-        for k, l in zip(ars[0], ars[1]):
-            ar.append(math.log(math.sqrt((k**2)+(l**2))))
-        for val in range(0,len(ar)):
-            if ar[val] == 0.0:
-                ar[val] = deepcopy(epsilon)
-        mspec1 = np.log10(ar)
-        # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
-        ar = dct(mspec1, type=2, norm='ortho', axis=-1)
-        return ar[:30]
-
-def specPS(input_wav,pitch):
-        N = len(input_wav)
-        samps = N/pitch
-        if samps == 0:
-            samps = 1
-        frames = N/samps
-        data = input_wav[0:frames]
-        specs = periodogram(data,nfft=4096)
-        for i in range(1,int(samps)):
-            data = input_wav[frames*i:frames*(i+1)]
-            peri = periodogram(data,nfft=4096)
-            for sp in range(len(peri[0])):
-                specs[0][sp] += peri[0][sp]
-        for s in range(len(specs[0])):
-            specs[0][s] /= float(samps)
-        peri = []
-        for k, l in zip(specs[0], specs[1]):
-            if k == 0 and l == 0:
-                peri.append(epsilon)
-            else:
-                peri.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
-        # Filter the spectrum through the triangle filterbank
-        mspec = np.log10(peri)
-        # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
-        ceps = dct(mspec, type=2, norm='ortho', axis=-1)
-        return ceps[:50]
-def build_single_feature_row(data,Atal):
-	lpcs = [8,9,10,11,12,13,14,15,16,17]
-        arr = []
-	periodo = specPS(data,50)
-	arr.extend(periodo)
-	for j in lpcs:
-	    if Atal:
-		ars = arspecs(data, j, Atal=True)
-	    else:
-	       ars = arspecs(data, j)
-	    arr.extend(ars)
-	for i in range(len(arr)):
-		if np.isnan(np.float(arr[i])):
-			arr[i] = 0.0
-	return arr
-
-def Create_features(input_wav,begin=None,end=None,Atal=False):
-    X = build_data(input_wav,begin,end)
-    full_path = os.path.realpath(__file__)
-    output_directory = os.path.dirname(full_path)+'/Features/'
-    if not os.path.exists(output_directory):
-        os.makedirs(output_directory)
-    if Atal:
-        feature_file = output_directory+"ATAL_features_"+input_wav.replace('.wav','.txt')
-    else:
-        feature_file = output_directory+"features_"+input_wav.replace('.wav','.txt')
-    if begin is not None and end is not None:
-	arr = [input_wav.replace('.wav','')]
-	arr.extend(build_single_feature_row(X,Atal))
-	np.savetxt(feature_file,np.asarray([arr]),delimiter=",",fmt="%s")
-	return arr
-    arcep_mat = []
-    for i in range(len(X)):
-	arr = [input_wav.replace('.wav','_PART_')+str(i)]
-	arr.extend(build_single_feature_row(X[i], Atal))
-	arcep_mat.append(arr)
-    np.savetxt(feature_file,np.asarray(arcep_mat),delimiter=",",fmt="%s")
-    return arcep_mat
-
-