Mercurial > hg > cm

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cortical_model.py	Sat Jan 25 20:00:38 2014 +0000
@@ -0,0 +1,343 @@
+"""
+A module used for auditory analysis.
+
+Models currently implemented:
+    * Frequency-modulation analysis model based on the human auditory system.
+
+Model implementations in progress:
+    * Glasberg and Moore model
+
+Packaged dependencies:
+    * utils.py and/or utils.pyc
+    * erb.dat
+    * outMidFir.dat
+
+External dependencies:
+    * scipy
+    * numpy
+    * copy
+    * matplotlib
+"""
+
+import utils
+import scipy.signal as sp
+import numpy as np
+from copy import deepcopy
+import matplotlib.pyplot as plt
+
+def get_partial_loudness():
+    """
+    A function to calculate the partial loudness of an excitation pattern at given ERB.
+
+    TO DO
+    """
+
+
+    return
+
+def get_excitation_i(input, fs, SPL, rectify=False):
+    """
+    A function to calculate the excitation intensity of the input signal.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required)
+        * fs (type: numerical) - sample frequency of the signal, input. (Required)
+        * SPL (type: double) - the sound pressure level (SPL) at 0 dBFS, e.g., the SPL of a sine
+            wave with peaks at amplitude 1 and troughs at amplitude -1. (Required)
+        * rectify (type: boolean) - Specifies whether to include half wave rectification, modelling the direction
+            of that the cochlear nerves vibrate.
+            True to include, False to ignore. (Optional; Default = False)
+
+    Returns:
+        * gtfs (type: numpy array of floats) - array with size ((39,) + np.shape(input)) containing the excitation
+            pattern (in sound intensity) for each ERB of input signal. The excitation pattern for the nth ERB can
+            be accessed with gtfs[n].
+    """
+
+    input = np.array(input)
+    inputOMFir = outMidFir(input)
+    inputPa = v_Pascal(inputOMFir, SPL)
+    gtfs = gamma_tone_filter(inputPa, fs)
+    if (rectify):
+        gtfs = half_rectification(gtfs)
+    gtfs = pa_i(gtfs)
+    gtfs = at_normalise(gtfs)
+
+    return gtfs
+
+def plot_excitation_tf(fs = 44100, outMidFilt = True, xscale = 'log', yscale = 'log'):
+    """
+    A function that plots the transfer function of the outer middle ear and each gammatone filter.
+
+    Parameters:
+        * fs (type: numerical) - the sampling frequency of the signal. (Optional; Default = 44100)
+        * outMidFilt (type: boolean) - filter the signal by the outer and middle ear FIR filter. (Optional; Default = True)
+        * xscale (type: string) - the scale of the frequency axis. Values are 'log' or 'linear'. (Optional; Default = 'log')
+        * yscale (type: string) - the scale of the amplitude axis. Values are 'log' or 'linear'. (Optional; Default = 'log')
+
+    Returns:
+        * y (type: numpy array of floats) - array with size ((39,np.ceil(fs))) containing the impulse response of the
+            signal at each gammatone filter (and optionally, including the outer middle ear response). The response at the
+            nth gammatone filter can be accessed by y[n].
+    """
+
+    input = np.zeros(np.ceil(fs))
+    input[0] = 1
+    if(outMidFilt): input = outMidFir(input)
+    y = gamma_tone_filter(input, fs)
+
+    for i in range(np.shape(y)[0]):
+        utils.plot_fft(y[i],xscale, yscale, False)
+
+    plt.show()
+
+    return y
+
+def get_modulation_i(input, fs):
+    """
+    A function to calculate the modulation intensity of the input intensity signal. The function implements a
+    filter bank of bandpass filters with cut off frequencies ranging from 0.25 to 16 Hz.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - the input intensity signal.
+            E.g., use get_excitation_i() to obtain excitation intensity and use as input.
+        * fs (type: numerical) - sampling frequency of input signal
+
+    Returns:
+        * y (type: numpy array of floats) - array with size ((10,) + np.shape(input)) containing the
+        modulation intensity of the signal at each modulation filter. The modulation intensity for the nth filter can
+        be accessed with y[n].
+    """
+
+
+    input = np.array(input)
+    b = fir_antialias(fs)
+    input_lp = sp.lfilter(b,(1),input_fr)
+    input_ds = downsample(input_lp, fs)
+    fc = np.array(utils.exp_sequence(-2,4,10))
+    bw = fc/2
+    y = decomposition(input_ds, fs, fc, bw)
+
+    return y
+
+def outMidFir(input):
+    """
+    A function to filter the input signal with the response of the outer and middle ear.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required)
+
+    Returns:
+        * y (type: numpy array of floats) - array with dimensions equal to the input signal filtered by the response of
+            the outer and middle ear.
+    """
+
+    input = np.array(input)
+    b = utils.load_outMidFir_coeff()
+    y = sp.lfilter(b, (1), input)
+
+    return y
+
+def gamma_tone_filter(input, fs):
+    """
+    A function to filter to decompose the input signal into 39 different gammatones filtered signals modelling the ERBs.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required)
+        * fs (type: numerical) - sample frequency of the signal, input. (Required)
+
+    Returns:
+        * y (type: numpy array of floats) - array with size ((39),np.shape(input)) containing the impulse response of the
+            signal at each gammatone filter. The response at the nth gammatone filter can be accessed by y[n].
+    """
+
+    input = np.array(input)
+    fc, bw = utils.load_erb_data()
+    y = decomposition(input,fs,fc,bw)
+
+    return y
+
+def v_Pascal(input, SPL):
+    """
+    A function to convert a signal, normalised to an amplitude range of -1 to 1, to a signal represented in pressure (units: Pascal).
+
+    Parameters:
+        * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required)
+        * SPL (type: double) - the sound pressure level (SPL) at 0 dBFS, e.g., the SPL of a sine
+            wave with peaks at amplitude 1 and troughs at amplitude -1. (Required)
+
+    Returns:
+        * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input represented
+            as a pressure signal.
+    """
+
+    input = np.array(input)
+    y = np.sign(input)*(0.00002*10**(np.log10(np.abs(input))+dBFS/20))
+
+    return y
+
+def pa_i(input, C=406):
+    """
+    A function to convert a pressure signal (unit: Pascal) to an intensity signal.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - pressure signal (unit: Pascal) (Required)
+        * C (type: double) - the acoustic impedance of the air (Optional; Default = 406)
+
+    Returns:
+        * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input represented
+        as a pressure signal.
+    """
+
+    input = np.array(input)
+    y = (input**2) / C
+
+    return y
+
+def at_normalise(input):
+    """
+    A function to normalise an intensity signal with the audibility threshold.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - intensity signal (unit: Pascal) (Required)
+
+    Returns:
+        * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input normalised
+            with the audibility threshold.
+    """
+
+    input = np.array(input)
+    y = input / 1*(10**12)
+
+    return
+
+def downsample(input, factor=100):
+    """
+    A function to downsample a signal, input, with sampling frequency, fs, by a downsample factor of factor.
+
+    NOTE: It is advised to use the fir_antialias() function before downsampling to remove any high frequencies
+        which would otherwise represented as low frequencies due to aliasing.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - input signal. (Required)
+        * factor - downsample factor (Optional; Default = 100)
+
+    Returns:
+        * output (type: numpy array of floats) - array with outer dimensions equivalent to the to the input, and
+            inner dimension equal to np.floor(lenIn / factor) where lenIn is the length of the inner dimension.
+    """
+
+    input = np.array(input)
+    shapeIn = np.shape(input)
+    nDim = np.shape(shapeIn)
+    lenIn = shapeIn[nDim[0]-1]
+    lenOut = np.floor(lenIn / factor)
+    n = np.linspace(0,lenIn,lenOut, endpoint=False).astype(np.int)
+    output = input[...,n]
+
+    return output
+
+def antialias_fir(fs, fc=100, order=64):
+    """
+    A function which returns the b coefficients for a lowpass fir filter with specified requirements.
+    Made specifically to remove aliasing when downsampling, but can be used for any application that
+    requires a lowpass filter.
+
+    Parameters:
+        * fs (type: numerical) - sampling frequency of the signal to be filtered (Required)
+        * fc (type: numerical) - unnormalised cut off frequency of the filter (Optional; Default = 100)
+        * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 64)
+
+    Returns:
+        * b (type: numpy array of floats) - an array of size order + 1 (i.e. a coefficient for each tap)
+    """
+
+    nyquist = 0.5*fs
+    fcNorm = fc/nyquist
+    b = sp.firwin(order+1, fcNorm)
+
+    return b
+
+def half_rectification(input):
+    """
+    A function which performs a half-wave rectification on the input signal.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - input signal. (Required)
+
+    Returns:
+        * y (type: numpy array of floats) - an array with dimensions of input containing the half-wave rectification of
+            input.
+    """
+
+
+    y = np.array(input)
+    y[y<0] = 0
+
+    return y
+
+def decomposition(input, fs, fc, bw, order=1024, verbose = False):
+    """
+    A function to run the input filter through a bandpass filter bank of length equal to the length of fc. Each
+    bandpass filter is designed by defining the centre frequencies, fc, and bandwidths, bw.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - input signal. (Required)
+        * fs (type: numerical) - the sampling frequency of the input signal. (Required)
+        * fc (type: array-like vector of floats) - the centre off frequencies (unnormalised) of each bandpass filter.
+            The length of this vector determines the number of filters in the bank.
+        * bw (type: array-like vector of floats) - the bandwidths (unnormalised) of each bandpass filter. Must be equal
+            to or more than the length of fc. If the length is more, all elements exceeding the length of fc - 1 will be
+            ignored.
+        * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 1024)
+        * verbose (type: boolean) - determines whether to display the current subroutine/progress of the procedure.
+            (Optional; Default = False)
+
+    Returns:
+    * y (type: numpy array of floats) - an array with inner dimensions equal to that of the input and outer dimension equal to
+        the length of fc (i.e. the number of bandpass filters in the bank) containing the outputs to each filter. The output
+        signal of the nth filter can be accessed using y[n].
+    """
+
+
+    input = np.array(input)
+    nFreqs = len(fc)
+    shape = (nFreqs,) + np.shape(input)
+    shape = shape[0:]
+    y = np.zeros(shape)
+
+    if(verbose): print "Running frequency decomposition."
+    for i in range(nFreqs):
+        if(verbose): print str(100.0*i/nFreqs) + "% complete."
+        low = fc[i]-bw[i]/2;
+        high = fc[i]+bw[i]/2;
+        if(verbose): print "Low: " + str(low) + "; High: " + str(high)
+        b = fir_bandpass(low, high, fs, order, verbose)
+        x = deepcopy(input)
+        y[i] = sp.lfilter(b,(1),x)
+
+    return y
+
+def fir_bandpass(low, high, fs, order = 1024, verbose = False):
+    """
+    A function which returns the b coefficients for a bandpass fir filter with specified requirements.
+
+    Parameters:
+        * low - the lower cutoff frequency of the filter. (Required)
+        * high - the upper cutoff frequency of the filter. (Required)
+        * fs (type: numerical) - sampling frequency of the signal to be filtered. (Required)
+        * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 1024)
+        * verbose (type: boolean) - determines whether to display the current progress (or info on the current subroutine)
+        of the procedure. (Optional; Default = False)
+
+    Returns:
+        * b (type: numpy array of floats) - an array of size order + 1 (i.e. a coefficient for each tap).
+    """
+
+    nyquist = 0.5*fs
+    lowNorm = low/nyquist
+    highNorm = high/nyquist
+    if(verbose): print "Low: " + str(lowNorm) + "; High: " + str(highNorm)
+    b = sp.firwin(order+1, [lowNorm, highNorm], pass_zero=False)
+
+    return b
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run.py	Sat Jan 25 20:00:38 2014 +0000
@@ -0,0 +1,8 @@
+import utils
+import cortical_model as cm
+import scipy.io.wavfile as wave
+import os
+import numpy as np
+
+fs, wav = utils.load_wav_data("impulse.wav")
+y = cm.gamma_tone_filter(wav, fs)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Sat Jan 25 20:00:38 2014 +0000
@@ -0,0 +1,216 @@
+"""
+A utility module to assist cortical_model.py with general procedures, e.g., file reading and writing.
+
+Packaged dependencies:
+    * erb.dat
+    * outMidFir.dat
+
+External dependencies:
+    * scipy
+    * numpy
+    * matplotlib
+"""
+
+import numpy as np
+import scipy.io.wavfile as wave
+import matplotlib.pyplot as plt
+import scipy.fftpack as fft
+
+def load_erb_data():
+	"""
+	Loads and returns 39 ERB frequencies and bandwidths
+
+    Parameters:
+        NONE
+
+    Returns:
+        * fc (type: numpy array of floats) - a vector of length 39 with elements containing
+            the centre frequencies of each ERB.
+        * bw (type: numpy array of floats) - a vector of length 39 with elements containing
+            the bandwidths of each ERB.
+
+    Dependencies:
+        * erb.dat (place in the same folder as utils.pyc)
+	"""
+	# read our data from a text file
+	data=np.loadtxt("erb.dat",delimiter=",")
+
+	# load centre frequencies from the first column into fc
+	fc=np.array(data[:,0])
+	# load bandwidths from the second column into bw
+	bw=np.array(data[:,1])
+
+	return fc,bw
+
+def load_outMidFir_coeff():
+    """
+    Loads and returns 39 ERB frequencies and bandwidths
+
+    Parameters:
+        NONE
+
+    Returns:
+        * b (type: numpy array of floats) - a vector of length 4097 containing the impulse
+            response of the outer middle ear.
+
+    Dependencies:
+        * outMidFir.dat (place in the same folder as utils.pyc)
+    """
+
+    b=np.loadtxt("outMidFir.dat", delimiter=",")
+
+    return b
+
+def exp_sequence(start, stop, n, base=2):
+    """
+    Creates a linear sequence with n points starting from start and ending at stop. For each
+    element in the sequence, e, the output sequence is 2^e, generating an exponential sequence
+    with base 2.
+
+    Parameters:
+        * start (type: numerical int) - determines the first element of the sequence, i.e.,
+            base^start. (Required)
+        * stop (type: numerical int) - determines the last element of the sequence, i.e.,
+            base^stop. (Required)
+        * n (type = numerical int) - determines the number of elements in the sequence. (Required)
+        * base (type: numerical) - determines the exponential base. (Optional; Default = 2)
+
+    Returns:
+        * seq - the exponential sequence
+    """
+
+    seq = [base**x for x in np.linspace(start,stop,n)]
+
+    return seq
+
+def wavread(file):
+    """
+    Reads the audio data from a wav file and converts it to floating point ranging from -1 to 1.
+
+    Parameters:
+        * file (type: string) - the name of the wav file to read from. (Required)
+
+    Returns:
+        * fs (type: numerical) - the sampling frequency of the signal storied in file
+        * data (type: numpy array of floats) - the data of the signal stored in file normalised
+            to an amplitude range of -1 to 1.
+    """
+
+    fs,data = wave.read(file)
+    data = np.array(int16_to_nfloat(data))
+
+    return fs,data
+
+def wavwrite(file, fs, data, precision = 16):
+    """
+    Unnormalises the audio data to a specified integer precision and converts to an int, then
+    writes the audio data to a wav file. (E.g., if 16-bit precision, then highest amplitude
+    is equal to 2^16).
+
+    Parameters:
+        * file (type: string) - the name of the wav file to write to. (Required)
+        * fs (type: numerical) - the sampling frequency of the signal. (Required)
+        * data (type: array-like matrix of floats) - the signal data normalised from -1 to 1. The signal will be clipped if not
+        in this range. (Required)
+        * precision (type: numerical int)- the bit precision to store at. Can only be 16 or 32 bit, because that is
+        all scipy allows.
+
+    Returns:
+        NONE
+
+    TODO explore WAVE package to be allow for 24bit precision.
+    """
+
+    data[data>1] = 1
+    data[data<-1] = -1
+
+    if(precision == 16):
+        dtype = np.int16
+    elif(precision == 32):
+        dtype = np.int32
+    else:
+        print "Error: precision can only be 16 or 32 bit due to scipy package."
+        return
+
+    data = nfloat_to_int(data)
+    wave.write(file, fs, data)
+
+    return
+
+def plot_fft(x, xscale = 'log', yscale = 'log', show = True):
+    """
+    Plots the fft of signal x. If the figure is not shown, the current plot is held to allow other plots to
+    be added to the same figure.
+
+    Parameters:
+        * x (type: array-like matrix of floats) - the signal to be analysed. (Required)
+        * xscale (type: string) - the scale of the frequency axis. Values are 'log' or 'linear'. (Optional; Default = 'log')
+        * yscale (type: string) - the scale of the amplitude axis. Values are 'log' or 'linear'. (Optional; Default = 'log')
+        * show (type: boolean) - specifies whether the figure should be shown. If False, the current plot will be held so
+        other plots can be added to the figure. (Optional; Default = True)
+
+    Returns:
+        NONE
+    """
+
+    if(xscale.lower() == "log"): plt.gca().set_xscale('log')
+    if(xscale.lower() == "log"): plt.gca().set_yscale('log')
+
+    fftx = np.absolute(fft.fft(x))
+    plt.plot(range(np.shape(x)[0]),fftx)
+    if(show):
+        plt.show()
+    else:
+        plt.hold(True)
+
+    return
+
+def int_to_nfloat(input, type=np.float32):
+    """
+    Convert integer with to floating point with a range from -1 to 1.
+
+    Parameters:
+        * input (type: array-like matrix of ints) - a signed integer array. (Required)
+        dtype : the output datatype. (Optional; Default = np.float32)
+
+    Returns:
+        * y (type: array-like matrix of floats) - a float array normalised to a
+            range of -1 to 1.
+    """
+
+    input = np.array(input)
+    assert input.dtype.kind == 'i', "'input' must be an array-like matrix of integers."
+    type = np.dtype(type);
+    inputdtype = np.dtype(type(input[0]))
+    input = input.astype(type)
+
+    input[input > 0] = input[input > 0] / np.iinfo(inputdtype).max
+    input[input < 0] = input[input < 0] / -np.iinfo(inputdtype).min
+
+    y = input
+    return y
+
+def nfloat_to_int(input, type=np.int16):
+    """
+    Convert a float array with amplitude ranging from -1 to 1 to a unnormalised signed
+    integer array with specified precision.
+
+    Parameters:
+        * input (type: array-like matrix of floats) - a float array. (Required)
+        dtype : the output datatype (also determines the precision).
+        (Optional; Default = np.int16)
+
+    Returns:
+        * y (type: array-like matrix of ints) - an unnormalised to a
+        range of -1 to 1.
+    """
+
+    input = np.array(input)
+    assert input.dtype.kind == 'f', "'input' must be an array of floats!"
+
+    input[input > 0] = input[ input > 0 ] * np.iinfo(np.int16).max
+    input[input < 0] = input[ input < 0 ] * -np.iinfo(np.int16).min
+
+    y = input.astype(type)
+
+    return y
\ No newline at end of file