Mercurial > hg > cm
changeset 0:5609fd93e935
First commit.
author | Carl Bussey <c.bussey@se10.qmul.ac.uk> |
---|---|
date | Sat, 25 Jan 2014 20:00:38 +0000 |
parents | |
children | dc43033a2c20 |
files | cortical_model.py run.py utils.py |
diffstat | 3 files changed, 567 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cortical_model.py Sat Jan 25 20:00:38 2014 +0000 @@ -0,0 +1,343 @@ +""" +A module used for auditory analysis. + +Models currently implemented: + * Frequency-modulation analysis model based on the human auditory system. + +Model implementations in progress: + * Glasberg and Moore model + +Packaged dependencies: + * utils.py and/or utils.pyc + * erb.dat + * outMidFir.dat + +External dependencies: + * scipy + * numpy + * copy + * matplotlib +""" + +import utils +import scipy.signal as sp +import numpy as np +from copy import deepcopy +import matplotlib.pyplot as plt + +def get_partial_loudness(): + """ + A function to calculate the partial loudness of an excitation pattern at given ERB. + + TO DO + """ + + + return + +def get_excitation_i(input, fs, SPL, rectify=False): + """ + A function to calculate the excitation intensity of the input signal. + + Parameters: + * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required) + * fs (type: numerical) - sample frequency of the signal, input. (Required) + * SPL (type: double) - the sound pressure level (SPL) at 0 dBFS, e.g., the SPL of a sine + wave with peaks at amplitude 1 and troughs at amplitude -1. (Required) + * rectify (type: boolean) - Specifies whether to include half wave rectification, modelling the direction + of that the cochlear nerves vibrate. + True to include, False to ignore. (Optional; Default = False) + + Returns: + * gtfs (type: numpy array of floats) - array with size ((39,) + np.shape(input)) containing the excitation + pattern (in sound intensity) for each ERB of input signal. The excitation pattern for the nth ERB can + be accessed with gtfs[n]. + """ + + input = np.array(input) + inputOMFir = outMidFir(input) + inputPa = v_Pascal(inputOMFir, SPL) + gtfs = gamma_tone_filter(inputPa, fs) + if (rectify): + gtfs = half_rectification(gtfs) + gtfs = pa_i(gtfs) + gtfs = at_normalise(gtfs) + + return gtfs + +def plot_excitation_tf(fs = 44100, outMidFilt = True, xscale = 'log', yscale = 'log'): + """ + A function that plots the transfer function of the outer middle ear and each gammatone filter. + + Parameters: + * fs (type: numerical) - the sampling frequency of the signal. (Optional; Default = 44100) + * outMidFilt (type: boolean) - filter the signal by the outer and middle ear FIR filter. (Optional; Default = True) + * xscale (type: string) - the scale of the frequency axis. Values are 'log' or 'linear'. (Optional; Default = 'log') + * yscale (type: string) - the scale of the amplitude axis. Values are 'log' or 'linear'. (Optional; Default = 'log') + + Returns: + * y (type: numpy array of floats) - array with size ((39,np.ceil(fs))) containing the impulse response of the + signal at each gammatone filter (and optionally, including the outer middle ear response). The response at the + nth gammatone filter can be accessed by y[n]. + """ + + input = np.zeros(np.ceil(fs)) + input[0] = 1 + if(outMidFilt): input = outMidFir(input) + y = gamma_tone_filter(input, fs) + + for i in range(np.shape(y)[0]): + utils.plot_fft(y[i],xscale, yscale, False) + + plt.show() + + return y + +def get_modulation_i(input, fs): + """ + A function to calculate the modulation intensity of the input intensity signal. The function implements a + filter bank of bandpass filters with cut off frequencies ranging from 0.25 to 16 Hz. + + Parameters: + * input (type: array-like matrix of floats) - the input intensity signal. + E.g., use get_excitation_i() to obtain excitation intensity and use as input. + * fs (type: numerical) - sampling frequency of input signal + + Returns: + * y (type: numpy array of floats) - array with size ((10,) + np.shape(input)) containing the + modulation intensity of the signal at each modulation filter. The modulation intensity for the nth filter can + be accessed with y[n]. + """ + + + input = np.array(input) + b = fir_antialias(fs) + input_lp = sp.lfilter(b,(1),input_fr) + input_ds = downsample(input_lp, fs) + fc = np.array(utils.exp_sequence(-2,4,10)) + bw = fc/2 + y = decomposition(input_ds, fs, fc, bw) + + return y + +def outMidFir(input): + """ + A function to filter the input signal with the response of the outer and middle ear. + + Parameters: + * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required) + + Returns: + * y (type: numpy array of floats) - array with dimensions equal to the input signal filtered by the response of + the outer and middle ear. + """ + + input = np.array(input) + b = utils.load_outMidFir_coeff() + y = sp.lfilter(b, (1), input) + + return y + +def gamma_tone_filter(input, fs): + """ + A function to filter to decompose the input signal into 39 different gammatones filtered signals modelling the ERBs. + + Parameters: + * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required) + * fs (type: numerical) - sample frequency of the signal, input. (Required) + + Returns: + * y (type: numpy array of floats) - array with size ((39),np.shape(input)) containing the impulse response of the + signal at each gammatone filter. The response at the nth gammatone filter can be accessed by y[n]. + """ + + input = np.array(input) + fc, bw = utils.load_erb_data() + y = decomposition(input,fs,fc,bw) + + return y + +def v_Pascal(input, SPL): + """ + A function to convert a signal, normalised to an amplitude range of -1 to 1, to a signal represented in pressure (units: Pascal). + + Parameters: + * input (type: array-like matrix of floats) - signal normalised to an amplitude range of -1 to 1. (Required) + * SPL (type: double) - the sound pressure level (SPL) at 0 dBFS, e.g., the SPL of a sine + wave with peaks at amplitude 1 and troughs at amplitude -1. (Required) + + Returns: + * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input represented + as a pressure signal. + """ + + input = np.array(input) + y = np.sign(input)*(0.00002*10**(np.log10(np.abs(input))+dBFS/20)) + + return y + +def pa_i(input, C=406): + """ + A function to convert a pressure signal (unit: Pascal) to an intensity signal. + + Parameters: + * input (type: array-like matrix of floats) - pressure signal (unit: Pascal) (Required) + * C (type: double) - the acoustic impedance of the air (Optional; Default = 406) + + Returns: + * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input represented + as a pressure signal. + """ + + input = np.array(input) + y = (input**2) / C + + return y + +def at_normalise(input): + """ + A function to normalise an intensity signal with the audibility threshold. + + Parameters: + * input (type: array-like matrix of floats) - intensity signal (unit: Pascal) (Required) + + Returns: + * y (type: numpy array of floats) - array with dimensions equal to the input signal containing the input normalised + with the audibility threshold. + """ + + input = np.array(input) + y = input / 1*(10**12) + + return + +def downsample(input, factor=100): + """ + A function to downsample a signal, input, with sampling frequency, fs, by a downsample factor of factor. + + NOTE: It is advised to use the fir_antialias() function before downsampling to remove any high frequencies + which would otherwise represented as low frequencies due to aliasing. + + Parameters: + * input (type: array-like matrix of floats) - input signal. (Required) + * factor - downsample factor (Optional; Default = 100) + + Returns: + * output (type: numpy array of floats) - array with outer dimensions equivalent to the to the input, and + inner dimension equal to np.floor(lenIn / factor) where lenIn is the length of the inner dimension. + """ + + input = np.array(input) + shapeIn = np.shape(input) + nDim = np.shape(shapeIn) + lenIn = shapeIn[nDim[0]-1] + lenOut = np.floor(lenIn / factor) + n = np.linspace(0,lenIn,lenOut, endpoint=False).astype(np.int) + output = input[...,n] + + return output + +def antialias_fir(fs, fc=100, order=64): + """ + A function which returns the b coefficients for a lowpass fir filter with specified requirements. + Made specifically to remove aliasing when downsampling, but can be used for any application that + requires a lowpass filter. + + Parameters: + * fs (type: numerical) - sampling frequency of the signal to be filtered (Required) + * fc (type: numerical) - unnormalised cut off frequency of the filter (Optional; Default = 100) + * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 64) + + Returns: + * b (type: numpy array of floats) - an array of size order + 1 (i.e. a coefficient for each tap) + """ + + nyquist = 0.5*fs + fcNorm = fc/nyquist + b = sp.firwin(order+1, fcNorm) + + return b + +def half_rectification(input): + """ + A function which performs a half-wave rectification on the input signal. + + Parameters: + * input (type: array-like matrix of floats) - input signal. (Required) + + Returns: + * y (type: numpy array of floats) - an array with dimensions of input containing the half-wave rectification of + input. + """ + + + y = np.array(input) + y[y<0] = 0 + + return y + +def decomposition(input, fs, fc, bw, order=1024, verbose = False): + """ + A function to run the input filter through a bandpass filter bank of length equal to the length of fc. Each + bandpass filter is designed by defining the centre frequencies, fc, and bandwidths, bw. + + Parameters: + * input (type: array-like matrix of floats) - input signal. (Required) + * fs (type: numerical) - the sampling frequency of the input signal. (Required) + * fc (type: array-like vector of floats) - the centre off frequencies (unnormalised) of each bandpass filter. + The length of this vector determines the number of filters in the bank. + * bw (type: array-like vector of floats) - the bandwidths (unnormalised) of each bandpass filter. Must be equal + to or more than the length of fc. If the length is more, all elements exceeding the length of fc - 1 will be + ignored. + * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 1024) + * verbose (type: boolean) - determines whether to display the current subroutine/progress of the procedure. + (Optional; Default = False) + + Returns: + * y (type: numpy array of floats) - an array with inner dimensions equal to that of the input and outer dimension equal to + the length of fc (i.e. the number of bandpass filters in the bank) containing the outputs to each filter. The output + signal of the nth filter can be accessed using y[n]. + """ + + + input = np.array(input) + nFreqs = len(fc) + shape = (nFreqs,) + np.shape(input) + shape = shape[0:] + y = np.zeros(shape) + + if(verbose): print "Running frequency decomposition." + for i in range(nFreqs): + if(verbose): print str(100.0*i/nFreqs) + "% complete." + low = fc[i]-bw[i]/2; + high = fc[i]+bw[i]/2; + if(verbose): print "Low: " + str(low) + "; High: " + str(high) + b = fir_bandpass(low, high, fs, order, verbose) + x = deepcopy(input) + y[i] = sp.lfilter(b,(1),x) + + return y + +def fir_bandpass(low, high, fs, order = 1024, verbose = False): + """ + A function which returns the b coefficients for a bandpass fir filter with specified requirements. + + Parameters: + * low - the lower cutoff frequency of the filter. (Required) + * high - the upper cutoff frequency of the filter. (Required) + * fs (type: numerical) - sampling frequency of the signal to be filtered. (Required) + * order (type: numerical int) - the order of the filter (number of taps minus 1) (Optional; Default = 1024) + * verbose (type: boolean) - determines whether to display the current progress (or info on the current subroutine) + of the procedure. (Optional; Default = False) + + Returns: + * b (type: numpy array of floats) - an array of size order + 1 (i.e. a coefficient for each tap). + """ + + nyquist = 0.5*fs + lowNorm = low/nyquist + highNorm = high/nyquist + if(verbose): print "Low: " + str(lowNorm) + "; High: " + str(highNorm) + b = sp.firwin(order+1, [lowNorm, highNorm], pass_zero=False) + + return b \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/run.py Sat Jan 25 20:00:38 2014 +0000 @@ -0,0 +1,8 @@ +import utils +import cortical_model as cm +import scipy.io.wavfile as wave +import os +import numpy as np + +fs, wav = utils.load_wav_data("impulse.wav") +y = cm.gamma_tone_filter(wav, fs) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.py Sat Jan 25 20:00:38 2014 +0000 @@ -0,0 +1,216 @@ +""" +A utility module to assist cortical_model.py with general procedures, e.g., file reading and writing. + +Packaged dependencies: + * erb.dat + * outMidFir.dat + +External dependencies: + * scipy + * numpy + * matplotlib +""" + +import numpy as np +import scipy.io.wavfile as wave +import matplotlib.pyplot as plt +import scipy.fftpack as fft + +def load_erb_data(): + """ + Loads and returns 39 ERB frequencies and bandwidths + + Parameters: + NONE + + Returns: + * fc (type: numpy array of floats) - a vector of length 39 with elements containing + the centre frequencies of each ERB. + * bw (type: numpy array of floats) - a vector of length 39 with elements containing + the bandwidths of each ERB. + + Dependencies: + * erb.dat (place in the same folder as utils.pyc) + """ + # read our data from a text file + data=np.loadtxt("erb.dat",delimiter=",") + + # load centre frequencies from the first column into fc + fc=np.array(data[:,0]) + # load bandwidths from the second column into bw + bw=np.array(data[:,1]) + + return fc,bw + +def load_outMidFir_coeff(): + """ + Loads and returns 39 ERB frequencies and bandwidths + + Parameters: + NONE + + Returns: + * b (type: numpy array of floats) - a vector of length 4097 containing the impulse + response of the outer middle ear. + + Dependencies: + * outMidFir.dat (place in the same folder as utils.pyc) + """ + + b=np.loadtxt("outMidFir.dat", delimiter=",") + + return b + +def exp_sequence(start, stop, n, base=2): + """ + Creates a linear sequence with n points starting from start and ending at stop. For each + element in the sequence, e, the output sequence is 2^e, generating an exponential sequence + with base 2. + + Parameters: + * start (type: numerical int) - determines the first element of the sequence, i.e., + base^start. (Required) + * stop (type: numerical int) - determines the last element of the sequence, i.e., + base^stop. (Required) + * n (type = numerical int) - determines the number of elements in the sequence. (Required) + * base (type: numerical) - determines the exponential base. (Optional; Default = 2) + + Returns: + * seq - the exponential sequence + """ + + seq = [base**x for x in np.linspace(start,stop,n)] + + return seq + +def wavread(file): + """ + Reads the audio data from a wav file and converts it to floating point ranging from -1 to 1. + + Parameters: + * file (type: string) - the name of the wav file to read from. (Required) + + Returns: + * fs (type: numerical) - the sampling frequency of the signal storied in file + * data (type: numpy array of floats) - the data of the signal stored in file normalised + to an amplitude range of -1 to 1. + """ + + fs,data = wave.read(file) + data = np.array(int16_to_nfloat(data)) + + return fs,data + +def wavwrite(file, fs, data, precision = 16): + """ + Unnormalises the audio data to a specified integer precision and converts to an int, then + writes the audio data to a wav file. (E.g., if 16-bit precision, then highest amplitude + is equal to 2^16). + + Parameters: + * file (type: string) - the name of the wav file to write to. (Required) + * fs (type: numerical) - the sampling frequency of the signal. (Required) + * data (type: array-like matrix of floats) - the signal data normalised from -1 to 1. The signal will be clipped if not + in this range. (Required) + * precision (type: numerical int)- the bit precision to store at. Can only be 16 or 32 bit, because that is + all scipy allows. + + Returns: + NONE + + TODO explore WAVE package to be allow for 24bit precision. + """ + + data[data>1] = 1 + data[data<-1] = -1 + + if(precision == 16): + dtype = np.int16 + elif(precision == 32): + dtype = np.int32 + else: + print "Error: precision can only be 16 or 32 bit due to scipy package." + return + + data = nfloat_to_int(data) + wave.write(file, fs, data) + + return + +def plot_fft(x, xscale = 'log', yscale = 'log', show = True): + """ + Plots the fft of signal x. If the figure is not shown, the current plot is held to allow other plots to + be added to the same figure. + + Parameters: + * x (type: array-like matrix of floats) - the signal to be analysed. (Required) + * xscale (type: string) - the scale of the frequency axis. Values are 'log' or 'linear'. (Optional; Default = 'log') + * yscale (type: string) - the scale of the amplitude axis. Values are 'log' or 'linear'. (Optional; Default = 'log') + * show (type: boolean) - specifies whether the figure should be shown. If False, the current plot will be held so + other plots can be added to the figure. (Optional; Default = True) + + Returns: + NONE + """ + + if(xscale.lower() == "log"): plt.gca().set_xscale('log') + if(xscale.lower() == "log"): plt.gca().set_yscale('log') + + fftx = np.absolute(fft.fft(x)) + plt.plot(range(np.shape(x)[0]),fftx) + if(show): + plt.show() + else: + plt.hold(True) + + return + +def int_to_nfloat(input, type=np.float32): + """ + Convert integer with to floating point with a range from -1 to 1. + + Parameters: + * input (type: array-like matrix of ints) - a signed integer array. (Required) + dtype : the output datatype. (Optional; Default = np.float32) + + Returns: + * y (type: array-like matrix of floats) - a float array normalised to a + range of -1 to 1. + """ + + input = np.array(input) + assert input.dtype.kind == 'i', "'input' must be an array-like matrix of integers." + type = np.dtype(type); + inputdtype = np.dtype(type(input[0])) + input = input.astype(type) + + input[input > 0] = input[input > 0] / np.iinfo(inputdtype).max + input[input < 0] = input[input < 0] / -np.iinfo(inputdtype).min + + y = input + return y + +def nfloat_to_int(input, type=np.int16): + """ + Convert a float array with amplitude ranging from -1 to 1 to a unnormalised signed + integer array with specified precision. + + Parameters: + * input (type: array-like matrix of floats) - a float array. (Required) + dtype : the output datatype (also determines the precision). + (Optional; Default = np.int16) + + Returns: + * y (type: array-like matrix of ints) - an unnormalised to a + range of -1 to 1. + """ + + input = np.array(input) + assert input.dtype.kind == 'f', "'input' must be an array of floats!" + + input[input > 0] = input[ input > 0 ] * np.iinfo(np.int16).max + input[input < 0] = input[ input < 0 ] * -np.iinfo(np.int16).min + + y = input.astype(type) + + return y \ No newline at end of file