Maria@4: """Smoothie Maria@4: Maria@4: A module aimed at melody salience and melody features. It contains Maria@4: Maria@4: * a frequency transform inspired by constant-Q transforms Maria@4: * an NMF-based melodic salience function Maria@4: * methods that transform this salience into melody features (not implemented) Maria@4: """ Maria@4: Maria@4: import sys Maria@4: import numpy as np Maria@4: from scipy.signal import hann Maria@4: from scipy.sparse import csr_matrix Maria@4: import librosa Maria@4: Maria@4: p = {# smoothie q mapping parameters Maria@4: 'bpo' : 36, Maria@4: 'break1' : 500, Maria@4: 'break2' : 4000, Maria@4: 'f_max' : 8000, Maria@4: 'fs' : 16000, Maria@4: # spectrogram parameters Maria@4: 'step_size' : 160, Maria@4: 'use_A_wtg' : False, Maria@4: 'harm_remove' : False, Maria@4: # NMF and NMF dictionary parameters Maria@4: 's' : 0.85, # decay parameter Maria@4: 'n_harm' : 50, # I'm gonna do you no harm Maria@4: 'min_midi' : 25, Maria@4: 'max_midi' : 90.8, Maria@4: 'bps' : 5, Maria@4: 'n_iter' : 30 Maria@4: } Maria@4: Maria@4: def update_nmf_simple(H, W, V): Maria@4: """Update the gain matrix H using the multiplicative NMF update equation. Maria@4: Maria@4: Keyword arguments: Maria@4: H -- gain matrix Maria@4: W -- template matrix Maria@4: V -- target data matrix Maria@4: """ Maria@4: WHV = np.dot(W, H) ** (-1) * V Maria@4: H = H * np.dot(W.transpose(), WHV) Maria@4: return H Maria@4: Maria@4: def make_a_weight(f): Maria@4: """Return the values of A-weighting at the input frequencies f.""" Maria@4: f = np.array(f) Maria@4: zaehler = 12200 * 12200* (f**4) Maria@4: nenner = (f*f + 20.6*20.6) * np.sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)) * (f*f + 12200*12200) Maria@4: return zaehler / nenner Maria@4: Maria@4: def nextpow2(x): Maria@4: """Return the smallest integer n such that 2 ** n > x.""" Maria@4: return np.ceil(np.log2(x)) Maria@4: Maria@4: def get_smoothie_frequencies(p): Maria@4: """Calculate filter centres and widths for the smooth Q transform.""" Maria@4: # I think this calculation should be reconsidered in order to have Maria@4: # a more principled transition between linear and exponential Maria@4: n = 1.0 / (2.0**(1.0/p['bpo']) - 1) Maria@4: x = p['break1'] * 1.0 / n Maria@4: Maria@4: # first linear stretch Maria@4: # f = (np.arange(1, int(np.round(n)) + 1) * x).round().tolist() Maria@4: f = (np.arange(1, int(np.round(n)) + 1) * x).tolist() Maria@4: Maria@4: # exponential stretch Maria@4: while max(f) < p['break2']: Maria@4: f.append(max(f) * 2**(1.0/p['bpo'])) Maria@4: Maria@4: # final linear stretch Maria@4: lastdiff = f[-1] - f[-2] Maria@4: while max(f) < p['f_max']: Maria@4: f.append(max(f) + lastdiff) Maria@4: Maria@4: deltaf = np.diff(np.array(f)) Maria@4: f = f[:-1] Maria@4: Maria@4: return f, deltaf Maria@4: Maria@4: def create_smoothie_kernel(f, deltaf, fs): Maria@4: """Create a sparse matrix that maps the complex DFT to the complex Maria@4: smoothie representation. Maria@4: """ Maria@4: print >>sys.stdout, "[ SMOOTHIE Q kernel calculation ... ]" Maria@4: n_filter = len(f) Maria@4: n_fft = 2**nextpow2(np.ceil(fs/min(deltaf))) Maria@4: Maria@4: thresh = 0.0054 Maria@4: smoothie_kernel = np.zeros([n_fft, n_filter], np.complex64) Maria@4: Maria@4: for i_filter in range(n_filter-1, -1, -1): # descending Maria@4: # print i_filter Maria@4: Q = f[i_filter] * 1.0 / deltaf[i_filter] # local Q for this filter Maria@4: lgth = int(np.ceil(fs * 1.0 / deltaf[i_filter])) Maria@4: lgthinv = 1.0 / lgth Maria@4: offset = int(n_fft/2 - np.ceil(lgth * 0.5)) Maria@4: temp = hann(lgth) * lgthinv * \ Maria@4: np.exp(2j * np.pi * Q * (np.arange(lgth) - offset) * lgthinv) Maria@4: # print(sum(hann(lgth)), Q, lgth, offset) Maria@4: temp_kernel = np.zeros(n_fft, dtype = np.complex64) Maria@4: temp_kernel[np.arange(lgth) + offset] = temp Maria@4: spec_kernel = np.fft.fft(temp_kernel) Maria@4: spec_kernel[abs(spec_kernel) <= thresh] = 0 Maria@4: smoothie_kernel[:,i_filter] = spec_kernel Maria@4: return csr_matrix(smoothie_kernel.conj()/n_fft) Maria@4: Maria@4: def smoothie_q_spectrogram(x, p): Maria@4: """Calculate the actual spectrogram with smooth Q frequencies""" Maria@4: print >>sys.stdout, "[ SMOOTHIE Q spectrogram ... ]" Maria@4: Maria@4: # precalculate smoothie kernel Maria@4: f, deltaf = get_smoothie_frequencies(p) Maria@4: smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs']) Maria@4: n_fft, n_filter = smoothie_kernel.shape Maria@4: Maria@4: # some preparations Maria@4: n_sample = len(x) Maria@4: # n_frame = int(np.floor(n_sample / p['step_size'])) Maria@4: n_frame = int(np.ceil(n_sample / float(p['step_size']))) # added mp Maria@4: t = (np.arange(n_frame) * p['step_size']) * 1.0 / p['fs'] Maria@4: smoothie_kernelT = smoothie_kernel.T Maria@4: Maria@4: # allocate Maria@4: s = np.zeros((n_filter, n_frame), dtype = np.complex64) Maria@4: Maria@4: # pad (if wanted) Maria@4: x = np.concatenate((np.zeros(n_fft/2), x, np.zeros(n_fft/2))) Maria@4: Maria@4: for i_frame in range(n_frame): Maria@4: smpl = p['step_size'] * i_frame Maria@4: block = x[smpl + np.arange(n_fft)] Maria@4: s[:, i_frame] = smoothie_kernelT.dot(np.fft.fft(block)) Maria@4: Maria@4: if p['use_A_wtg']: Maria@4: a_wtg = make_a_weight(f) Maria@4: s = s * a_wtg[:, np.newaxis] Maria@4: Maria@4: return s, t Maria@4: Maria@4: def mel_triangles(input_f): Maria@4: """Make matrix with mel filters at the given frequencies. Maria@4: Warning: this is a very coarse mel filterbank. Maria@4: """ Maria@4: n_linearfilters = 3 Maria@4: n_logfilters0 = 30 # just something high, will be pruned later Maria@4: linear_spacing = 100 Maria@4: log_spacing = 6.0/4 Maria@4: n_filter0 = n_linearfilters + n_logfilters0 Maria@4: Maria@4: freqs = np.zeros(n_filter0+2) # includes one more on either side, hence +2 Maria@4: freqs[range(n_linearfilters+1)] = \ Maria@4: np.arange(-1,n_linearfilters) * linear_spacing Maria@4: freqs[range(n_linearfilters+1, n_filter0+2)] = \ Maria@4: freqs[n_linearfilters] * log_spacing ** np.arange(1, n_logfilters0+2) Maria@4: Maria@4: centre_freqs = freqs[1:-1] Maria@4: lower_freqs = freqs[0:-2] Maria@4: upper_freqs = freqs[2:] Maria@4: Maria@4: n_filter = list(np.nonzero(lower_freqs < max(input_f)))[0][-1] + 1 Maria@4: Maria@4: n_input_f = len(input_f) Maria@4: mtr = np.zeros((n_input_f, n_filter)) Maria@4: Maria@4: for i_filter in range(n_filter): Maria@4: for i_freq in range(n_input_f): Maria@4: if input_f[i_freq] > lower_freqs[i_filter] \ Maria@4: and input_f[i_freq] <= upper_freqs[i_filter]: Maria@4: Maria@4: if input_f[i_freq] <= centre_freqs[i_filter]: Maria@4: mtr[i_freq, i_filter] = \ Maria@4: (input_f[i_freq] - lower_freqs[i_filter]) * 1.0 / \ Maria@4: (centre_freqs[i_filter] - lower_freqs[i_filter]) Maria@4: else: Maria@4: mtr[i_freq, i_filter] = \ Maria@4: 1 - (input_f[i_freq] - centre_freqs[i_filter]) / \ Maria@4: (upper_freqs[i_filter] - centre_freqs[i_filter]) Maria@4: return mtr Maria@4: Maria@4: def create_smoothie_nfm_dict(smoothie_kernel, filterf, p): Maria@4: """Create dictionary matrix with note templates.""" Maria@4: n_note = int((p['max_midi'] - p['min_midi']) * p['bps'] + 1) Maria@4: n_fft, n_filter = smoothie_kernel.shape Maria@4: t = ((np.arange(n_fft) + 1) - ((n_fft + 1)*0.5))/p['fs'] Maria@4: Maria@4: mtr = mel_triangles(filterf) Maria@4: n_template = n_note + mtr.shape[1] Maria@4: Maria@4: w = np.zeros((n_filter, n_template), dtype = np.complex64) Maria@4: w[:,n_note:] = mtr Maria@4: f0s = [] Maria@4: Maria@4: smoothie_kernelT = smoothie_kernel.T Maria@4: Maria@4: for i_note in range(n_note): Maria@4: midi = p['min_midi'] + i_note * 1.0 / p['bps'] Maria@4: f0 = 440 * 2 ** ((midi-69)*1.0/12) Maria@4: f0s.append(f0) Maria@4: sig = np.zeros(len(t)) Maria@4: for i_harm in range(p['n_harm']): Maria@4: f = f0 * (i_harm + 1) Maria@4: if f > p['fs'] * 0.5: Maria@4: continue Maria@4: x = np.sin(2*np.pi*f*t) * p['s']**(i_harm) Maria@4: sig += x Maria@4: w[:, i_note] = smoothie_kernelT.dot(np.fft.fft(sig)) Maria@4: Maria@4: for i in range(mtr.shape[1]): Maria@4: f0s.append(np.nan) Maria@4: Maria@4: w = abs(w) Maria@4: col_sums = w.sum(axis = 0) Maria@4: w = w / col_sums[np.newaxis, :] # normalisation Maria@4: return w, np.array(f0s) Maria@4: Maria@4: def smoothie_salience(x, p, do_tune = False): Maria@4: """Calculate melodic salience.""" Maria@4: print >>sys.stdout, "[ SMOOTHIE Q salience ... ]" Maria@4: Maria@4: # precalculate nmf kernel Maria@4: f, deltaf = get_smoothie_frequencies(p) Maria@4: smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs']) Maria@4: w, f0s = create_smoothie_nfm_dict(smoothie_kernel, f, p) Maria@4: Maria@4: # calculate smoothiegram Maria@4: s, t = smoothie_q_spectrogram(x, p) Maria@4: s[s==0] = np.spacing(1) # very small number Maria@4: s = abs(s) Maria@4: Maria@4: # run NMF Maria@4: n_frame = len(t) Maria@4: print >>sys.stdout, "[ SMOOTHIE Q : NMF updates ... ]" Maria@4: sal = np.ones((w.shape[1], n_frame)) Maria@4: for i_iter in range(p['n_iter']): Maria@4: sal = update_nmf_simple(sal, w, s) Maria@4: Maria@4: if do_tune: Maria@4: error("Tuning isn't yet implemented in the Python version") Maria@4: Maria@4: return sal, t, f0s Maria@4: Maria@4: def qnormalise(x, q, dim): Maria@4: nrm = np.sum(x**q, axis=dim, keepdims=True)**(1./float(q)) Maria@4: nrmmatrix = np.repeat(nrm, x.shape[0], axis=0) Maria@4: x = x / nrmmatrix Maria@4: return x Maria@4: Maria@4: def wrap_to_octave(sal, param): Maria@4: epsilon = 0.00001 Maria@4: nsal = qnormalise(sal, 2, 0) Maria@4: Maria@4: step = 1./float(param['bps']) Maria@4: NMFpitch = np.arange(param['min_midi'],param['max_midi']+step,step) Maria@4: nNote = len(NMFpitch) Maria@4: nsal = nsal[0:nNote, :] Maria@4: Maria@4: allsal = nsal Maria@4: allsal[nsal 90: Maria@4: startPointSec = (tracklength/2.)-20 Maria@4: startSample = round(startPointSec*sr) Maria@4: endSample = startSample+45*sr Maria@4: y = y[startSample:endSample] Maria@4: return y Maria@4: Maria@4: def get_smoothie(filename = 'test.wav', param = None, segment=True, secondframedecomp=False, hopinsec=None): Maria@4: if param is None: Maria@4: param = p Maria@4: param['fs'] = None Maria@4: param['step_size'] = 128 Maria@4: Maria@4: y, sr = librosa.load(filename, sr = param['fs']) Maria@4: param['fs'] = sr Maria@4: if hopinsec is not None: Maria@4: param['step_size'] = int(round(hopinsec*param['fs'])) Maria@4: if segment: Maria@4: y = segment_audio(y,sr) Maria@4: # sg, t = smoothie_q_spectrogram(y, param) Maria@4: # nmf_dict = create_smoothie_nfm_dict(smoothie_kernel, f, param) Maria@4: sal, t, f0s = smoothie_salience(y, param) Maria@4: sal = sal[-np.isnan(f0s),:] Maria@4: chroma = wrap_to_octave(sal, param) Maria@4: Maria@4: if secondframedecomp: Maria@4: chromasr = param['fs']/float(param['step_size']) Maria@4: win2 = int(round(8*chromasr)) Maria@4: hop2 = int(round(0.5*chromasr)) Maria@4: nbins, norigframes = chroma.shape Maria@4: nframes = int(1+np.floor((norigframes-win2)/float(hop2))) Maria@4: avechroma = np.empty((nbins, nframes)) Maria@4: for i in range(nframes): # loop over all 8-sec frames Maria@4: avechroma[:,i] = np.mean(chroma[:, (i*hop2):(i*hop2+win2)], axis=1, keepdims=True) Maria@4: chroma = avechroma Maria@4: return chroma Maria@4: Maria@4: def get_smoothie_for_bihist(filename = 'test.wav', param = None, segment=True, hopinsec=None): Maria@4: if param is None: Maria@4: param = p Maria@4: param['fs'] = None # no downsample Maria@4: param['step_size'] = 128 Maria@4: Maria@4: y, sr = librosa.load(filename, sr = param['fs']) Maria@4: param['fs'] = sr Maria@4: if hopinsec is not None: Maria@4: param['step_size'] = int(round(hopinsec*param['fs'])) Maria@4: if segment: Maria@4: y = segment_audio(y,sr) Maria@4: # sg, t = smoothie_q_spectrogram(y, param) Maria@4: # nmf_dict = create_smoothie_nfm_dict(smoothie_kernel, f, param) Maria@4: sal, t, f0s = smoothie_salience(y, param) Maria@4: sal = sal[-np.isnan(f0s),:] Maria@4: chroma = wrap_to_octave(sal, param) Maria@4: chromasr = param['fs']/float(param['step_size']) Maria@4: return (chroma, chromasr)