Mercurial > hg > rhythm-melody-feature-evaluation
changeset 5:e8ebba3d6294
add smoothie
author | Maria Panteli |
---|---|
date | Tue, 14 Mar 2017 22:37:08 +0000 |
parents | 51db426e413e |
children | 2732137aa9b5 |
files | util/smoothiecore.py |
diffstat | 1 files changed, 301 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/util/smoothiecore.py Tue Mar 14 22:37:08 2017 +0000 @@ -0,0 +1,301 @@ +"""Smoothie + +A module aimed at melody salience and melody features. It contains + +* a frequency transform inspired by constant-Q transforms +* an NMF-based melodic salience function +* methods that transform this salience into melody features (not implemented) + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. See the file COPYING included with this distribution for more information. + +author: Matthias Mauch +(addition of wrap_to_octave and get_chroma functions by Maria Panteli) + +""" + +import sys +import numpy as np +from scipy.signal import hann +from scipy.sparse import csr_matrix +import librosa + + +p = {# smoothie q mapping parameters + 'bpo' : 36, + 'break1' : 500, + 'break2' : 4000, + 'f_max' : 8000, + 'fs' : 16000, + # spectrogram parameters + 'step_size' : 160, + 'use_A_wtg' : False, + 'harm_remove' : False, + # NMF and NMF dictionary parameters + 's' : 0.85, # decay parameter + 'n_harm' : 50, # I'm gonna do you no harm + 'min_midi' : 25, + 'max_midi' : 90.8, + 'bps' : 5, + 'n_iter' : 30 + } + + +def update_nmf_simple(H, W, V): + """Update the gain matrix H using the multiplicative NMF update equation. + + Keyword arguments: + H -- gain matrix + W -- template matrix + V -- target data matrix + """ + WHV = np.dot(W, H) ** (-1) * V + H = H * np.dot(W.transpose(), WHV) + return H + +def make_a_weight(f): + """Return the values of A-weighting at the input frequencies f.""" + f = np.array(f) + zaehler = 12200 * 12200* (f**4) + nenner = (f*f + 20.6*20.6) * np.sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)) * (f*f + 12200*12200) + return zaehler / nenner + + +def nextpow2(x): + """Return the smallest integer n such that 2 ** n > x.""" + return np.ceil(np.log2(x)) + + +def get_smoothie_frequencies(p): + """Calculate filter centres and widths for the smooth Q transform.""" + # I think this calculation should be reconsidered in order to have + # a more principled transition between linear and exponential + n = 1.0 / (2.0**(1.0/p['bpo']) - 1) + x = p['break1'] * 1.0 / n + + # first linear stretch + # f = (np.arange(1, int(np.round(n)) + 1) * x).round().tolist() + f = (np.arange(1, int(np.round(n)) + 1) * x).tolist() + + # exponential stretch + while max(f) < p['break2']: + f.append(max(f) * 2**(1.0/p['bpo'])) + + # final linear stretch + lastdiff = f[-1] - f[-2] + while max(f) < p['f_max']: + f.append(max(f) + lastdiff) + + deltaf = np.diff(np.array(f)) + f = f[:-1] + + return f, deltaf + + +def create_smoothie_kernel(f, deltaf, fs): + """Create a sparse matrix that maps the complex DFT to the complex + smoothie representation. + """ + print >>sys.stdout, "[ SMOOTHIE Q kernel calculation ... ]" + n_filter = len(f) + n_fft = 2**nextpow2(np.ceil(fs/min(deltaf))) + + thresh = 0.0054 + smoothie_kernel = np.zeros([n_fft, n_filter], np.complex64) + + for i_filter in range(n_filter-1, -1, -1): # descending + # print i_filter + Q = f[i_filter] * 1.0 / deltaf[i_filter] # local Q for this filter + lgth = int(np.ceil(fs * 1.0 / deltaf[i_filter])) + lgthinv = 1.0 / lgth + offset = int(n_fft/2 - np.ceil(lgth * 0.5)) + temp = hann(lgth) * lgthinv * \ + np.exp(2j * np.pi * Q * (np.arange(lgth) - offset) * lgthinv) + # print(sum(hann(lgth)), Q, lgth, offset) + temp_kernel = np.zeros(n_fft, dtype = np.complex64) + temp_kernel[np.arange(lgth) + offset] = temp + spec_kernel = np.fft.fft(temp_kernel) + spec_kernel[abs(spec_kernel) <= thresh] = 0 + smoothie_kernel[:,i_filter] = spec_kernel + return csr_matrix(smoothie_kernel.conj()/n_fft) + + +def smoothie_q_spectrogram(x, p): + """Calculate the actual spectrogram with smooth Q frequencies""" + print >>sys.stdout, "[ SMOOTHIE Q spectrogram ... ]" + + # precalculate smoothie kernel + f, deltaf = get_smoothie_frequencies(p) + smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs']) + n_fft, n_filter = smoothie_kernel.shape + + # some preparations + n_sample = len(x) + # n_frame = int(np.floor(n_sample / p['step_size'])) + n_frame = int(np.ceil(n_sample / float(p['step_size']))) # added mp + t = (np.arange(n_frame) * p['step_size']) * 1.0 / p['fs'] + smoothie_kernelT = smoothie_kernel.T + + # allocate + s = np.zeros((n_filter, n_frame), dtype = np.complex64) + + # pad (if wanted) + x = np.concatenate((np.zeros(n_fft/2), x, np.zeros(n_fft/2))) + + for i_frame in range(n_frame): + smpl = p['step_size'] * i_frame + block = x[smpl + np.arange(n_fft)] + s[:, i_frame] = smoothie_kernelT.dot(np.fft.fft(block)) + + if p['use_A_wtg']: + a_wtg = make_a_weight(f) + s = s * a_wtg[:, np.newaxis] + + return s, t + + +def mel_triangles(input_f): + """Make matrix with mel filters at the given frequencies. + Warning: this is a very coarse mel filterbank. + """ + n_linearfilters = 3 + n_logfilters0 = 30 # just something high, will be pruned later + linear_spacing = 100 + log_spacing = 6.0/4 + n_filter0 = n_linearfilters + n_logfilters0 + + freqs = np.zeros(n_filter0+2) # includes one more on either side, hence +2 + freqs[range(n_linearfilters+1)] = \ + np.arange(-1,n_linearfilters) * linear_spacing + freqs[range(n_linearfilters+1, n_filter0+2)] = \ + freqs[n_linearfilters] * log_spacing ** np.arange(1, n_logfilters0+2) + + centre_freqs = freqs[1:-1] + lower_freqs = freqs[0:-2] + upper_freqs = freqs[2:] + + n_filter = list(np.nonzero(lower_freqs < max(input_f)))[0][-1] + 1 + + n_input_f = len(input_f) + mtr = np.zeros((n_input_f, n_filter)) + + for i_filter in range(n_filter): + for i_freq in range(n_input_f): + if input_f[i_freq] > lower_freqs[i_filter] \ + and input_f[i_freq] <= upper_freqs[i_filter]: + + if input_f[i_freq] <= centre_freqs[i_filter]: + mtr[i_freq, i_filter] = \ + (input_f[i_freq] - lower_freqs[i_filter]) * 1.0 / \ + (centre_freqs[i_filter] - lower_freqs[i_filter]) + else: + mtr[i_freq, i_filter] = \ + 1 - (input_f[i_freq] - centre_freqs[i_filter]) / \ + (upper_freqs[i_filter] - centre_freqs[i_filter]) + return mtr + + +def create_smoothie_nfm_dict(smoothie_kernel, filterf, p): + """Create dictionary matrix with note templates.""" + n_note = int((p['max_midi'] - p['min_midi']) * p['bps'] + 1) + n_fft, n_filter = smoothie_kernel.shape + t = ((np.arange(n_fft) + 1) - ((n_fft + 1)*0.5))/p['fs'] + + mtr = mel_triangles(filterf) + n_template = n_note + mtr.shape[1] + + w = np.zeros((n_filter, n_template), dtype = np.complex64) + w[:,n_note:] = mtr + f0s = [] + + smoothie_kernelT = smoothie_kernel.T + + for i_note in range(n_note): + midi = p['min_midi'] + i_note * 1.0 / p['bps'] + f0 = 440 * 2 ** ((midi-69)*1.0/12) + f0s.append(f0) + sig = np.zeros(len(t)) + for i_harm in range(p['n_harm']): + f = f0 * (i_harm + 1) + if f > p['fs'] * 0.5: + continue + x = np.sin(2*np.pi*f*t) * p['s']**(i_harm) + sig += x + w[:, i_note] = smoothie_kernelT.dot(np.fft.fft(sig)) + + for i in range(mtr.shape[1]): + f0s.append(np.nan) + + w = abs(w) + col_sums = w.sum(axis = 0) + w = w / col_sums[np.newaxis, :] # normalisation + return w, np.array(f0s) + + +def smoothie_salience(x, p, do_tune = False): + """Calculate melodic salience.""" + print >>sys.stdout, "[ SMOOTHIE Q salience ... ]" + + # precalculate nmf kernel + f, deltaf = get_smoothie_frequencies(p) + smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs']) + w, f0s = create_smoothie_nfm_dict(smoothie_kernel, f, p) + + # calculate smoothiegram + s, t = smoothie_q_spectrogram(x, p) + s[s==0] = np.spacing(1) # very small number + s = abs(s) + + # run NMF + n_frame = len(t) + print >>sys.stdout, "[ SMOOTHIE Q : NMF updates ... ]" + sal = np.ones((w.shape[1], n_frame)) + for i_iter in range(p['n_iter']): + sal = update_nmf_simple(sal, w, s) + + #if do_tune: + # error("Tuning isn't yet implemented in the Python version") + + return sal, t, f0s + + +def qnormalise(x, q, dim): + """Normalise by sum.""" + nrm = np.sum(x**q, axis=dim, keepdims=True)**(1./float(q)) + nrmmatrix = np.repeat(nrm, x.shape[0], axis=0) + x = x / nrmmatrix + return x + + +def wrap_to_octave(sal, param): + """Wrap pitch to octave.""" + epsilon = 0.00001 + nsal = qnormalise(sal, 2, 0) + + step = 1. / float(param['bps']) + NMFpitch = np.arange(param['min_midi'], param['max_midi'] + step, step) + nNote = len(NMFpitch) + nsal = nsal[0:nNote, :] + + allsal = nsal + allsal[nsal < epsilon] = epsilon + + # chroma mapping + n = param['bps'] * 12 + mmap = np.tile(np.eye(n), (1, 5)) + chroma = mmap.dot(allsal[0:(n * 5), :]) + return chroma + + +def get_chroma(filename = 'test.wav'): + """Get chroma from audio file.""" + y, sr = librosa.load(filename, sr = None) + p['fs'] = sr + p['step_size'] = int(round(0.005 * p['fs'])) # default hop size 0.005 sec. + sal, t, f0s = smoothie_salience(y, p) + sal = sal[-np.isnan(f0s), :] + chroma = wrap_to_octave(sal, p) + chromasr = p['fs'] / float(p['step_size']) + return chroma, chromasr