annotate util/smoothiecore.py @ 7:b7169083b9ea tip

fix typo in variable name
author Maria Panteli
date Tue, 01 Jan 2019 15:51:38 +0200
parents e8ebba3d6294
children
rev   line source
Maria@5 1 """Smoothie
Maria@5 2
Maria@5 3 A module aimed at melody salience and melody features. It contains
Maria@5 4
Maria@5 5 * a frequency transform inspired by constant-Q transforms
Maria@5 6 * an NMF-based melodic salience function
Maria@5 7 * methods that transform this salience into melody features (not implemented)
Maria@5 8
Maria@5 9 This program is free software; you can redistribute it and/or modify it under
Maria@5 10 the terms of the GNU General Public License as published by the Free Software
Maria@5 11 Foundation; either version 2 of the License, or (at your option) any later
Maria@5 12 version. See the file COPYING included with this distribution for more information.
Maria@5 13
Maria@5 14 author: Matthias Mauch
Maria@5 15 (addition of wrap_to_octave and get_chroma functions by Maria Panteli)
Maria@5 16
Maria@5 17 """
Maria@5 18
Maria@5 19 import sys
Maria@5 20 import numpy as np
Maria@5 21 from scipy.signal import hann
Maria@5 22 from scipy.sparse import csr_matrix
Maria@5 23 import librosa
Maria@5 24
Maria@5 25
Maria@5 26 p = {# smoothie q mapping parameters
Maria@5 27 'bpo' : 36,
Maria@5 28 'break1' : 500,
Maria@5 29 'break2' : 4000,
Maria@5 30 'f_max' : 8000,
Maria@5 31 'fs' : 16000,
Maria@5 32 # spectrogram parameters
Maria@5 33 'step_size' : 160,
Maria@5 34 'use_A_wtg' : False,
Maria@5 35 'harm_remove' : False,
Maria@5 36 # NMF and NMF dictionary parameters
Maria@5 37 's' : 0.85, # decay parameter
Maria@5 38 'n_harm' : 50, # I'm gonna do you no harm
Maria@5 39 'min_midi' : 25,
Maria@5 40 'max_midi' : 90.8,
Maria@5 41 'bps' : 5,
Maria@5 42 'n_iter' : 30
Maria@5 43 }
Maria@5 44
Maria@5 45
Maria@5 46 def update_nmf_simple(H, W, V):
Maria@5 47 """Update the gain matrix H using the multiplicative NMF update equation.
Maria@5 48
Maria@5 49 Keyword arguments:
Maria@5 50 H -- gain matrix
Maria@5 51 W -- template matrix
Maria@5 52 V -- target data matrix
Maria@5 53 """
Maria@5 54 WHV = np.dot(W, H) ** (-1) * V
Maria@5 55 H = H * np.dot(W.transpose(), WHV)
Maria@5 56 return H
Maria@5 57
Maria@5 58 def make_a_weight(f):
Maria@5 59 """Return the values of A-weighting at the input frequencies f."""
Maria@5 60 f = np.array(f)
Maria@5 61 zaehler = 12200 * 12200* (f**4)
Maria@5 62 nenner = (f*f + 20.6*20.6) * np.sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)) * (f*f + 12200*12200)
Maria@5 63 return zaehler / nenner
Maria@5 64
Maria@5 65
Maria@5 66 def nextpow2(x):
Maria@5 67 """Return the smallest integer n such that 2 ** n > x."""
Maria@5 68 return np.ceil(np.log2(x))
Maria@5 69
Maria@5 70
Maria@5 71 def get_smoothie_frequencies(p):
Maria@5 72 """Calculate filter centres and widths for the smooth Q transform."""
Maria@5 73 # I think this calculation should be reconsidered in order to have
Maria@5 74 # a more principled transition between linear and exponential
Maria@5 75 n = 1.0 / (2.0**(1.0/p['bpo']) - 1)
Maria@5 76 x = p['break1'] * 1.0 / n
Maria@5 77
Maria@5 78 # first linear stretch
Maria@5 79 # f = (np.arange(1, int(np.round(n)) + 1) * x).round().tolist()
Maria@5 80 f = (np.arange(1, int(np.round(n)) + 1) * x).tolist()
Maria@5 81
Maria@5 82 # exponential stretch
Maria@5 83 while max(f) < p['break2']:
Maria@5 84 f.append(max(f) * 2**(1.0/p['bpo']))
Maria@5 85
Maria@5 86 # final linear stretch
Maria@5 87 lastdiff = f[-1] - f[-2]
Maria@5 88 while max(f) < p['f_max']:
Maria@5 89 f.append(max(f) + lastdiff)
Maria@5 90
Maria@5 91 deltaf = np.diff(np.array(f))
Maria@5 92 f = f[:-1]
Maria@5 93
Maria@5 94 return f, deltaf
Maria@5 95
Maria@5 96
Maria@5 97 def create_smoothie_kernel(f, deltaf, fs):
Maria@5 98 """Create a sparse matrix that maps the complex DFT to the complex
Maria@5 99 smoothie representation.
Maria@5 100 """
Maria@5 101 print >>sys.stdout, "[ SMOOTHIE Q kernel calculation ... ]"
Maria@5 102 n_filter = len(f)
Maria@5 103 n_fft = 2**nextpow2(np.ceil(fs/min(deltaf)))
Maria@5 104
Maria@5 105 thresh = 0.0054
Maria@5 106 smoothie_kernel = np.zeros([n_fft, n_filter], np.complex64)
Maria@5 107
Maria@5 108 for i_filter in range(n_filter-1, -1, -1): # descending
Maria@5 109 # print i_filter
Maria@5 110 Q = f[i_filter] * 1.0 / deltaf[i_filter] # local Q for this filter
Maria@5 111 lgth = int(np.ceil(fs * 1.0 / deltaf[i_filter]))
Maria@5 112 lgthinv = 1.0 / lgth
Maria@5 113 offset = int(n_fft/2 - np.ceil(lgth * 0.5))
Maria@5 114 temp = hann(lgth) * lgthinv * \
Maria@5 115 np.exp(2j * np.pi * Q * (np.arange(lgth) - offset) * lgthinv)
Maria@5 116 # print(sum(hann(lgth)), Q, lgth, offset)
Maria@5 117 temp_kernel = np.zeros(n_fft, dtype = np.complex64)
Maria@5 118 temp_kernel[np.arange(lgth) + offset] = temp
Maria@5 119 spec_kernel = np.fft.fft(temp_kernel)
Maria@5 120 spec_kernel[abs(spec_kernel) <= thresh] = 0
Maria@5 121 smoothie_kernel[:,i_filter] = spec_kernel
Maria@5 122 return csr_matrix(smoothie_kernel.conj()/n_fft)
Maria@5 123
Maria@5 124
Maria@5 125 def smoothie_q_spectrogram(x, p):
Maria@5 126 """Calculate the actual spectrogram with smooth Q frequencies"""
Maria@5 127 print >>sys.stdout, "[ SMOOTHIE Q spectrogram ... ]"
Maria@5 128
Maria@5 129 # precalculate smoothie kernel
Maria@5 130 f, deltaf = get_smoothie_frequencies(p)
Maria@5 131 smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs'])
Maria@5 132 n_fft, n_filter = smoothie_kernel.shape
Maria@5 133
Maria@5 134 # some preparations
Maria@5 135 n_sample = len(x)
Maria@5 136 # n_frame = int(np.floor(n_sample / p['step_size']))
Maria@5 137 n_frame = int(np.ceil(n_sample / float(p['step_size']))) # added mp
Maria@5 138 t = (np.arange(n_frame) * p['step_size']) * 1.0 / p['fs']
Maria@5 139 smoothie_kernelT = smoothie_kernel.T
Maria@5 140
Maria@5 141 # allocate
Maria@5 142 s = np.zeros((n_filter, n_frame), dtype = np.complex64)
Maria@5 143
Maria@5 144 # pad (if wanted)
Maria@5 145 x = np.concatenate((np.zeros(n_fft/2), x, np.zeros(n_fft/2)))
Maria@5 146
Maria@5 147 for i_frame in range(n_frame):
Maria@5 148 smpl = p['step_size'] * i_frame
Maria@5 149 block = x[smpl + np.arange(n_fft)]
Maria@5 150 s[:, i_frame] = smoothie_kernelT.dot(np.fft.fft(block))
Maria@5 151
Maria@5 152 if p['use_A_wtg']:
Maria@5 153 a_wtg = make_a_weight(f)
Maria@5 154 s = s * a_wtg[:, np.newaxis]
Maria@5 155
Maria@5 156 return s, t
Maria@5 157
Maria@5 158
Maria@5 159 def mel_triangles(input_f):
Maria@5 160 """Make matrix with mel filters at the given frequencies.
Maria@5 161 Warning: this is a very coarse mel filterbank.
Maria@5 162 """
Maria@5 163 n_linearfilters = 3
Maria@5 164 n_logfilters0 = 30 # just something high, will be pruned later
Maria@5 165 linear_spacing = 100
Maria@5 166 log_spacing = 6.0/4
Maria@5 167 n_filter0 = n_linearfilters + n_logfilters0
Maria@5 168
Maria@5 169 freqs = np.zeros(n_filter0+2) # includes one more on either side, hence +2
Maria@5 170 freqs[range(n_linearfilters+1)] = \
Maria@5 171 np.arange(-1,n_linearfilters) * linear_spacing
Maria@5 172 freqs[range(n_linearfilters+1, n_filter0+2)] = \
Maria@5 173 freqs[n_linearfilters] * log_spacing ** np.arange(1, n_logfilters0+2)
Maria@5 174
Maria@5 175 centre_freqs = freqs[1:-1]
Maria@5 176 lower_freqs = freqs[0:-2]
Maria@5 177 upper_freqs = freqs[2:]
Maria@5 178
Maria@5 179 n_filter = list(np.nonzero(lower_freqs < max(input_f)))[0][-1] + 1
Maria@5 180
Maria@5 181 n_input_f = len(input_f)
Maria@5 182 mtr = np.zeros((n_input_f, n_filter))
Maria@5 183
Maria@5 184 for i_filter in range(n_filter):
Maria@5 185 for i_freq in range(n_input_f):
Maria@5 186 if input_f[i_freq] > lower_freqs[i_filter] \
Maria@5 187 and input_f[i_freq] <= upper_freqs[i_filter]:
Maria@5 188
Maria@5 189 if input_f[i_freq] <= centre_freqs[i_filter]:
Maria@5 190 mtr[i_freq, i_filter] = \
Maria@5 191 (input_f[i_freq] - lower_freqs[i_filter]) * 1.0 / \
Maria@5 192 (centre_freqs[i_filter] - lower_freqs[i_filter])
Maria@5 193 else:
Maria@5 194 mtr[i_freq, i_filter] = \
Maria@5 195 1 - (input_f[i_freq] - centre_freqs[i_filter]) / \
Maria@5 196 (upper_freqs[i_filter] - centre_freqs[i_filter])
Maria@5 197 return mtr
Maria@5 198
Maria@5 199
Maria@5 200 def create_smoothie_nfm_dict(smoothie_kernel, filterf, p):
Maria@5 201 """Create dictionary matrix with note templates."""
Maria@5 202 n_note = int((p['max_midi'] - p['min_midi']) * p['bps'] + 1)
Maria@5 203 n_fft, n_filter = smoothie_kernel.shape
Maria@5 204 t = ((np.arange(n_fft) + 1) - ((n_fft + 1)*0.5))/p['fs']
Maria@5 205
Maria@5 206 mtr = mel_triangles(filterf)
Maria@5 207 n_template = n_note + mtr.shape[1]
Maria@5 208
Maria@5 209 w = np.zeros((n_filter, n_template), dtype = np.complex64)
Maria@5 210 w[:,n_note:] = mtr
Maria@5 211 f0s = []
Maria@5 212
Maria@5 213 smoothie_kernelT = smoothie_kernel.T
Maria@5 214
Maria@5 215 for i_note in range(n_note):
Maria@5 216 midi = p['min_midi'] + i_note * 1.0 / p['bps']
Maria@5 217 f0 = 440 * 2 ** ((midi-69)*1.0/12)
Maria@5 218 f0s.append(f0)
Maria@5 219 sig = np.zeros(len(t))
Maria@5 220 for i_harm in range(p['n_harm']):
Maria@5 221 f = f0 * (i_harm + 1)
Maria@5 222 if f > p['fs'] * 0.5:
Maria@5 223 continue
Maria@5 224 x = np.sin(2*np.pi*f*t) * p['s']**(i_harm)
Maria@5 225 sig += x
Maria@5 226 w[:, i_note] = smoothie_kernelT.dot(np.fft.fft(sig))
Maria@5 227
Maria@5 228 for i in range(mtr.shape[1]):
Maria@5 229 f0s.append(np.nan)
Maria@5 230
Maria@5 231 w = abs(w)
Maria@5 232 col_sums = w.sum(axis = 0)
Maria@5 233 w = w / col_sums[np.newaxis, :] # normalisation
Maria@5 234 return w, np.array(f0s)
Maria@5 235
Maria@5 236
Maria@5 237 def smoothie_salience(x, p, do_tune = False):
Maria@5 238 """Calculate melodic salience."""
Maria@5 239 print >>sys.stdout, "[ SMOOTHIE Q salience ... ]"
Maria@5 240
Maria@5 241 # precalculate nmf kernel
Maria@5 242 f, deltaf = get_smoothie_frequencies(p)
Maria@5 243 smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs'])
Maria@5 244 w, f0s = create_smoothie_nfm_dict(smoothie_kernel, f, p)
Maria@5 245
Maria@5 246 # calculate smoothiegram
Maria@5 247 s, t = smoothie_q_spectrogram(x, p)
Maria@5 248 s[s==0] = np.spacing(1) # very small number
Maria@5 249 s = abs(s)
Maria@5 250
Maria@5 251 # run NMF
Maria@5 252 n_frame = len(t)
Maria@5 253 print >>sys.stdout, "[ SMOOTHIE Q : NMF updates ... ]"
Maria@5 254 sal = np.ones((w.shape[1], n_frame))
Maria@5 255 for i_iter in range(p['n_iter']):
Maria@5 256 sal = update_nmf_simple(sal, w, s)
Maria@5 257
Maria@5 258 #if do_tune:
Maria@5 259 # error("Tuning isn't yet implemented in the Python version")
Maria@5 260
Maria@5 261 return sal, t, f0s
Maria@5 262
Maria@5 263
Maria@5 264 def qnormalise(x, q, dim):
Maria@5 265 """Normalise by sum."""
Maria@5 266 nrm = np.sum(x**q, axis=dim, keepdims=True)**(1./float(q))
Maria@5 267 nrmmatrix = np.repeat(nrm, x.shape[0], axis=0)
Maria@5 268 x = x / nrmmatrix
Maria@5 269 return x
Maria@5 270
Maria@5 271
Maria@5 272 def wrap_to_octave(sal, param):
Maria@5 273 """Wrap pitch to octave."""
Maria@5 274 epsilon = 0.00001
Maria@5 275 nsal = qnormalise(sal, 2, 0)
Maria@5 276
Maria@5 277 step = 1. / float(param['bps'])
Maria@5 278 NMFpitch = np.arange(param['min_midi'], param['max_midi'] + step, step)
Maria@5 279 nNote = len(NMFpitch)
Maria@5 280 nsal = nsal[0:nNote, :]
Maria@5 281
Maria@5 282 allsal = nsal
Maria@5 283 allsal[nsal < epsilon] = epsilon
Maria@5 284
Maria@5 285 # chroma mapping
Maria@5 286 n = param['bps'] * 12
Maria@5 287 mmap = np.tile(np.eye(n), (1, 5))
Maria@5 288 chroma = mmap.dot(allsal[0:(n * 5), :])
Maria@5 289 return chroma
Maria@5 290
Maria@5 291
Maria@5 292 def get_chroma(filename = 'test.wav'):
Maria@5 293 """Get chroma from audio file."""
Maria@5 294 y, sr = librosa.load(filename, sr = None)
Maria@5 295 p['fs'] = sr
Maria@5 296 p['step_size'] = int(round(0.005 * p['fs'])) # default hop size 0.005 sec.
Maria@5 297 sal, t, f0s = smoothie_salience(y, p)
Maria@5 298 sal = sal[-np.isnan(f0s), :]
Maria@5 299 chroma = wrap_to_octave(sal, p)
Maria@5 300 chromasr = p['fs'] / float(p['step_size'])
Maria@5 301 return chroma, chromasr