Maria@5
|
1 """Smoothie
|
Maria@5
|
2
|
Maria@5
|
3 A module aimed at melody salience and melody features. It contains
|
Maria@5
|
4
|
Maria@5
|
5 * a frequency transform inspired by constant-Q transforms
|
Maria@5
|
6 * an NMF-based melodic salience function
|
Maria@5
|
7 * methods that transform this salience into melody features (not implemented)
|
Maria@5
|
8
|
Maria@5
|
9 This program is free software; you can redistribute it and/or modify it under
|
Maria@5
|
10 the terms of the GNU General Public License as published by the Free Software
|
Maria@5
|
11 Foundation; either version 2 of the License, or (at your option) any later
|
Maria@5
|
12 version. See the file COPYING included with this distribution for more information.
|
Maria@5
|
13
|
Maria@5
|
14 author: Matthias Mauch
|
Maria@5
|
15 (addition of wrap_to_octave and get_chroma functions by Maria Panteli)
|
Maria@5
|
16
|
Maria@5
|
17 """
|
Maria@5
|
18
|
Maria@5
|
19 import sys
|
Maria@5
|
20 import numpy as np
|
Maria@5
|
21 from scipy.signal import hann
|
Maria@5
|
22 from scipy.sparse import csr_matrix
|
Maria@5
|
23 import librosa
|
Maria@5
|
24
|
Maria@5
|
25
|
Maria@5
|
26 p = {# smoothie q mapping parameters
|
Maria@5
|
27 'bpo' : 36,
|
Maria@5
|
28 'break1' : 500,
|
Maria@5
|
29 'break2' : 4000,
|
Maria@5
|
30 'f_max' : 8000,
|
Maria@5
|
31 'fs' : 16000,
|
Maria@5
|
32 # spectrogram parameters
|
Maria@5
|
33 'step_size' : 160,
|
Maria@5
|
34 'use_A_wtg' : False,
|
Maria@5
|
35 'harm_remove' : False,
|
Maria@5
|
36 # NMF and NMF dictionary parameters
|
Maria@5
|
37 's' : 0.85, # decay parameter
|
Maria@5
|
38 'n_harm' : 50, # I'm gonna do you no harm
|
Maria@5
|
39 'min_midi' : 25,
|
Maria@5
|
40 'max_midi' : 90.8,
|
Maria@5
|
41 'bps' : 5,
|
Maria@5
|
42 'n_iter' : 30
|
Maria@5
|
43 }
|
Maria@5
|
44
|
Maria@5
|
45
|
Maria@5
|
46 def update_nmf_simple(H, W, V):
|
Maria@5
|
47 """Update the gain matrix H using the multiplicative NMF update equation.
|
Maria@5
|
48
|
Maria@5
|
49 Keyword arguments:
|
Maria@5
|
50 H -- gain matrix
|
Maria@5
|
51 W -- template matrix
|
Maria@5
|
52 V -- target data matrix
|
Maria@5
|
53 """
|
Maria@5
|
54 WHV = np.dot(W, H) ** (-1) * V
|
Maria@5
|
55 H = H * np.dot(W.transpose(), WHV)
|
Maria@5
|
56 return H
|
Maria@5
|
57
|
Maria@5
|
58 def make_a_weight(f):
|
Maria@5
|
59 """Return the values of A-weighting at the input frequencies f."""
|
Maria@5
|
60 f = np.array(f)
|
Maria@5
|
61 zaehler = 12200 * 12200* (f**4)
|
Maria@5
|
62 nenner = (f*f + 20.6*20.6) * np.sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)) * (f*f + 12200*12200)
|
Maria@5
|
63 return zaehler / nenner
|
Maria@5
|
64
|
Maria@5
|
65
|
Maria@5
|
66 def nextpow2(x):
|
Maria@5
|
67 """Return the smallest integer n such that 2 ** n > x."""
|
Maria@5
|
68 return np.ceil(np.log2(x))
|
Maria@5
|
69
|
Maria@5
|
70
|
Maria@5
|
71 def get_smoothie_frequencies(p):
|
Maria@5
|
72 """Calculate filter centres and widths for the smooth Q transform."""
|
Maria@5
|
73 # I think this calculation should be reconsidered in order to have
|
Maria@5
|
74 # a more principled transition between linear and exponential
|
Maria@5
|
75 n = 1.0 / (2.0**(1.0/p['bpo']) - 1)
|
Maria@5
|
76 x = p['break1'] * 1.0 / n
|
Maria@5
|
77
|
Maria@5
|
78 # first linear stretch
|
Maria@5
|
79 # f = (np.arange(1, int(np.round(n)) + 1) * x).round().tolist()
|
Maria@5
|
80 f = (np.arange(1, int(np.round(n)) + 1) * x).tolist()
|
Maria@5
|
81
|
Maria@5
|
82 # exponential stretch
|
Maria@5
|
83 while max(f) < p['break2']:
|
Maria@5
|
84 f.append(max(f) * 2**(1.0/p['bpo']))
|
Maria@5
|
85
|
Maria@5
|
86 # final linear stretch
|
Maria@5
|
87 lastdiff = f[-1] - f[-2]
|
Maria@5
|
88 while max(f) < p['f_max']:
|
Maria@5
|
89 f.append(max(f) + lastdiff)
|
Maria@5
|
90
|
Maria@5
|
91 deltaf = np.diff(np.array(f))
|
Maria@5
|
92 f = f[:-1]
|
Maria@5
|
93
|
Maria@5
|
94 return f, deltaf
|
Maria@5
|
95
|
Maria@5
|
96
|
Maria@5
|
97 def create_smoothie_kernel(f, deltaf, fs):
|
Maria@5
|
98 """Create a sparse matrix that maps the complex DFT to the complex
|
Maria@5
|
99 smoothie representation.
|
Maria@5
|
100 """
|
Maria@5
|
101 print >>sys.stdout, "[ SMOOTHIE Q kernel calculation ... ]"
|
Maria@5
|
102 n_filter = len(f)
|
Maria@5
|
103 n_fft = 2**nextpow2(np.ceil(fs/min(deltaf)))
|
Maria@5
|
104
|
Maria@5
|
105 thresh = 0.0054
|
Maria@5
|
106 smoothie_kernel = np.zeros([n_fft, n_filter], np.complex64)
|
Maria@5
|
107
|
Maria@5
|
108 for i_filter in range(n_filter-1, -1, -1): # descending
|
Maria@5
|
109 # print i_filter
|
Maria@5
|
110 Q = f[i_filter] * 1.0 / deltaf[i_filter] # local Q for this filter
|
Maria@5
|
111 lgth = int(np.ceil(fs * 1.0 / deltaf[i_filter]))
|
Maria@5
|
112 lgthinv = 1.0 / lgth
|
Maria@5
|
113 offset = int(n_fft/2 - np.ceil(lgth * 0.5))
|
Maria@5
|
114 temp = hann(lgth) * lgthinv * \
|
Maria@5
|
115 np.exp(2j * np.pi * Q * (np.arange(lgth) - offset) * lgthinv)
|
Maria@5
|
116 # print(sum(hann(lgth)), Q, lgth, offset)
|
Maria@5
|
117 temp_kernel = np.zeros(n_fft, dtype = np.complex64)
|
Maria@5
|
118 temp_kernel[np.arange(lgth) + offset] = temp
|
Maria@5
|
119 spec_kernel = np.fft.fft(temp_kernel)
|
Maria@5
|
120 spec_kernel[abs(spec_kernel) <= thresh] = 0
|
Maria@5
|
121 smoothie_kernel[:,i_filter] = spec_kernel
|
Maria@5
|
122 return csr_matrix(smoothie_kernel.conj()/n_fft)
|
Maria@5
|
123
|
Maria@5
|
124
|
Maria@5
|
125 def smoothie_q_spectrogram(x, p):
|
Maria@5
|
126 """Calculate the actual spectrogram with smooth Q frequencies"""
|
Maria@5
|
127 print >>sys.stdout, "[ SMOOTHIE Q spectrogram ... ]"
|
Maria@5
|
128
|
Maria@5
|
129 # precalculate smoothie kernel
|
Maria@5
|
130 f, deltaf = get_smoothie_frequencies(p)
|
Maria@5
|
131 smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs'])
|
Maria@5
|
132 n_fft, n_filter = smoothie_kernel.shape
|
Maria@5
|
133
|
Maria@5
|
134 # some preparations
|
Maria@5
|
135 n_sample = len(x)
|
Maria@5
|
136 # n_frame = int(np.floor(n_sample / p['step_size']))
|
Maria@5
|
137 n_frame = int(np.ceil(n_sample / float(p['step_size']))) # added mp
|
Maria@5
|
138 t = (np.arange(n_frame) * p['step_size']) * 1.0 / p['fs']
|
Maria@5
|
139 smoothie_kernelT = smoothie_kernel.T
|
Maria@5
|
140
|
Maria@5
|
141 # allocate
|
Maria@5
|
142 s = np.zeros((n_filter, n_frame), dtype = np.complex64)
|
Maria@5
|
143
|
Maria@5
|
144 # pad (if wanted)
|
Maria@5
|
145 x = np.concatenate((np.zeros(n_fft/2), x, np.zeros(n_fft/2)))
|
Maria@5
|
146
|
Maria@5
|
147 for i_frame in range(n_frame):
|
Maria@5
|
148 smpl = p['step_size'] * i_frame
|
Maria@5
|
149 block = x[smpl + np.arange(n_fft)]
|
Maria@5
|
150 s[:, i_frame] = smoothie_kernelT.dot(np.fft.fft(block))
|
Maria@5
|
151
|
Maria@5
|
152 if p['use_A_wtg']:
|
Maria@5
|
153 a_wtg = make_a_weight(f)
|
Maria@5
|
154 s = s * a_wtg[:, np.newaxis]
|
Maria@5
|
155
|
Maria@5
|
156 return s, t
|
Maria@5
|
157
|
Maria@5
|
158
|
Maria@5
|
159 def mel_triangles(input_f):
|
Maria@5
|
160 """Make matrix with mel filters at the given frequencies.
|
Maria@5
|
161 Warning: this is a very coarse mel filterbank.
|
Maria@5
|
162 """
|
Maria@5
|
163 n_linearfilters = 3
|
Maria@5
|
164 n_logfilters0 = 30 # just something high, will be pruned later
|
Maria@5
|
165 linear_spacing = 100
|
Maria@5
|
166 log_spacing = 6.0/4
|
Maria@5
|
167 n_filter0 = n_linearfilters + n_logfilters0
|
Maria@5
|
168
|
Maria@5
|
169 freqs = np.zeros(n_filter0+2) # includes one more on either side, hence +2
|
Maria@5
|
170 freqs[range(n_linearfilters+1)] = \
|
Maria@5
|
171 np.arange(-1,n_linearfilters) * linear_spacing
|
Maria@5
|
172 freqs[range(n_linearfilters+1, n_filter0+2)] = \
|
Maria@5
|
173 freqs[n_linearfilters] * log_spacing ** np.arange(1, n_logfilters0+2)
|
Maria@5
|
174
|
Maria@5
|
175 centre_freqs = freqs[1:-1]
|
Maria@5
|
176 lower_freqs = freqs[0:-2]
|
Maria@5
|
177 upper_freqs = freqs[2:]
|
Maria@5
|
178
|
Maria@5
|
179 n_filter = list(np.nonzero(lower_freqs < max(input_f)))[0][-1] + 1
|
Maria@5
|
180
|
Maria@5
|
181 n_input_f = len(input_f)
|
Maria@5
|
182 mtr = np.zeros((n_input_f, n_filter))
|
Maria@5
|
183
|
Maria@5
|
184 for i_filter in range(n_filter):
|
Maria@5
|
185 for i_freq in range(n_input_f):
|
Maria@5
|
186 if input_f[i_freq] > lower_freqs[i_filter] \
|
Maria@5
|
187 and input_f[i_freq] <= upper_freqs[i_filter]:
|
Maria@5
|
188
|
Maria@5
|
189 if input_f[i_freq] <= centre_freqs[i_filter]:
|
Maria@5
|
190 mtr[i_freq, i_filter] = \
|
Maria@5
|
191 (input_f[i_freq] - lower_freqs[i_filter]) * 1.0 / \
|
Maria@5
|
192 (centre_freqs[i_filter] - lower_freqs[i_filter])
|
Maria@5
|
193 else:
|
Maria@5
|
194 mtr[i_freq, i_filter] = \
|
Maria@5
|
195 1 - (input_f[i_freq] - centre_freqs[i_filter]) / \
|
Maria@5
|
196 (upper_freqs[i_filter] - centre_freqs[i_filter])
|
Maria@5
|
197 return mtr
|
Maria@5
|
198
|
Maria@5
|
199
|
Maria@5
|
200 def create_smoothie_nfm_dict(smoothie_kernel, filterf, p):
|
Maria@5
|
201 """Create dictionary matrix with note templates."""
|
Maria@5
|
202 n_note = int((p['max_midi'] - p['min_midi']) * p['bps'] + 1)
|
Maria@5
|
203 n_fft, n_filter = smoothie_kernel.shape
|
Maria@5
|
204 t = ((np.arange(n_fft) + 1) - ((n_fft + 1)*0.5))/p['fs']
|
Maria@5
|
205
|
Maria@5
|
206 mtr = mel_triangles(filterf)
|
Maria@5
|
207 n_template = n_note + mtr.shape[1]
|
Maria@5
|
208
|
Maria@5
|
209 w = np.zeros((n_filter, n_template), dtype = np.complex64)
|
Maria@5
|
210 w[:,n_note:] = mtr
|
Maria@5
|
211 f0s = []
|
Maria@5
|
212
|
Maria@5
|
213 smoothie_kernelT = smoothie_kernel.T
|
Maria@5
|
214
|
Maria@5
|
215 for i_note in range(n_note):
|
Maria@5
|
216 midi = p['min_midi'] + i_note * 1.0 / p['bps']
|
Maria@5
|
217 f0 = 440 * 2 ** ((midi-69)*1.0/12)
|
Maria@5
|
218 f0s.append(f0)
|
Maria@5
|
219 sig = np.zeros(len(t))
|
Maria@5
|
220 for i_harm in range(p['n_harm']):
|
Maria@5
|
221 f = f0 * (i_harm + 1)
|
Maria@5
|
222 if f > p['fs'] * 0.5:
|
Maria@5
|
223 continue
|
Maria@5
|
224 x = np.sin(2*np.pi*f*t) * p['s']**(i_harm)
|
Maria@5
|
225 sig += x
|
Maria@5
|
226 w[:, i_note] = smoothie_kernelT.dot(np.fft.fft(sig))
|
Maria@5
|
227
|
Maria@5
|
228 for i in range(mtr.shape[1]):
|
Maria@5
|
229 f0s.append(np.nan)
|
Maria@5
|
230
|
Maria@5
|
231 w = abs(w)
|
Maria@5
|
232 col_sums = w.sum(axis = 0)
|
Maria@5
|
233 w = w / col_sums[np.newaxis, :] # normalisation
|
Maria@5
|
234 return w, np.array(f0s)
|
Maria@5
|
235
|
Maria@5
|
236
|
Maria@5
|
237 def smoothie_salience(x, p, do_tune = False):
|
Maria@5
|
238 """Calculate melodic salience."""
|
Maria@5
|
239 print >>sys.stdout, "[ SMOOTHIE Q salience ... ]"
|
Maria@5
|
240
|
Maria@5
|
241 # precalculate nmf kernel
|
Maria@5
|
242 f, deltaf = get_smoothie_frequencies(p)
|
Maria@5
|
243 smoothie_kernel = create_smoothie_kernel(f, deltaf, p['fs'])
|
Maria@5
|
244 w, f0s = create_smoothie_nfm_dict(smoothie_kernel, f, p)
|
Maria@5
|
245
|
Maria@5
|
246 # calculate smoothiegram
|
Maria@5
|
247 s, t = smoothie_q_spectrogram(x, p)
|
Maria@5
|
248 s[s==0] = np.spacing(1) # very small number
|
Maria@5
|
249 s = abs(s)
|
Maria@5
|
250
|
Maria@5
|
251 # run NMF
|
Maria@5
|
252 n_frame = len(t)
|
Maria@5
|
253 print >>sys.stdout, "[ SMOOTHIE Q : NMF updates ... ]"
|
Maria@5
|
254 sal = np.ones((w.shape[1], n_frame))
|
Maria@5
|
255 for i_iter in range(p['n_iter']):
|
Maria@5
|
256 sal = update_nmf_simple(sal, w, s)
|
Maria@5
|
257
|
Maria@5
|
258 #if do_tune:
|
Maria@5
|
259 # error("Tuning isn't yet implemented in the Python version")
|
Maria@5
|
260
|
Maria@5
|
261 return sal, t, f0s
|
Maria@5
|
262
|
Maria@5
|
263
|
Maria@5
|
264 def qnormalise(x, q, dim):
|
Maria@5
|
265 """Normalise by sum."""
|
Maria@5
|
266 nrm = np.sum(x**q, axis=dim, keepdims=True)**(1./float(q))
|
Maria@5
|
267 nrmmatrix = np.repeat(nrm, x.shape[0], axis=0)
|
Maria@5
|
268 x = x / nrmmatrix
|
Maria@5
|
269 return x
|
Maria@5
|
270
|
Maria@5
|
271
|
Maria@5
|
272 def wrap_to_octave(sal, param):
|
Maria@5
|
273 """Wrap pitch to octave."""
|
Maria@5
|
274 epsilon = 0.00001
|
Maria@5
|
275 nsal = qnormalise(sal, 2, 0)
|
Maria@5
|
276
|
Maria@5
|
277 step = 1. / float(param['bps'])
|
Maria@5
|
278 NMFpitch = np.arange(param['min_midi'], param['max_midi'] + step, step)
|
Maria@5
|
279 nNote = len(NMFpitch)
|
Maria@5
|
280 nsal = nsal[0:nNote, :]
|
Maria@5
|
281
|
Maria@5
|
282 allsal = nsal
|
Maria@5
|
283 allsal[nsal < epsilon] = epsilon
|
Maria@5
|
284
|
Maria@5
|
285 # chroma mapping
|
Maria@5
|
286 n = param['bps'] * 12
|
Maria@5
|
287 mmap = np.tile(np.eye(n), (1, 5))
|
Maria@5
|
288 chroma = mmap.dot(allsal[0:(n * 5), :])
|
Maria@5
|
289 return chroma
|
Maria@5
|
290
|
Maria@5
|
291
|
Maria@5
|
292 def get_chroma(filename = 'test.wav'):
|
Maria@5
|
293 """Get chroma from audio file."""
|
Maria@5
|
294 y, sr = librosa.load(filename, sr = None)
|
Maria@5
|
295 p['fs'] = sr
|
Maria@5
|
296 p['step_size'] = int(round(0.005 * p['fs'])) # default hop size 0.005 sec.
|
Maria@5
|
297 sal, t, f0s = smoothie_salience(y, p)
|
Maria@5
|
298 sal = sal[-np.isnan(f0s), :]
|
Maria@5
|
299 chroma = wrap_to_octave(sal, p)
|
Maria@5
|
300 chromasr = p['fs'] / float(p['step_size'])
|
Maria@5
|
301 return chroma, chromasr
|