Maria@1
|
1 # -*- coding: utf-8 -*-
|
Maria@1
|
2 """
|
Maria@1
|
3 Created on Mon Aug 17 13:37:00 2015
|
Maria@1
|
4
|
Maria@1
|
5 @author: mariapanteli
|
Maria@1
|
6 """
|
Maria@1
|
7 """Scale transform descriptor from mel spectrogram"""
|
Maria@1
|
8
|
Maria@1
|
9 import numpy
|
Maria@1
|
10 import librosa
|
Maria@1
|
11 import scipy.signal
|
Maria@1
|
12
|
Maria@1
|
13 class ScaleTransform:
|
Maria@1
|
14 def __init__(self):
|
Maria@1
|
15 self.y = None
|
Maria@1
|
16 self.sr = None
|
Maria@1
|
17 self.nmels = 40
|
Maria@1
|
18 self.melspec = None
|
Maria@1
|
19 self.melsr = None
|
Maria@1
|
20 self.op = None
|
Maria@1
|
21 self.opmellin = None
|
Maria@1
|
22
|
Maria@1
|
23 def load_audiofile(self, filename='test.wav', sr=None):
|
Maria@1
|
24 """Load audio"""
|
Maria@1
|
25 self.y, self.sr = librosa.load(filename, sr=sr)
|
Maria@1
|
26
|
Maria@1
|
27 def mel_spectrogram(self, y=None, sr=None):
|
Maria@1
|
28 """Get mel spectrogram"""
|
Maria@1
|
29 if self.y is None:
|
Maria@1
|
30 self.y = y
|
Maria@1
|
31 if self.sr is None:
|
Maria@1
|
32 self.sr = sr
|
Maria@1
|
33 win1 = int(round(0.04 * self.sr))
|
Maria@1
|
34 hop1 = int(round(win1 / 8.))
|
Maria@1
|
35 nfft1 = int(2 ** numpy.ceil(numpy.log2(win1)))
|
Maria@1
|
36 D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming)) ** 2
|
Maria@1
|
37 melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=self.nmels, fmax=8000)
|
Maria@1
|
38 melsr = self.sr/float(hop1)
|
Maria@1
|
39 self.melspec = melspec
|
Maria@1
|
40 self.melsr = melsr
|
Maria@1
|
41
|
Maria@1
|
42 def post_process_spec(self, melspec=None, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
|
Maria@1
|
43 """Some post processing of the mel spectrogram"""
|
Maria@1
|
44 if self.melspec is None:
|
Maria@1
|
45 self.melspec = melspec
|
Maria@1
|
46 if medianfilt:
|
Maria@1
|
47 ks = int(0.1 * self.melsr) # 100ms kernel size
|
Maria@1
|
48 if ks % 2 == 0: ks += 1 # ks must be odd
|
Maria@1
|
49 for i in range(self.nmels):
|
Maria@1
|
50 self.melspec[i, :] = scipy.signal.medfilt(self.melspec[i, :], kernel_size=ks)
|
Maria@1
|
51 if sqrt:
|
Maria@1
|
52 self.melspec = self.melspec ** .5
|
Maria@1
|
53 if diff:
|
Maria@1
|
54 # append one frame before diff to keep number of frames the same
|
Maria@1
|
55 self.melspec = numpy.concatenate((self.melspec, self.melspec[:, -1, None]), axis=1)
|
Maria@1
|
56 self.melspec = numpy.diff(self.melspec, n=1, axis=1)
|
Maria@1
|
57 if subtractmean:
|
Maria@1
|
58 mean = self.melspec.mean(axis=1)
|
Maria@1
|
59 mean.shape = (mean.shape[0], 1)
|
Maria@1
|
60 self.melspec = self.melspec - mean
|
Maria@1
|
61 if halfwave:
|
Maria@1
|
62 self.melspec[numpy.where(self.melspec < 0)] = 0
|
Maria@1
|
63 if maxnormal:
|
Maria@1
|
64 self.melspec = self.melspec / self.melspec.max()
|
Maria@1
|
65
|
Maria@1
|
66 def onset_patterns(self, melspec=None, melsr=None, center=False):
|
Maria@1
|
67 """Get rhythm periodicities by applying stft in each mel band"""
|
Maria@1
|
68 if self.melspec is None:
|
Maria@1
|
69 self.melspec = melspec
|
Maria@1
|
70 if self.melsr is None:
|
Maria@1
|
71 self.melsr = melsr
|
Maria@1
|
72 win2 = int(round(8 * self.melsr))
|
Maria@1
|
73 hop2 = int(round(0.5 * self.melsr))
|
Maria@1
|
74 nfft2 = int(2**numpy.ceil(numpy.log2(win2)))
|
Maria@1
|
75
|
Maria@1
|
76 # some preprocessing for the second frame decomposition
|
Maria@1
|
77 melspectemp = self.melspec
|
Maria@1
|
78 if melspectemp.shape[1] < nfft2:
|
Maria@1
|
79 # if buffer too short pad with zeros to have at least one 8-sec window
|
Maria@1
|
80 nzeros = nfft2 - melspectemp.shape[1]
|
Maria@1
|
81 melspectemp = numpy.concatenate([numpy.zeros((self.nmels, int(numpy.ceil(nzeros / 2.)))), melspectemp, numpy.zeros((self.nmels,int(numpy.ceil(nzeros / 2.))))], axis=1)
|
Maria@1
|
82 temp = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
|
Maria@1
|
83 nframes = temp.shape[1]
|
Maria@1
|
84
|
Maria@1
|
85 # filter periodicities in the range 30-960 bpm
|
Maria@1
|
86 freqresinbpm = float(self.melsr) / float(nfft2/2.)*60.
|
Maria@1
|
87 minmag = int(numpy.floor(30. / freqresinbpm)) # min tempo 30bpm
|
Maria@1
|
88 maxmag = int(numpy.ceil(960. / freqresinbpm)) # max tempo 960 bpm
|
Maria@1
|
89 magsinds = range(minmag, maxmag) # indices of selected stft magnitudes
|
Maria@1
|
90
|
Maria@1
|
91 # loop over all mel_bands and get rhythm periodicities (stft magnitudes)
|
Maria@1
|
92 nmags = len(magsinds)
|
Maria@1
|
93 fft2 = numpy.zeros((self.nmels, nmags, nframes))
|
Maria@1
|
94 for i in range(self.nmels):
|
Maria@1
|
95 fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
|
Maria@1
|
96 fftmags = fftmags[magsinds, :]
|
Maria@1
|
97 fft2[i, :, :] = fftmags
|
Maria@1
|
98 op = fft2
|
Maria@1
|
99 self.op = op
|
Maria@1
|
100
|
Maria@1
|
101
|
Maria@1
|
102 def post_process_op(self, median_filt=True):
|
Maria@1
|
103 """Some smoothing of the onset patterns"""
|
Maria@1
|
104 if median_filt:
|
Maria@1
|
105 hop2 = int(round(0.5 * self.melsr))
|
Maria@1
|
106 ssr = self.melsr/float(hop2)
|
Maria@1
|
107 ks = int(0.5 * ssr) # 100ms kernel size
|
Maria@1
|
108 if ks % 2 == 0: ks += 1 # ks must be odd
|
Maria@1
|
109 nmels, nmags, nframes = self.op.shape
|
Maria@1
|
110 for i in range(nmels):
|
Maria@1
|
111 for j in range(nframes):
|
Maria@1
|
112 self.op[i, :, j] = numpy.convolve(self.op[i, :, j], numpy.ones(ks) / ks, mode='same')
|
Maria@1
|
113
|
Maria@1
|
114
|
Maria@1
|
115 def mellin_transform(self, op=None):
|
Maria@1
|
116 """
|
Maria@1
|
117 Apply mellin transform to remove tempo (scale) information.
|
Maria@1
|
118 Code adapted from a MATLAB implementation by Andre Holzapfel.
|
Maria@1
|
119 """
|
Maria@1
|
120 if self.op is None:
|
Maria@1
|
121 self.op = op
|
Maria@1
|
122 nmels, nmags, nframes = self.op.shape
|
Maria@1
|
123 nmagsout = 200
|
Maria@1
|
124 u_max = numpy.log(nmags)
|
Maria@1
|
125 delta_c = numpy.pi / u_max
|
Maria@1
|
126 c_max = nmagsout
|
Maria@1
|
127 c = numpy.arange(delta_c, c_max, delta_c)
|
Maria@1
|
128 k = range(1, nmags)
|
Maria@1
|
129 exponent = 0.5 - c * 1j
|
Maria@1
|
130
|
Maria@1
|
131 normMat = 1. / (exponent * numpy.sqrt(2 * numpy.pi))
|
Maria@1
|
132 normMat.shape = (normMat.shape[0], 1)
|
Maria@1
|
133 normMat = numpy.repeat(normMat.T, nmels, axis=0)
|
Maria@1
|
134 kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k])
|
Maria@1
|
135 opmellin = numpy.zeros((nmels, nmagsout, nframes))
|
Maria@1
|
136 for i in range(nframes):
|
Maria@1
|
137 self.op[:, -1, i] = 0
|
Maria@1
|
138 deltaMat = - numpy.diff(self.op[:, :, i])
|
Maria@1
|
139 mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat)
|
Maria@1
|
140 opmellin[:, :, i] = mellin[:, :nmagsout]
|
Maria@1
|
141 self.opmellin = opmellin
|
Maria@1
|
142
|
Maria@1
|
143
|
Maria@1
|
144 def post_process_mellin(self, opmellin=None, normFrame=True, aveBands=False):
|
Maria@1
|
145 """Some post processing of the scale transform"""
|
Maria@1
|
146 if self.opmellin is None:
|
Maria@1
|
147 self.opmellin = opmellin
|
Maria@1
|
148 if aveBands:
|
Maria@1
|
149 self.opmellin = numpy.mean(self.opmellin, axis=0, keepdims=True)
|
Maria@1
|
150 nmels, nmags, nframes = self.opmellin.shape
|
Maria@1
|
151 self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
|
Maria@1
|
152 if normFrame:
|
Maria@1
|
153 min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True)
|
Maria@1
|
154 max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True)
|
Maria@1
|
155 denom = max_opmellin - min_opmellin
|
Maria@1
|
156 denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
|
Maria@1
|
157 self.opmellin = (self.opmellin - min_opmellin) / denom
|
Maria@1
|
158
|
Maria@1
|
159
|
Maria@1
|
160 def get_scale_transform(self, filename='test.wav'):
|
Maria@1
|
161 """Return scale transform for filename"""
|
Maria@1
|
162 self.load_audiofile(filename=filename)
|
Maria@1
|
163 self.mel_spectrogram()
|
Maria@1
|
164 self.post_process_spec()
|
Maria@1
|
165 self.onset_patterns()
|
Maria@1
|
166 self.post_process_op()
|
Maria@1
|
167 self.mellin_transform()
|
Maria@1
|
168 self.post_process_mellin(aveBands=True)
|
Maria@1
|
169 return self.opmellin
|
Maria@1
|
170
|
Maria@1
|
171
|
Maria@1
|
172 if __name__ == '__main__':
|
Maria@1
|
173 op = ScaleTransform()
|
Maria@1
|
174 op.get_scale_transform()
|