Maria@4
|
1 # -*- coding: utf-8 -*-
|
Maria@4
|
2 """
|
Maria@4
|
3 Created on Mon Aug 17 13:37:00 2015
|
Maria@4
|
4
|
Maria@4
|
5 @author: mariapanteli
|
Maria@4
|
6 """
|
Maria@4
|
7
|
Maria@39
|
8 import numpy as np
|
Maria@39
|
9 import scipy.signal
|
Maria@4
|
10 import librosa
|
Maria@39
|
11
|
Maria@4
|
12
|
Maria@4
|
13 class OPMellin:
|
Maria@4
|
14 def __init__(self, win2sec=8):
|
Maria@4
|
15 self.y = None
|
Maria@4
|
16 self.sr = None
|
Maria@4
|
17 self.melspec = None
|
Maria@4
|
18 self.melsr = None
|
Maria@4
|
19 self.op = None
|
Maria@4
|
20 self.opmellin = None
|
Maria@4
|
21 self.win2sec = win2sec
|
Maria@39
|
22 self.hop2sec = 0.5
|
Maria@4
|
23
|
Maria@39
|
24
|
Maria@39
|
25 def load_audiofile(self, filename, sr=None, segment=True):
|
Maria@4
|
26 self.y, self.sr = librosa.load(filename, sr=sr)
|
Maria@4
|
27 if segment:
|
Maria@4
|
28 tracklength = self.y.shape[0]/float(self.sr)
|
Maria@4
|
29 startSample = 0
|
Maria@4
|
30 endSample = None
|
Maria@4
|
31 if tracklength > 90:
|
Maria@4
|
32 startPointSec = (tracklength/2.)-20
|
Maria@4
|
33 startSample = round(startPointSec*self.sr)
|
Maria@4
|
34 endSample = startSample+45*self.sr
|
Maria@4
|
35 self.y = self.y[startSample:endSample]
|
Maria@4
|
36
|
Maria@4
|
37
|
Maria@4
|
38 def mel_spectrogram(self, y=None, sr=None):
|
Maria@4
|
39 if self.y is None:
|
Maria@4
|
40 self.y = y
|
Maria@4
|
41 if self.sr is None:
|
Maria@4
|
42 self.sr = sr
|
Maria@4
|
43 win1 = int(round(0.04*self.sr))
|
Maria@4
|
44 hop1 = int(round(win1/8.))
|
Maria@39
|
45 nfft1 = int(2**np.ceil(np.log2(win1)))
|
Maria@4
|
46 nmels = 40
|
Maria@39
|
47 D = np.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2
|
Maria@4
|
48 melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000)
|
Maria@4
|
49 melsr = self.sr/float(hop1)
|
Maria@4
|
50 self.melspec = melspec
|
Maria@4
|
51 self.melsr = melsr
|
Maria@4
|
52
|
Maria@39
|
53
|
Maria@39
|
54 def post_process_spec(self, melspec=None, log=True, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
|
Maria@4
|
55 if self.melspec is None:
|
Maria@4
|
56 self.melspec = melspec
|
Maria@4
|
57 if log:
|
Maria@4
|
58 self.melspec = librosa.logamplitude(self.melspec)
|
Maria@4
|
59 if medianfilt:
|
Maria@4
|
60 ks = int(0.1 * self.melsr) # 100ms kernel size
|
Maria@4
|
61 if ks % 2 == 0: # ks must be odd
|
Maria@4
|
62 ks += 1
|
Maria@4
|
63 nmels = self.melspec.shape[0]
|
Maria@4
|
64 for i in range(nmels):
|
Maria@4
|
65 self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks)
|
Maria@4
|
66 if sqrt:
|
Maria@4
|
67 self.melspec = self.melspec**.5
|
Maria@4
|
68 if diff:
|
Maria@39
|
69 # append one frame before diff to keep number of frames the same
|
Maria@39
|
70 self.melspec = np.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1)
|
Maria@39
|
71 self.melspec = np.diff(self.melspec, n=1, axis=1)
|
Maria@4
|
72 if subtractmean:
|
Maria@4
|
73 mean = self.melspec.mean(axis=1)
|
Maria@4
|
74 mean.shape = (mean.shape[0], 1)
|
Maria@4
|
75 self.melspec = self.melspec - mean
|
Maria@4
|
76 if halfwave:
|
Maria@39
|
77 self.melspec[np.where(self.melspec < 0)] = 0
|
Maria@4
|
78 if maxnormal:
|
Maria@4
|
79 if self.melspec.max() != 0: # avoid division by 0
|
Maria@4
|
80 self.melspec = self.melspec/self.melspec.max()
|
Maria@4
|
81
|
Maria@4
|
82
|
Maria@39
|
83 def onset_patterns(self, melspec=None, melsr=None, center=False, bpmrange=True):
|
Maria@4
|
84 if self.melspec is None:
|
Maria@4
|
85 self.melspec = melspec
|
Maria@4
|
86 if self.melsr is None:
|
Maria@4
|
87 self.melsr = melsr
|
Maria@39
|
88 win2 = int(round(self.win2sec * self.melsr))
|
Maria@39
|
89 hop2 = int(round(self.hop2sec * self.melsr))
|
Maria@4
|
90 nmels, nmelframes = self.melspec.shape
|
Maria@39
|
91
|
Maria@39
|
92 #nfft2 = int(2**np.ceil(np.log2(win2)))
|
Maria@39
|
93 nfft2 = 2048 # nfft2 does not depend on win2??
|
Maria@39
|
94 melspectemp = self.melspec
|
Maria@39
|
95 if ((nfft2 > win2) and (center is False)):
|
Maria@39
|
96 # in librosa version < 6.0, window is padded to the size of nfft
|
Maria@39
|
97 # so if win2<nfft2 the frames returned are less than expected
|
Maria@39
|
98 # solution: pad the signal by (nfft2-win2)/2 on the edges
|
Maria@39
|
99 # then frame decomposition (n_frames) matches the one expected using win2
|
Maria@39
|
100 melspectemp = np.concatenate([np.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, np.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1)
|
Maria@39
|
101 if melspectemp.shape[1]<nfft2:
|
Maria@39
|
102 # pad with zeros to have at least one 8-sec window
|
Maria@39
|
103 nzeros = nfft2 - melspectemp.shape[1]
|
Maria@39
|
104 melspectemp = np.concatenate([np.zeros((nmels,int(np.ceil(nzeros / 2.)))),melspectemp, np.zeros((nmels,int(np.ceil(nzeros / 2.))))],axis=1)
|
Maria@39
|
105 #melspectemp = np.concatenate([melspectemp, np.zeros((nmels,nzeros))],axis=1)
|
Maria@39
|
106 # nframes = int(round(np.ceil(round(nmelframes/hop2))))
|
Maria@39
|
107 ff0 = np.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
|
Maria@39
|
108 nframes = ff0.shape[1]
|
Maria@39
|
109 # nmags, nframes = ff0.shape
|
Maria@39
|
110 if bpmrange:
|
Maria@39
|
111 freqresinbpm = float(self.melsr)/float(nfft2/2.)*60.
|
Maria@39
|
112 minmag = int(np.floor(30./freqresinbpm)) # min tempo 30bpm
|
Maria@39
|
113 maxmag = int(np.ceil(960./freqresinbpm)) # max tempo 960 bpm
|
Maria@39
|
114 magsinds = range(minmag, maxmag)
|
Maria@39
|
115 else:
|
Maria@39
|
116 magsinds = range(0, 200)
|
Maria@39
|
117 nmags = len(magsinds)
|
Maria@39
|
118 fft2 = np.zeros((nmels, nmags, nframes))
|
Maria@39
|
119 for i in range(nmels):
|
Maria@39
|
120 fftmags = np.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
|
Maria@39
|
121 fftmags = fftmags[magsinds, :]
|
Maria@39
|
122 fft2[i, :, :] = fftmags
|
Maria@39
|
123 op = fft2
|
Maria@4
|
124 self.op = op
|
Maria@4
|
125
|
Maria@39
|
126
|
Maria@39
|
127 def post_process_op(self, medianfiltOP=True):
|
Maria@4
|
128 if medianfiltOP:
|
Maria@4
|
129 hop2 = int(round(0.5*self.melsr))
|
Maria@4
|
130 ssr = self.melsr/float(hop2)
|
Maria@4
|
131 ks = int(0.5 * ssr) # 100ms kernel size
|
Maria@4
|
132 if ks % 2 == 0: # ks must be odd
|
Maria@4
|
133 ks += 1
|
Maria@4
|
134 nmels, nmags, nframes = self.op.shape
|
Maria@4
|
135 for i in range(nmels):
|
Maria@4
|
136 for j in range(nframes):
|
Maria@4
|
137 #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks)
|
Maria@39
|
138 self.op[i,:,j] = np.convolve(self.op[i,:,j], np.ones(ks)/ks, mode='same')
|
Maria@39
|
139
|
Maria@4
|
140
|
Maria@4
|
141 def mellin_transform(self, op=None):
|
Maria@4
|
142 if self.op is None:
|
Maria@4
|
143 self.op = op
|
Maria@4
|
144 nmels, nmags, nframes = self.op.shape
|
Maria@4
|
145 #nmagsout = min(200, nmags)
|
Maria@4
|
146 nmagsout = 200
|
Maria@39
|
147 u_max = np.log(nmags)
|
Maria@39
|
148 delta_c = np.pi / u_max
|
Maria@4
|
149 c_max = nmagsout
|
Maria@39
|
150 c = np.arange(delta_c, c_max, delta_c)
|
Maria@4
|
151 k = range(1, nmags)
|
Maria@4
|
152 exponent = 0.5 - c*1j
|
Maria@4
|
153
|
Maria@39
|
154 normMat = 1./(exponent * np.sqrt(2*np.pi))
|
Maria@4
|
155 normMat.shape = (normMat.shape[0], 1)
|
Maria@39
|
156 normMat = np.repeat(normMat.T, nmels, axis=0)
|
Maria@39
|
157 kernelMat = np.asarray([np.power(ki, exponent) for ki in k])
|
Maria@39
|
158 opmellin = np.zeros((nmels, nmagsout, nframes))
|
Maria@4
|
159 for i in range(nframes):
|
Maria@4
|
160 self.op[:, -1, i] = 0
|
Maria@39
|
161 deltaMat = - np.diff(self.op[:, :, i])
|
Maria@39
|
162 mellin = np.abs(np.dot(deltaMat, kernelMat) * normMat)
|
Maria@4
|
163 opmellin[:, :, i] = mellin[:, :nmagsout]
|
Maria@4
|
164 self.opmellin = opmellin
|
Maria@4
|
165
|
Maria@4
|
166
|
Maria@4
|
167 def post_process_opmellin(self, opmellin=None, aveFreq=False, normFrame=True):
|
Maria@4
|
168 if self.opmellin is None:
|
Maria@4
|
169 self.opmellin = opmellin
|
Maria@4
|
170 if aveFreq:
|
Maria@39
|
171 self.opmellin = np.mean(self.opmellin, axis=0)
|
Maria@4
|
172 else: # just reshape
|
Maria@4
|
173 nmels, nmags, nframes = self.opmellin.shape
|
Maria@4
|
174 self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
|
Maria@4
|
175 if normFrame:
|
Maria@39
|
176 min_opmellin = np.amin(self.opmellin, axis=0, keepdims=True)
|
Maria@39
|
177 max_opmellin = np.amax(self.opmellin, axis=0, keepdims=True)
|
Maria@4
|
178 denom = max_opmellin - min_opmellin
|
Maria@4
|
179 denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
|
Maria@4
|
180 self.opmellin = (self.opmellin - min_opmellin) / denom
|
Maria@4
|
181
|
Maria@4
|
182
|
Maria@4
|
183 def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False):
|
Maria@4
|
184 self.load_audiofile(filename=filename)
|
Maria@4
|
185 self.mel_spectrogram()
|
Maria@4
|
186 self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec) # sqrt seems to work better
|
Maria@4
|
187 self.onset_patterns(logfilter=logfilter, center=center)
|
Maria@4
|
188 self.post_process_op(medianfiltOP=medianfiltOP)
|
Maria@4
|
189 self.mellin_transform()
|
Maria@4
|
190 self.post_process_opmellin()
|
Maria@4
|
191 return self.opmellin
|
Maria@4
|
192
|
Maria@39
|
193
|
Maria@4
|
194 def get_opmellin_from_melspec(self, melspec=[], melsr=[]):
|
Maria@4
|
195 self.melspec = melspec
|
Maria@4
|
196 self.melsr = melsr
|
Maria@4
|
197 self.post_process_spec(log=False, sqrt=True, medianfilt=True) # sqrt seems to work better
|
m@43
|
198 self.onset_patterns(center=False)
|
Maria@4
|
199 self.post_process_op(medianfiltOP=True)
|
Maria@4
|
200 self.mellin_transform()
|
Maria@4
|
201 self.post_process_opmellin()
|
Maria@4
|
202 return self.opmellin
|
Maria@4
|
203
|
Maria@39
|
204
|
Maria@4
|
205 if __name__ == '__main__':
|
Maria@4
|
206 op = OPMellin()
|
Maria@4
|
207 op.get_opmellin()
|