annotate scripts/OPMellin.py @ 105:edd82eb89b4b branch-tests tip

Merge
author Maria Panteli
date Sun, 15 Oct 2017 13:36:59 +0100
parents b1d9ba5f888e
children
rev   line source
Maria@4 1 # -*- coding: utf-8 -*-
Maria@4 2 """
Maria@4 3 Created on Mon Aug 17 13:37:00 2015
Maria@4 4
Maria@4 5 @author: mariapanteli
Maria@4 6 """
Maria@4 7
Maria@39 8 import numpy as np
Maria@39 9 import scipy.signal
Maria@4 10 import librosa
Maria@39 11
Maria@4 12
Maria@4 13 class OPMellin:
Maria@4 14 def __init__(self, win2sec=8):
Maria@4 15 self.y = None
Maria@4 16 self.sr = None
Maria@4 17 self.melspec = None
Maria@4 18 self.melsr = None
Maria@4 19 self.op = None
Maria@4 20 self.opmellin = None
Maria@4 21 self.win2sec = win2sec
Maria@39 22 self.hop2sec = 0.5
Maria@4 23
Maria@39 24
Maria@39 25 def load_audiofile(self, filename, sr=None, segment=True):
Maria@4 26 self.y, self.sr = librosa.load(filename, sr=sr)
Maria@4 27 if segment:
Maria@4 28 tracklength = self.y.shape[0]/float(self.sr)
Maria@4 29 startSample = 0
Maria@4 30 endSample = None
Maria@4 31 if tracklength > 90:
Maria@4 32 startPointSec = (tracklength/2.)-20
Maria@4 33 startSample = round(startPointSec*self.sr)
Maria@4 34 endSample = startSample+45*self.sr
Maria@4 35 self.y = self.y[startSample:endSample]
Maria@4 36
Maria@4 37
Maria@4 38 def mel_spectrogram(self, y=None, sr=None):
Maria@4 39 if self.y is None:
Maria@4 40 self.y = y
Maria@4 41 if self.sr is None:
Maria@4 42 self.sr = sr
Maria@4 43 win1 = int(round(0.04*self.sr))
Maria@4 44 hop1 = int(round(win1/8.))
Maria@39 45 nfft1 = int(2**np.ceil(np.log2(win1)))
Maria@4 46 nmels = 40
Maria@39 47 D = np.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2
Maria@4 48 melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000)
Maria@4 49 melsr = self.sr/float(hop1)
Maria@4 50 self.melspec = melspec
Maria@4 51 self.melsr = melsr
Maria@4 52
Maria@39 53
Maria@39 54 def post_process_spec(self, melspec=None, log=True, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
Maria@4 55 if self.melspec is None:
Maria@4 56 self.melspec = melspec
Maria@4 57 if log:
Maria@4 58 self.melspec = librosa.logamplitude(self.melspec)
Maria@4 59 if medianfilt:
Maria@4 60 ks = int(0.1 * self.melsr) # 100ms kernel size
Maria@4 61 if ks % 2 == 0: # ks must be odd
Maria@4 62 ks += 1
Maria@4 63 nmels = self.melspec.shape[0]
Maria@4 64 for i in range(nmels):
Maria@4 65 self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks)
Maria@4 66 if sqrt:
Maria@4 67 self.melspec = self.melspec**.5
Maria@4 68 if diff:
Maria@39 69 # append one frame before diff to keep number of frames the same
Maria@39 70 self.melspec = np.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1)
Maria@39 71 self.melspec = np.diff(self.melspec, n=1, axis=1)
Maria@4 72 if subtractmean:
Maria@4 73 mean = self.melspec.mean(axis=1)
Maria@4 74 mean.shape = (mean.shape[0], 1)
Maria@4 75 self.melspec = self.melspec - mean
Maria@4 76 if halfwave:
Maria@39 77 self.melspec[np.where(self.melspec < 0)] = 0
Maria@4 78 if maxnormal:
Maria@4 79 if self.melspec.max() != 0: # avoid division by 0
Maria@4 80 self.melspec = self.melspec/self.melspec.max()
Maria@4 81
Maria@4 82
Maria@39 83 def onset_patterns(self, melspec=None, melsr=None, center=False, bpmrange=True):
Maria@4 84 if self.melspec is None:
Maria@4 85 self.melspec = melspec
Maria@4 86 if self.melsr is None:
Maria@4 87 self.melsr = melsr
Maria@39 88 win2 = int(round(self.win2sec * self.melsr))
Maria@39 89 hop2 = int(round(self.hop2sec * self.melsr))
Maria@4 90 nmels, nmelframes = self.melspec.shape
Maria@39 91
Maria@39 92 #nfft2 = int(2**np.ceil(np.log2(win2)))
Maria@39 93 nfft2 = 2048 # nfft2 does not depend on win2??
Maria@39 94 melspectemp = self.melspec
Maria@39 95 if ((nfft2 > win2) and (center is False)):
Maria@39 96 # in librosa version < 6.0, window is padded to the size of nfft
Maria@39 97 # so if win2<nfft2 the frames returned are less than expected
Maria@39 98 # solution: pad the signal by (nfft2-win2)/2 on the edges
Maria@39 99 # then frame decomposition (n_frames) matches the one expected using win2
Maria@39 100 melspectemp = np.concatenate([np.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, np.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1)
Maria@39 101 if melspectemp.shape[1]<nfft2:
Maria@39 102 # pad with zeros to have at least one 8-sec window
Maria@39 103 nzeros = nfft2 - melspectemp.shape[1]
Maria@39 104 melspectemp = np.concatenate([np.zeros((nmels,int(np.ceil(nzeros / 2.)))),melspectemp, np.zeros((nmels,int(np.ceil(nzeros / 2.))))],axis=1)
Maria@39 105 #melspectemp = np.concatenate([melspectemp, np.zeros((nmels,nzeros))],axis=1)
Maria@39 106 # nframes = int(round(np.ceil(round(nmelframes/hop2))))
Maria@39 107 ff0 = np.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
Maria@39 108 nframes = ff0.shape[1]
Maria@39 109 # nmags, nframes = ff0.shape
Maria@39 110 if bpmrange:
Maria@39 111 freqresinbpm = float(self.melsr)/float(nfft2/2.)*60.
Maria@39 112 minmag = int(np.floor(30./freqresinbpm)) # min tempo 30bpm
Maria@39 113 maxmag = int(np.ceil(960./freqresinbpm)) # max tempo 960 bpm
Maria@39 114 magsinds = range(minmag, maxmag)
Maria@39 115 else:
Maria@39 116 magsinds = range(0, 200)
Maria@39 117 nmags = len(magsinds)
Maria@39 118 fft2 = np.zeros((nmels, nmags, nframes))
Maria@39 119 for i in range(nmels):
Maria@39 120 fftmags = np.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
Maria@39 121 fftmags = fftmags[magsinds, :]
Maria@39 122 fft2[i, :, :] = fftmags
Maria@39 123 op = fft2
Maria@4 124 self.op = op
Maria@4 125
Maria@39 126
Maria@39 127 def post_process_op(self, medianfiltOP=True):
Maria@4 128 if medianfiltOP:
Maria@4 129 hop2 = int(round(0.5*self.melsr))
Maria@4 130 ssr = self.melsr/float(hop2)
Maria@4 131 ks = int(0.5 * ssr) # 100ms kernel size
Maria@4 132 if ks % 2 == 0: # ks must be odd
Maria@4 133 ks += 1
Maria@4 134 nmels, nmags, nframes = self.op.shape
Maria@4 135 for i in range(nmels):
Maria@4 136 for j in range(nframes):
Maria@4 137 #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks)
Maria@39 138 self.op[i,:,j] = np.convolve(self.op[i,:,j], np.ones(ks)/ks, mode='same')
Maria@39 139
Maria@4 140
Maria@4 141 def mellin_transform(self, op=None):
Maria@4 142 if self.op is None:
Maria@4 143 self.op = op
Maria@4 144 nmels, nmags, nframes = self.op.shape
Maria@4 145 #nmagsout = min(200, nmags)
Maria@4 146 nmagsout = 200
Maria@39 147 u_max = np.log(nmags)
Maria@39 148 delta_c = np.pi / u_max
Maria@4 149 c_max = nmagsout
Maria@39 150 c = np.arange(delta_c, c_max, delta_c)
Maria@4 151 k = range(1, nmags)
Maria@4 152 exponent = 0.5 - c*1j
Maria@4 153
Maria@39 154 normMat = 1./(exponent * np.sqrt(2*np.pi))
Maria@4 155 normMat.shape = (normMat.shape[0], 1)
Maria@39 156 normMat = np.repeat(normMat.T, nmels, axis=0)
Maria@39 157 kernelMat = np.asarray([np.power(ki, exponent) for ki in k])
Maria@39 158 opmellin = np.zeros((nmels, nmagsout, nframes))
Maria@4 159 for i in range(nframes):
Maria@4 160 self.op[:, -1, i] = 0
Maria@39 161 deltaMat = - np.diff(self.op[:, :, i])
Maria@39 162 mellin = np.abs(np.dot(deltaMat, kernelMat) * normMat)
Maria@4 163 opmellin[:, :, i] = mellin[:, :nmagsout]
Maria@4 164 self.opmellin = opmellin
Maria@4 165
Maria@4 166
Maria@4 167 def post_process_opmellin(self, opmellin=None, aveFreq=False, normFrame=True):
Maria@4 168 if self.opmellin is None:
Maria@4 169 self.opmellin = opmellin
Maria@4 170 if aveFreq:
Maria@39 171 self.opmellin = np.mean(self.opmellin, axis=0)
Maria@4 172 else: # just reshape
Maria@4 173 nmels, nmags, nframes = self.opmellin.shape
Maria@4 174 self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
Maria@4 175 if normFrame:
Maria@39 176 min_opmellin = np.amin(self.opmellin, axis=0, keepdims=True)
Maria@39 177 max_opmellin = np.amax(self.opmellin, axis=0, keepdims=True)
Maria@4 178 denom = max_opmellin - min_opmellin
Maria@4 179 denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
Maria@4 180 self.opmellin = (self.opmellin - min_opmellin) / denom
Maria@4 181
Maria@4 182
Maria@4 183 def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False):
Maria@4 184 self.load_audiofile(filename=filename)
Maria@4 185 self.mel_spectrogram()
Maria@4 186 self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec) # sqrt seems to work better
Maria@4 187 self.onset_patterns(logfilter=logfilter, center=center)
Maria@4 188 self.post_process_op(medianfiltOP=medianfiltOP)
Maria@4 189 self.mellin_transform()
Maria@4 190 self.post_process_opmellin()
Maria@4 191 return self.opmellin
Maria@4 192
Maria@39 193
Maria@4 194 def get_opmellin_from_melspec(self, melspec=[], melsr=[]):
Maria@4 195 self.melspec = melspec
Maria@4 196 self.melsr = melsr
Maria@4 197 self.post_process_spec(log=False, sqrt=True, medianfilt=True) # sqrt seems to work better
m@43 198 self.onset_patterns(center=False)
Maria@4 199 self.post_process_op(medianfiltOP=True)
Maria@4 200 self.mellin_transform()
Maria@4 201 self.post_process_opmellin()
Maria@4 202 return self.opmellin
Maria@4 203
Maria@39 204
Maria@4 205 if __name__ == '__main__':
Maria@4 206 op = OPMellin()
Maria@4 207 op.get_opmellin()