Mercurial > hg > plosone_underreview
view scripts/OPMellin.py @ 37:2cc444441f42 branch-tests
refactor pitchbihist and introduce default melody_sr
author | Maria Panteli |
---|---|
date | Thu, 14 Sep 2017 14:50:00 +0100 |
parents | e50c63cf96be |
children | e5e8e8a96948 |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Mon Aug 17 13:37:00 2015 @author: mariapanteli """ import librosa import scipy.signal # import matplotlib.pyplot as plt import numpy class OPMellin: def __init__(self, win2sec=8): self.y = None self.sr = None self.melspec = None self.melsr = None self.op = None self.opmellin = None self.win2sec = win2sec def load_audiofile(self, filename='test.wav', sr=None, segment=True): self.y, self.sr = librosa.load(filename, sr=sr) #self.y, self.sr = librosa.load(filename) # default sr=22050 if segment: tracklength = self.y.shape[0]/float(self.sr) startSample = 0 endSample = None if tracklength > 90: startPointSec = (tracklength/2.)-20 startSample = round(startPointSec*self.sr) endSample = startSample+45*self.sr self.y = self.y[startSample:endSample] # def mel_spectrogram(self, y=None, sr=None, filename='1_21.wav'): def mel_spectrogram(self, y=None, sr=None): if self.y is None: self.y = y if self.sr is None: self.sr = sr win1 = int(round(0.04*self.sr)) hop1 = int(round(win1/8.)) nfft1 = int(2**numpy.ceil(numpy.log2(win1))) nmels = 40 # y, sr = librosa.load(filename, sr=fs) # melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=nfft, win_length=win1, window=scipy.signal.hamming, hop_length=hop1, n_mels=nmels) # melspec = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_fft=nfft, hop_length=hop1, n_mels=nmels) D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2 #melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels) melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000) melsr = self.sr/float(hop1) # librosa.display.specshow(melspec) # return melspec, melsr self.melspec = melspec self.melsr = melsr # post process spectrogram def post_process_spec(self, melspec=None, log=True, medianfilt=False, sqrt=False, smooth=False, diff=True, subtractmean=True, halfwave=True, maxnormal=True): if self.melspec is None: self.melspec = melspec if log: self.melspec = librosa.logamplitude(self.melspec) if medianfilt: ks = int(0.1 * self.melsr) # 100ms kernel size if ks % 2 == 0: # ks must be odd ks += 1 nmels = self.melspec.shape[0] for i in range(nmels): self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks) if sqrt: self.melspec = self.melspec**.5 if smooth: step = 50.0 nmels = self.melspec.shape[0] mm = self.melspec for i in range(nmels): mm[i,:] = numpy.convolve(mm[i,:], numpy.ones(step)/step, mode='same') self.melspec = mm if diff: self.melspec = numpy.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1) # append one frame before diff to keep number of frames the same self.melspec = numpy.diff(self.melspec, n=1, axis=1) if subtractmean: mean = self.melspec.mean(axis=1) mean.shape = (mean.shape[0], 1) self.melspec = self.melspec - mean if halfwave: self.melspec[numpy.where(self.melspec < 0)] = 0 if maxnormal: if self.melspec.max() != 0: # avoid division by 0 self.melspec = self.melspec/self.melspec.max() # return melspec def onset_patterns(self, melspec=None, melsr=None, fft=True, logfilter=False, center=True, bpmrange=True): if self.melspec is None: self.melspec = melspec if self.melsr is None: self.melsr = melsr win2 = int(round(self.win2sec*self.melsr)) hop2 = int(round(0.5*self.melsr)) nmels, nmelframes = self.melspec.shape if fft: # periodicity via fft #nfft2 = int(2**numpy.ceil(numpy.log2(win2))) nfft2 = 2048 # nfft2 does not depend on win2?? melspectemp = self.melspec if ((nfft2 > win2) and (center is False)): # pad the signal by nfft2-win2 so that frame decomposition (n_frames) # is still based on win2 and not nfft2 melspectemp = numpy.concatenate([numpy.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, numpy.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1) if melspectemp.shape[1]<nfft2: # pad with zeros to have at least one 8-sec window nzeros = nfft2 - melspectemp.shape[1] melspectemp = numpy.concatenate([numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.)))),melspectemp, numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.))))],axis=1) #melspectemp = numpy.concatenate([melspectemp, numpy.zeros((nmels,nzeros))],axis=1) # nframes = int(round(numpy.ceil(round(nmelframes/hop2)))) ff0 = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) nframes = ff0.shape[1] # nmags, nframes = ff0.shape if logfilter: # log filterbank nbins = 25 binsperoct = 5 fft2 = numpy.zeros((nmels, nbins, nframes)) logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=nbins, bins_per_octave=binsperoct, fmin=0.5) # logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=25, bins_per_octave=5, fmin=0.5) for i in range(nmels): fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) fftmags = numpy.dot(logfb, fftmags) # log fft2[i, :, :] = fftmags else: # nmags = min(round(nfft2/2), 200) if bpmrange: # freqresinbpm = float(self.melsr)/float(nfft2)*60. freqresinbpm = float(self.melsr)/float(nfft2/2.)*60. minmag = int(numpy.floor(30./freqresinbpm)) # min tempo 30bpm maxmag = int(numpy.ceil(960./freqresinbpm)) # max tempo 960 bpm magsinds = range(minmag, maxmag) else: magsinds = range(0, 200) nmags = len(magsinds) fft2 = numpy.zeros((nmels, nmags, nframes)) for i in range(nmels): fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) fftmags = fftmags[magsinds, :] fft2[i, :, :] = fftmags op = fft2 else: # periodicity via autocorrelation nmags = min(win2, 1000) if nmelframes < win2: nframes = 1 else: nframes = int(nmelframes/float(win2)) autocor = numpy.zeros((nmels, nmags, nframes)) for i in range(nmels): ac = numpy.zeros((nmags, nframes)) for j in range(nframes): ac[:, j] = librosa.autocorrelate(self.melspec[i, (j*win2):min((j+1)*win2, nmelframes)], max_size=nmags) autocor[i, :, :] = ac op = autocor self.op = op def post_process_op(self, medianfiltOP=False): if medianfiltOP: hop2 = int(round(0.5*self.melsr)) ssr = self.melsr/float(hop2) ks = int(0.5 * ssr) # 100ms kernel size if ks % 2 == 0: # ks must be odd ks += 1 nmels, nmags, nframes = self.op.shape for i in range(nmels): for j in range(nframes): #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks) self.op[i,:,j] = numpy.convolve(self.op[i,:,j], numpy.ones(ks)/ks, mode='same') def mellin_transform(self, op=None): if self.op is None: self.op = op nmels, nmags, nframes = self.op.shape #nmagsout = min(200, nmags) nmagsout = 200 u_max = numpy.log(nmags) delta_c = numpy.pi / u_max c_max = nmagsout c = numpy.arange(delta_c, c_max, delta_c) k = range(1, nmags) exponent = 0.5 - c*1j normMat = 1./(exponent * numpy.sqrt(2*numpy.pi)) normMat.shape = (normMat.shape[0], 1) normMat = numpy.repeat(normMat.T, nmels, axis=0) kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k]) opmellin = numpy.zeros((nmels, nmagsout, nframes)) for i in range(nframes): self.op[:, -1, i] = 0 deltaMat = - numpy.diff(self.op[:, :, i]) mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat) opmellin[:, :, i] = mellin[:, :nmagsout] self.opmellin = opmellin def post_process_opmellin(self, opmellin=None, aveFreq=False, normFrame=True): if self.opmellin is None: self.opmellin = opmellin if aveFreq: self.opmellin = numpy.mean(self.opmellin, axis=0) else: # just reshape nmels, nmags, nframes = self.opmellin.shape self.opmellin = self.opmellin.reshape((nmels*nmags, nframes)) if normFrame: min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True) max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True) denom = max_opmellin - min_opmellin denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent self.opmellin = (self.opmellin - min_opmellin) / denom def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False): self.load_audiofile(filename=filename) self.mel_spectrogram() #self.post_process_spec(medianfilt=medianfiltspec) self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec) # sqrt seems to work better self.onset_patterns(logfilter=logfilter, center=center) self.post_process_op(medianfiltOP=medianfiltOP) self.mellin_transform() self.post_process_opmellin() return self.opmellin def get_opmellin_from_melspec(self, melspec=[], melsr=[]): self.melspec = melspec self.melsr = melsr self.post_process_spec(log=False, sqrt=True, medianfilt=True) # sqrt seems to work better self.onset_patterns(logfilter=False, center=False) self.post_process_op(medianfiltOP=True) self.mellin_transform() self.post_process_opmellin() return self.opmellin if __name__ == '__main__': op = OPMellin() op.get_opmellin()