Mercurial > hg > plosone_underreview
changeset 39:0e70021f251e branch-tests
some tests on OPmellin
author | Maria Panteli |
---|---|
date | Thu, 14 Sep 2017 16:30:41 +0100 |
parents | e5e8e8a96948 |
children | e6e10013e11c |
files | scripts/OPMellin.py scripts/PitchBihist.py tests/test_OPMellin.py tests/test_PitchBihist.py |
diffstat | 4 files changed, 160 insertions(+), 113 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/OPMellin.py Thu Sep 14 15:21:52 2017 +0100 +++ b/scripts/OPMellin.py Thu Sep 14 16:30:41 2017 +0100 @@ -5,10 +5,10 @@ @author: mariapanteli """ +import numpy as np +import scipy.signal import librosa -import scipy.signal -# import matplotlib.pyplot as plt -import numpy + class OPMellin: def __init__(self, win2sec=8): @@ -19,10 +19,11 @@ self.op = None self.opmellin = None self.win2sec = win2sec + self.hop2sec = 0.5 - def load_audiofile(self, filename='test.wav', sr=None, segment=True): + + def load_audiofile(self, filename, sr=None, segment=True): self.y, self.sr = librosa.load(filename, sr=sr) - #self.y, self.sr = librosa.load(filename) # default sr=22050 if segment: tracklength = self.y.shape[0]/float(self.sr) startSample = 0 @@ -34,7 +35,6 @@ self.y = self.y[startSample:endSample] - # def mel_spectrogram(self, y=None, sr=None, filename='1_21.wav'): def mel_spectrogram(self, y=None, sr=None): if self.y is None: self.y = y @@ -42,22 +42,16 @@ self.sr = sr win1 = int(round(0.04*self.sr)) hop1 = int(round(win1/8.)) - nfft1 = int(2**numpy.ceil(numpy.log2(win1))) + nfft1 = int(2**np.ceil(np.log2(win1))) nmels = 40 - # y, sr = librosa.load(filename, sr=fs) - # melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=nfft, win_length=win1, window=scipy.signal.hamming, hop_length=hop1, n_mels=nmels) - # melspec = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_fft=nfft, hop_length=hop1, n_mels=nmels) - D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2 - #melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels) + D = np.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2 melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000) melsr = self.sr/float(hop1) - # librosa.display.specshow(melspec) - # return melspec, melsr self.melspec = melspec self.melsr = melsr - # post process spectrogram - def post_process_spec(self, melspec=None, log=True, medianfilt=False, sqrt=False, smooth=False, diff=True, subtractmean=True, halfwave=True, maxnormal=True): + + def post_process_spec(self, melspec=None, log=True, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True): if self.melspec is None: self.melspec = melspec if log: @@ -71,98 +65,66 @@ self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks) if sqrt: self.melspec = self.melspec**.5 - if smooth: - step = 50.0 - nmels = self.melspec.shape[0] - mm = self.melspec - for i in range(nmels): - mm[i,:] = numpy.convolve(mm[i,:], numpy.ones(step)/step, mode='same') - self.melspec = mm if diff: - self.melspec = numpy.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1) # append one frame before diff to keep number of frames the same - self.melspec = numpy.diff(self.melspec, n=1, axis=1) + # append one frame before diff to keep number of frames the same + self.melspec = np.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1) + self.melspec = np.diff(self.melspec, n=1, axis=1) if subtractmean: mean = self.melspec.mean(axis=1) mean.shape = (mean.shape[0], 1) self.melspec = self.melspec - mean if halfwave: - self.melspec[numpy.where(self.melspec < 0)] = 0 + self.melspec[np.where(self.melspec < 0)] = 0 if maxnormal: if self.melspec.max() != 0: # avoid division by 0 self.melspec = self.melspec/self.melspec.max() - # return melspec - def onset_patterns(self, melspec=None, melsr=None, fft=True, logfilter=False, center=True, bpmrange=True): + def onset_patterns(self, melspec=None, melsr=None, center=False, bpmrange=True): if self.melspec is None: self.melspec = melspec if self.melsr is None: self.melsr = melsr - win2 = int(round(self.win2sec*self.melsr)) - hop2 = int(round(0.5*self.melsr)) + win2 = int(round(self.win2sec * self.melsr)) + hop2 = int(round(self.hop2sec * self.melsr)) nmels, nmelframes = self.melspec.shape - if fft: # periodicity via fft - #nfft2 = int(2**numpy.ceil(numpy.log2(win2))) - nfft2 = 2048 # nfft2 does not depend on win2?? - melspectemp = self.melspec - if ((nfft2 > win2) and (center is False)): - # in librosa version < 6.0, window is padded to the size of nfft - # so if win2<nfft2 the frames returned are less than expected - # solution: pad the signal by (nfft2-win2)/2 on the edges - # then frame decomposition (n_frames) matches the one expected using win2 - melspectemp = numpy.concatenate([numpy.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, numpy.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1) - if melspectemp.shape[1]<nfft2: - # pad with zeros to have at least one 8-sec window - nzeros = nfft2 - melspectemp.shape[1] - melspectemp = numpy.concatenate([numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.)))),melspectemp, numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.))))],axis=1) - #melspectemp = numpy.concatenate([melspectemp, numpy.zeros((nmels,nzeros))],axis=1) - # nframes = int(round(numpy.ceil(round(nmelframes/hop2)))) - ff0 = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) - nframes = ff0.shape[1] - # nmags, nframes = ff0.shape - if logfilter: # log filterbank - nbins = 25 - binsperoct = 5 - fft2 = numpy.zeros((nmels, nbins, nframes)) - logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=nbins, bins_per_octave=binsperoct, fmin=0.5) - # logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=25, bins_per_octave=5, fmin=0.5) - for i in range(nmels): - fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) - fftmags = numpy.dot(logfb, fftmags) # log - fft2[i, :, :] = fftmags - else: - # nmags = min(round(nfft2/2), 200) - if bpmrange: - # freqresinbpm = float(self.melsr)/float(nfft2)*60. - freqresinbpm = float(self.melsr)/float(nfft2/2.)*60. - minmag = int(numpy.floor(30./freqresinbpm)) # min tempo 30bpm - maxmag = int(numpy.ceil(960./freqresinbpm)) # max tempo 960 bpm - magsinds = range(minmag, maxmag) - else: - magsinds = range(0, 200) - nmags = len(magsinds) - fft2 = numpy.zeros((nmels, nmags, nframes)) - for i in range(nmels): - fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) - fftmags = fftmags[magsinds, :] - fft2[i, :, :] = fftmags - op = fft2 - else: # periodicity via autocorrelation - nmags = min(win2, 1000) - if nmelframes < win2: - nframes = 1 - else: - nframes = int(nmelframes/float(win2)) - autocor = numpy.zeros((nmels, nmags, nframes)) - for i in range(nmels): - ac = numpy.zeros((nmags, nframes)) - for j in range(nframes): - ac[:, j] = librosa.autocorrelate(self.melspec[i, (j*win2):min((j+1)*win2, nmelframes)], max_size=nmags) - autocor[i, :, :] = ac - op = autocor + + #nfft2 = int(2**np.ceil(np.log2(win2))) + nfft2 = 2048 # nfft2 does not depend on win2?? + melspectemp = self.melspec + if ((nfft2 > win2) and (center is False)): + # in librosa version < 6.0, window is padded to the size of nfft + # so if win2<nfft2 the frames returned are less than expected + # solution: pad the signal by (nfft2-win2)/2 on the edges + # then frame decomposition (n_frames) matches the one expected using win2 + melspectemp = np.concatenate([np.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, np.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1) + if melspectemp.shape[1]<nfft2: + # pad with zeros to have at least one 8-sec window + nzeros = nfft2 - melspectemp.shape[1] + melspectemp = np.concatenate([np.zeros((nmels,int(np.ceil(nzeros / 2.)))),melspectemp, np.zeros((nmels,int(np.ceil(nzeros / 2.))))],axis=1) + #melspectemp = np.concatenate([melspectemp, np.zeros((nmels,nzeros))],axis=1) + # nframes = int(round(np.ceil(round(nmelframes/hop2)))) + ff0 = np.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) + nframes = ff0.shape[1] + # nmags, nframes = ff0.shape + if bpmrange: + freqresinbpm = float(self.melsr)/float(nfft2/2.)*60. + minmag = int(np.floor(30./freqresinbpm)) # min tempo 30bpm + maxmag = int(np.ceil(960./freqresinbpm)) # max tempo 960 bpm + magsinds = range(minmag, maxmag) + else: + magsinds = range(0, 200) + nmags = len(magsinds) + fft2 = np.zeros((nmels, nmags, nframes)) + for i in range(nmels): + fftmags = np.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center)) + fftmags = fftmags[magsinds, :] + fft2[i, :, :] = fftmags + op = fft2 self.op = op - def post_process_op(self, medianfiltOP=False): + + def post_process_op(self, medianfiltOP=True): if medianfiltOP: hop2 = int(round(0.5*self.melsr)) ssr = self.melsr/float(hop2) @@ -173,7 +135,8 @@ for i in range(nmels): for j in range(nframes): #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks) - self.op[i,:,j] = numpy.convolve(self.op[i,:,j], numpy.ones(ks)/ks, mode='same') + self.op[i,:,j] = np.convolve(self.op[i,:,j], np.ones(ks)/ks, mode='same') + def mellin_transform(self, op=None): if self.op is None: @@ -181,22 +144,22 @@ nmels, nmags, nframes = self.op.shape #nmagsout = min(200, nmags) nmagsout = 200 - u_max = numpy.log(nmags) - delta_c = numpy.pi / u_max + u_max = np.log(nmags) + delta_c = np.pi / u_max c_max = nmagsout - c = numpy.arange(delta_c, c_max, delta_c) + c = np.arange(delta_c, c_max, delta_c) k = range(1, nmags) exponent = 0.5 - c*1j - normMat = 1./(exponent * numpy.sqrt(2*numpy.pi)) + normMat = 1./(exponent * np.sqrt(2*np.pi)) normMat.shape = (normMat.shape[0], 1) - normMat = numpy.repeat(normMat.T, nmels, axis=0) - kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k]) - opmellin = numpy.zeros((nmels, nmagsout, nframes)) + normMat = np.repeat(normMat.T, nmels, axis=0) + kernelMat = np.asarray([np.power(ki, exponent) for ki in k]) + opmellin = np.zeros((nmels, nmagsout, nframes)) for i in range(nframes): self.op[:, -1, i] = 0 - deltaMat = - numpy.diff(self.op[:, :, i]) - mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat) + deltaMat = - np.diff(self.op[:, :, i]) + mellin = np.abs(np.dot(deltaMat, kernelMat) * normMat) opmellin[:, :, i] = mellin[:, :nmagsout] self.opmellin = opmellin @@ -205,13 +168,13 @@ if self.opmellin is None: self.opmellin = opmellin if aveFreq: - self.opmellin = numpy.mean(self.opmellin, axis=0) + self.opmellin = np.mean(self.opmellin, axis=0) else: # just reshape nmels, nmags, nframes = self.opmellin.shape self.opmellin = self.opmellin.reshape((nmels*nmags, nframes)) if normFrame: - min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True) - max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True) + min_opmellin = np.amin(self.opmellin, axis=0, keepdims=True) + max_opmellin = np.amax(self.opmellin, axis=0, keepdims=True) denom = max_opmellin - min_opmellin denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent self.opmellin = (self.opmellin - min_opmellin) / denom @@ -220,7 +183,6 @@ def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False): self.load_audiofile(filename=filename) self.mel_spectrogram() - #self.post_process_spec(medianfilt=medianfiltspec) self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec) # sqrt seems to work better self.onset_patterns(logfilter=logfilter, center=center) self.post_process_op(medianfiltOP=medianfiltOP) @@ -228,6 +190,7 @@ self.post_process_opmellin() return self.opmellin + def get_opmellin_from_melspec(self, melspec=[], melsr=[]): self.melspec = melspec self.melsr = melsr @@ -238,6 +201,7 @@ self.post_process_opmellin() return self.opmellin + if __name__ == '__main__': op = OPMellin() op.get_opmellin()
--- a/scripts/PitchBihist.py Thu Sep 14 15:21:52 2017 +0100 +++ b/scripts/PitchBihist.py Thu Sep 14 16:30:41 2017 +0100 @@ -8,8 +8,6 @@ import os import scipy.signal -import smoothiecore as s - class PitchBihist: def __init__(self, win2sec=8): @@ -91,6 +89,18 @@ return B + def second_frame_decomposition(self, norigframes): + win2 = int(round(self.win2sec * self.melody_sr)) + hop2 = int(round(self.hop2sec * self.melody_sr)) + print win2, hop2, norigframes + if norigframes<=win2: + nframes = 1 + win2 = norigframes + else: + nframes = np.int(1+np.floor((norigframes-win2)/float(hop2))) + return nframes, win2, hop2 + + def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None): melody = self.get_melody_from_file(filename, stop_sec=stop_sec) if len(melody) == 0: @@ -99,13 +109,7 @@ bihist = [] if secondframedecomp: nbins, norigframes = melody_matrix.shape - win2 = int(round(self.win2sec * self.melody_sr)) - hop2 = int(round(self.hop2sec * self.melody_sr)) - if norigframes<=win2: - nframes = 1 - win2 = norigframes - else: - nframes = int(np.ceil((norigframes-win2)/float(hop2))) + nframes, win2, hop2 = self.second_frame_decomposition(norigframes) bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = melody_matrix[:, (i*hop2):(i*hop2+win2)]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_OPMellin.py Thu Sep 14 16:30:41 2017 +0100 @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 1 19:11:52 2017 + +@author: mariapanteli +""" + +import pytest + +import numpy as np + +import scripts.OPMellin as OPMellin + + +opm = OPMellin.OPMellin() + + +def test_load_audiofile(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + assert opm.y is not None and opm.sr is not None + + +def test_mel_spectrogram(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + opm.mel_spectrogram(y=opm.y, sr=opm.sr) + # assume 40 mel bands + assert opm.melspec.shape[0] == 40 + + +def test_post_process_spec(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + opm.mel_spectrogram(y=opm.y, sr=opm.sr) + melspec = opm.melspec + opm.post_process_spec(melspec=melspec) + proc_melspec = opm.melspec + assert melspec.shape == proc_melspec.shape + + +def test_onset_patterns_n_frames(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + opm.mel_spectrogram(y=opm.y, sr=opm.sr) + opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr) + assert opm.op.shape[2] == np.round(((opm.melspec.shape[1] / opm.melsr) - opm.win2sec) * 2.) + + +def test_onset_patterns_n_bins(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + opm.mel_spectrogram(y=opm.y, sr=opm.sr) + opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr) + assert opm.op.shape[0] == 40 + + +def test_post_process_op(): + audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav' + opm.load_audiofile(audiofile, segment=False) + opm.mel_spectrogram(y=opm.y, sr=opm.sr) + opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr) + op = opm.op + opm.post_process_op() + proc_op = opm.op + assert op.shape == proc_op.shape \ No newline at end of file
--- a/tests/test_PitchBihist.py Thu Sep 14 15:21:52 2017 +0100 +++ b/tests/test_PitchBihist.py Thu Sep 14 16:30:41 2017 +0100 @@ -42,6 +42,19 @@ assert np.array_equal(melody_matrix[45, :], np.ones(n_frames)) +def test_second_frame_decomposition(): + norigframes = 100 + nframes, win2, hop2 = pbi.second_frame_decomposition(norigframes) + assert nframes == 1 + + +def test_second_frame_decomposition(): + norigframes = np.ceil(pbi.melody_sr * 16.) + nframes, win2, hop2 = pbi.second_frame_decomposition(norigframes) + # for 16-sec segment with .5 hop size expect ~16 frames round up + assert nframes == 17 + + def test_bihist_from_melodia(): melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv' bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=False)