Mercurial > hg > plosone_underreview

--- a/scripts/OPMellin.py	Thu Sep 14 15:21:52 2017 +0100
+++ b/scripts/OPMellin.py	Thu Sep 14 16:30:41 2017 +0100
@@ -5,10 +5,10 @@
 @author: mariapanteli
 """

+import numpy as np
+import scipy.signal
 import librosa
-import scipy.signal
-# import matplotlib.pyplot as plt
-import numpy
+

 class OPMellin:
     def __init__(self, win2sec=8):
@@ -19,10 +19,11 @@
         self.op = None
         self.opmellin = None
         self.win2sec = win2sec
+        self.hop2sec = 0.5

-    def load_audiofile(self, filename='test.wav', sr=None, segment=True):
+
+    def load_audiofile(self, filename, sr=None, segment=True):
         self.y, self.sr = librosa.load(filename, sr=sr)
-        #self.y, self.sr = librosa.load(filename) # default sr=22050
         if segment:
             tracklength = self.y.shape[0]/float(self.sr)
             startSample = 0
@@ -34,7 +35,6 @@
             self.y = self.y[startSample:endSample]


-    # def mel_spectrogram(self, y=None, sr=None, filename='1_21.wav'):
     def mel_spectrogram(self, y=None, sr=None):
         if self.y is None:
             self.y = y
@@ -42,22 +42,16 @@
             self.sr = sr
         win1 = int(round(0.04*self.sr))
         hop1 = int(round(win1/8.))
-        nfft1 = int(2**numpy.ceil(numpy.log2(win1)))
+        nfft1 = int(2**np.ceil(np.log2(win1)))
         nmels = 40
-        # y, sr = librosa.load(filename, sr=fs)
-        # melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=nfft, win_length=win1, window=scipy.signal.hamming, hop_length=hop1, n_mels=nmels)
-        # melspec = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_fft=nfft, hop_length=hop1, n_mels=nmels)
-        D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2
-        #melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels)
+        D = np.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2
         melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000)
         melsr = self.sr/float(hop1)
-        # librosa.display.specshow(melspec)
-        # return melspec, melsr
         self.melspec = melspec
         self.melsr = melsr

-    # post process spectrogram
-    def post_process_spec(self, melspec=None, log=True, medianfilt=False, sqrt=False, smooth=False, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
+
+    def post_process_spec(self, melspec=None, log=True, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
         if self.melspec is None:
             self.melspec = melspec
         if log:
@@ -71,98 +65,66 @@
                 self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks)
         if sqrt:
             self.melspec = self.melspec**.5
-        if smooth:
-            step = 50.0
-            nmels = self.melspec.shape[0]
-            mm = self.melspec
-            for i in range(nmels):
-                mm[i,:] = numpy.convolve(mm[i,:], numpy.ones(step)/step, mode='same')
-            self.melspec = mm
         if diff:
-            self.melspec = numpy.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1)  # append one frame before diff to keep number of frames the same
-            self.melspec = numpy.diff(self.melspec, n=1, axis=1)
+            # append one frame before diff to keep number of frames the same
+            self.melspec = np.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1)
+            self.melspec = np.diff(self.melspec, n=1, axis=1)
         if subtractmean:
             mean = self.melspec.mean(axis=1)
             mean.shape = (mean.shape[0], 1)
             self.melspec = self.melspec - mean
         if halfwave:
-            self.melspec[numpy.where(self.melspec < 0)] = 0
+            self.melspec[np.where(self.melspec < 0)] = 0
         if maxnormal:
             if self.melspec.max() != 0:  # avoid division by 0
                 self.melspec = self.melspec/self.melspec.max()
-        # return melspec


-    def onset_patterns(self, melspec=None, melsr=None, fft=True, logfilter=False, center=True, bpmrange=True):
+    def onset_patterns(self, melspec=None, melsr=None, center=False, bpmrange=True):
         if self.melspec is None:
             self.melspec = melspec
         if self.melsr is None:
             self.melsr = melsr
-        win2 = int(round(self.win2sec*self.melsr))
-        hop2 = int(round(0.5*self.melsr))
+        win2 = int(round(self.win2sec * self.melsr))
+        hop2 = int(round(self.hop2sec * self.melsr))
         nmels, nmelframes = self.melspec.shape
-        if fft:  # periodicity via fft
-            #nfft2 = int(2**numpy.ceil(numpy.log2(win2)))
-            nfft2 = 2048  # nfft2 does not depend on win2??
-            melspectemp = self.melspec
-            if ((nfft2 > win2) and (center is False)):
-                # in librosa version < 6.0, window is padded to the size of nfft
-                # so if win2<nfft2 the frames returned are less than expected
-                # solution: pad the signal by (nfft2-win2)/2 on the edges
-                # then frame decomposition (n_frames) matches the one expected using win2
-                melspectemp = numpy.concatenate([numpy.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, numpy.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1)
-            if melspectemp.shape[1]<nfft2:
-                # pad with zeros to have at least one 8-sec window
-                nzeros = nfft2 - melspectemp.shape[1]
-                melspectemp = numpy.concatenate([numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.)))),melspectemp, numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.))))],axis=1)
-                #melspectemp = numpy.concatenate([melspectemp, numpy.zeros((nmels,nzeros))],axis=1)
-            # nframes = int(round(numpy.ceil(round(nmelframes/hop2))))
-            ff0 = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
-            nframes = ff0.shape[1]
-            # nmags, nframes = ff0.shape
-            if logfilter:  # log filterbank
-                nbins = 25
-                binsperoct = 5
-                fft2 = numpy.zeros((nmels, nbins, nframes))
-                logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=nbins, bins_per_octave=binsperoct, fmin=0.5)
-                # logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=25, bins_per_octave=5, fmin=0.5)
-                for i in range(nmels):
-                    fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
-                    fftmags = numpy.dot(logfb, fftmags)  # log
-                    fft2[i, :, :] = fftmags
-            else:
-                # nmags = min(round(nfft2/2), 200)
-                if bpmrange:
-                    # freqresinbpm = float(self.melsr)/float(nfft2)*60.
-                    freqresinbpm = float(self.melsr)/float(nfft2/2.)*60.
-                    minmag = int(numpy.floor(30./freqresinbpm))  # min tempo 30bpm
-                    maxmag = int(numpy.ceil(960./freqresinbpm))  # max tempo 960 bpm
-                    magsinds = range(minmag, maxmag)
-                else:
-                    magsinds = range(0, 200)
-                nmags = len(magsinds)
-                fft2 = numpy.zeros((nmels, nmags, nframes))
-                for i in range(nmels):
-                    fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
-                    fftmags = fftmags[magsinds, :]
-                    fft2[i, :, :] = fftmags
-            op = fft2
-        else:  # periodicity via autocorrelation
-            nmags = min(win2, 1000)
-            if nmelframes < win2:
-                nframes = 1
-            else:
-                nframes = int(nmelframes/float(win2))
-            autocor = numpy.zeros((nmels, nmags, nframes))
-            for i in range(nmels):
-                ac = numpy.zeros((nmags, nframes))
-                for j in range(nframes):
-                    ac[:, j] = librosa.autocorrelate(self.melspec[i, (j*win2):min((j+1)*win2, nmelframes)], max_size=nmags)
-                autocor[i, :, :] = ac
-            op = autocor
+
+        #nfft2 = int(2**np.ceil(np.log2(win2)))
+        nfft2 = 2048  # nfft2 does not depend on win2??
+        melspectemp = self.melspec
+        if ((nfft2 > win2) and (center is False)):
+            # in librosa version < 6.0, window is padded to the size of nfft
+            # so if win2<nfft2 the frames returned are less than expected
+            # solution: pad the signal by (nfft2-win2)/2 on the edges
+            # then frame decomposition (n_frames) matches the one expected using win2
+            melspectemp = np.concatenate([np.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, np.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1)
+        if melspectemp.shape[1]<nfft2:
+            # pad with zeros to have at least one 8-sec window
+            nzeros = nfft2 - melspectemp.shape[1]
+            melspectemp = np.concatenate([np.zeros((nmels,int(np.ceil(nzeros / 2.)))),melspectemp, np.zeros((nmels,int(np.ceil(nzeros / 2.))))],axis=1)
+            #melspectemp = np.concatenate([melspectemp, np.zeros((nmels,nzeros))],axis=1)
+        # nframes = int(round(np.ceil(round(nmelframes/hop2))))
+        ff0 = np.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
+        nframes = ff0.shape[1]
+        # nmags, nframes = ff0.shape
+        if bpmrange:
+            freqresinbpm = float(self.melsr)/float(nfft2/2.)*60.
+            minmag = int(np.floor(30./freqresinbpm))  # min tempo 30bpm
+            maxmag = int(np.ceil(960./freqresinbpm))  # max tempo 960 bpm
+            magsinds = range(minmag, maxmag)
+        else:
+            magsinds = range(0, 200)
+        nmags = len(magsinds)
+        fft2 = np.zeros((nmels, nmags, nframes))
+        for i in range(nmels):
+            fftmags = np.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
+            fftmags = fftmags[magsinds, :]
+            fft2[i, :, :] = fftmags
+        op = fft2
         self.op = op

-    def post_process_op(self, medianfiltOP=False):
+
+    def post_process_op(self, medianfiltOP=True):
         if medianfiltOP:
             hop2 = int(round(0.5*self.melsr))
             ssr = self.melsr/float(hop2)
@@ -173,7 +135,8 @@
             for i in range(nmels):
                 for j in range(nframes):
                     #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks)
-                    self.op[i,:,j] = numpy.convolve(self.op[i,:,j], numpy.ones(ks)/ks, mode='same')
+                    self.op[i,:,j] = np.convolve(self.op[i,:,j], np.ones(ks)/ks, mode='same')
+

     def mellin_transform(self, op=None):
         if self.op is None:
@@ -181,22 +144,22 @@
         nmels, nmags, nframes = self.op.shape
         #nmagsout = min(200, nmags)
         nmagsout = 200
-        u_max = numpy.log(nmags)
-        delta_c = numpy.pi / u_max
+        u_max = np.log(nmags)
+        delta_c = np.pi / u_max
         c_max = nmagsout
-        c = numpy.arange(delta_c, c_max, delta_c)
+        c = np.arange(delta_c, c_max, delta_c)
         k = range(1, nmags)
         exponent = 0.5 - c*1j

-        normMat = 1./(exponent * numpy.sqrt(2*numpy.pi))
+        normMat = 1./(exponent * np.sqrt(2*np.pi))
         normMat.shape = (normMat.shape[0], 1)
-        normMat = numpy.repeat(normMat.T, nmels, axis=0)
-        kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k])
-        opmellin = numpy.zeros((nmels, nmagsout, nframes))
+        normMat = np.repeat(normMat.T, nmels, axis=0)
+        kernelMat = np.asarray([np.power(ki, exponent) for ki in k])
+        opmellin = np.zeros((nmels, nmagsout, nframes))
         for i in range(nframes):
             self.op[:, -1, i] = 0
-            deltaMat = - numpy.diff(self.op[:, :, i])
-            mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat)
+            deltaMat = - np.diff(self.op[:, :, i])
+            mellin = np.abs(np.dot(deltaMat, kernelMat) * normMat)
             opmellin[:, :, i] = mellin[:, :nmagsout]
         self.opmellin = opmellin

@@ -205,13 +168,13 @@
         if self.opmellin is None:
             self.opmellin = opmellin
         if aveFreq:
-            self.opmellin = numpy.mean(self.opmellin, axis=0)
+            self.opmellin = np.mean(self.opmellin, axis=0)
         else:  # just reshape
             nmels, nmags, nframes = self.opmellin.shape
             self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
         if normFrame:
-            min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True)
-            max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True)
+            min_opmellin = np.amin(self.opmellin, axis=0, keepdims=True)
+            max_opmellin = np.amax(self.opmellin, axis=0, keepdims=True)
             denom = max_opmellin - min_opmellin
             denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
             self.opmellin = (self.opmellin - min_opmellin) / denom
@@ -220,7 +183,6 @@
     def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False):
         self.load_audiofile(filename=filename)
         self.mel_spectrogram()
-        #self.post_process_spec(medianfilt=medianfiltspec)
         self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec)  # sqrt seems to work better
         self.onset_patterns(logfilter=logfilter, center=center)
         self.post_process_op(medianfiltOP=medianfiltOP)
@@ -228,6 +190,7 @@
         self.post_process_opmellin()
         return self.opmellin

+
     def get_opmellin_from_melspec(self, melspec=[], melsr=[]):
         self.melspec = melspec
         self.melsr = melsr
@@ -238,6 +201,7 @@
         self.post_process_opmellin()
         return self.opmellin

+
 if __name__ == '__main__':
     op = OPMellin()
     op.get_opmellin()
--- a/scripts/PitchBihist.py	Thu Sep 14 15:21:52 2017 +0100
+++ b/scripts/PitchBihist.py	Thu Sep 14 16:30:41 2017 +0100
@@ -8,8 +8,6 @@
 import os
 import scipy.signal

-import smoothiecore as s
-

 class PitchBihist:
     def __init__(self, win2sec=8):
@@ -91,6 +89,18 @@
         return B


+    def second_frame_decomposition(self, norigframes):
+        win2 = int(round(self.win2sec * self.melody_sr))
+        hop2 = int(round(self.hop2sec * self.melody_sr))
+        print win2, hop2, norigframes
+        if norigframes<=win2:
+            nframes = 1
+            win2 = norigframes
+        else:
+            nframes = np.int(1+np.floor((norigframes-win2)/float(hop2)))
+        return nframes, win2, hop2
+
+
     def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None):
         melody = self.get_melody_from_file(filename, stop_sec=stop_sec)
         if len(melody) == 0:
@@ -99,13 +109,7 @@
         bihist = []
         if secondframedecomp:
             nbins, norigframes = melody_matrix.shape
-            win2 = int(round(self.win2sec * self.melody_sr))
-            hop2 = int(round(self.hop2sec * self.melody_sr))
-            if norigframes<=win2:
-                nframes = 1
-                win2 = norigframes
-            else:
-                nframes = int(np.ceil((norigframes-win2)/float(hop2)))
+            nframes, win2, hop2 = self.second_frame_decomposition(norigframes)
             bihistframes = np.empty((nbins*nbins, nframes))
             for i in range(nframes):  # loop over all 8-sec frames
                 frame = melody_matrix[:, (i*hop2):(i*hop2+win2)]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_OPMellin.py	Thu Sep 14 16:30:41 2017 +0100
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep  1 19:11:52 2017
+
+@author: mariapanteli
+"""
+
+import pytest
+
+import numpy as np
+
+import scripts.OPMellin as OPMellin
+
+
+opm = OPMellin.OPMellin()
+
+
+def test_load_audiofile():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    assert opm.y is not None and opm.sr is not None
+
+
+def test_mel_spectrogram():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    opm.mel_spectrogram(y=opm.y, sr=opm.sr)
+    # assume 40 mel bands
+    assert opm.melspec.shape[0] == 40
+
+
+def test_post_process_spec():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    opm.mel_spectrogram(y=opm.y, sr=opm.sr)
+    melspec = opm.melspec
+    opm.post_process_spec(melspec=melspec)
+    proc_melspec = opm.melspec
+    assert melspec.shape == proc_melspec.shape
+
+
+def test_onset_patterns_n_frames():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    opm.mel_spectrogram(y=opm.y, sr=opm.sr)
+    opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr)
+    assert opm.op.shape[2] == np.round(((opm.melspec.shape[1] / opm.melsr) - opm.win2sec) * 2.)
+
+
+def test_onset_patterns_n_bins():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    opm.mel_spectrogram(y=opm.y, sr=opm.sr)
+    opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr)
+    assert opm.op.shape[0] == 40
+
+
+def test_post_process_op():
+    audiofile = 'data/sample_dataset/Audio/mel_1_2_1.wav'
+    opm.load_audiofile(audiofile, segment=False)
+    opm.mel_spectrogram(y=opm.y, sr=opm.sr)
+    opm.onset_patterns(melspec=opm.melspec, melsr=opm.melsr)
+    op = opm.op
+    opm.post_process_op()
+    proc_op = opm.op
+    assert op.shape == proc_op.shape
\ No newline at end of file
--- a/tests/test_PitchBihist.py	Thu Sep 14 15:21:52 2017 +0100
+++ b/tests/test_PitchBihist.py	Thu Sep 14 16:30:41 2017 +0100
@@ -42,6 +42,19 @@
     assert np.array_equal(melody_matrix[45, :], np.ones(n_frames))


+def test_second_frame_decomposition():
+    norigframes = 100
+    nframes, win2, hop2 = pbi.second_frame_decomposition(norigframes)
+    assert nframes == 1
+
+
+def test_second_frame_decomposition():
+    norigframes = np.ceil(pbi.melody_sr * 16.)
+    nframes, win2, hop2 = pbi.second_frame_decomposition(norigframes)
+    # for 16-sec segment with .5 hop size expect ~16 frames round up
+    assert nframes == 17
+
+
 def test_bihist_from_melodia():
     melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv'
     bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=False)