Mercurial > hg > plosone_underreview

--- a/scripts/PitchBihist.py	Thu Sep 14 13:07:19 2017 +0100
+++ b/scripts/PitchBihist.py	Thu Sep 14 14:16:29 2017 +0100
@@ -4,10 +4,11 @@

 @author: mariapanteli
 """
+import numpy as np
+import os
+import scipy.signal
+
 import smoothiecore as s
-import numpy
-import scipy.signal
-# import librosa


 class PitchBihist:
@@ -16,9 +17,109 @@
         self.sr = None
         self.chroma = None
         self.chromasr = None
+        self.melodiasr = 44100. / 128.
         self.bihist = None
         self.win2sec = win2sec

+
+    def hz_to_cents(self, freq_Hz, ref_Hz=32.703, n_cents=1200):
+        """ convert frequency values from Hz to cents
+            reference frequency at C1
+        """
+        freq_cents = np.round(n_cents * np.log2(freq_Hz/ref_Hz))
+        return freq_cents
+
+
+    def wrap_to_octave(self, cents, octave_length=1200):
+        """ wrap cents to a single octave, i.e. modulo octave_length
+        """
+        octave_cents = cents % octave_length
+        return octave_cents
+
+
+    def get_melody_from_file(self, melodia_file, stop_sec=None):
+        if not os.path.exists(melodia_file):
+            return []
+        data = np.loadtxt(melodia_file, delimiter=',')
+        times, freqs = (data[:, 0], data[:, 1])
+        self.chromasr = 1. / (times[1] - times[0])
+        if stop_sec is not None:
+            stop_idx = np.where(times < stop_sec)[0]
+            times, freqs = times[stop_idx], freqs[stop_idx]
+        freqs[freqs<=0] = np.nan
+        melody = freqs
+        return melody
+
+
+    def get_melody_matrix(self, melody):
+        n_bins = 60
+        n_frames = len(melody)
+        melody_cents = self.hz_to_cents(melody, n_cents=n_bins)
+        melody_octave = self.wrap_to_octave(melody_cents, octave_length=n_bins)
+        melody_matrix = np.zeros((n_bins, n_frames))
+        for time, pitch in enumerate(melody_octave):
+            if not np.isnan(pitch):
+                melody_matrix[int(pitch), time] = 1
+        return melody_matrix
+
+
+    def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None):
+        melody = self.get_melody_from_file(filename, stop_sec=stop_sec)
+        if len(melody) == 0:
+            self.bihist = []
+            return self.bihist
+        melody_matrix = self.get_melody_matrix(melody)
+        if secondframedecomp:
+            nbins, norigframes = melody_matrix.shape
+            win2 = int(round(self.win2sec*self.chromasr))
+            hop2 = int(round(0.5*self.chromasr))
+            if norigframes<=win2:
+                nframes = 1
+                win2 = norigframes
+            else:
+                nframes = int(np.ceil((norigframes-win2)/float(hop2)))
+            bihistframes = np.empty((nbins*nbins, nframes))
+            for i in range(nframes):  # loop over all frames of win2 samples
+                frame = melody_matrix[:, (i*hop2):(i*hop2+win2)]
+                bihist = self.bihistogram(frame)
+                bihist = np.reshape(bihist, -1)
+                bihistframes[:, i] = bihist
+            self.bihist = bihistframes
+        else:
+            self.bihist = self.bihistogram(melody_matrix)
+        return self.bihist
+
+
+    def bihistogram(self, spec, winsec=0.5, align=True):
+        win = int(round(winsec*self.chromasr))
+        ker = np.concatenate([np.zeros((win, 1)), np.ones((win+1, 1))], axis=0)
+        spec = spec.T  # transpose to have frames as rows in convolution
+
+        # energy threshold
+        thr = 0.3*np.max(spec)
+        spec[spec < max(thr, 0)] = 0
+
+        # transitions via convolution
+        tra = scipy.signal.convolve2d(spec, ker, mode='same')
+        tra[spec > 0] = 0
+
+        # multiply with original
+        B = np.dot(tra.T, spec)
+
+        # normalize to [0, 1]
+        mxB = np.max(B)
+        mnB = np.min(B)
+        if mxB != mnB:
+            B = (B - mnB)/float(mxB-mnB)
+
+        # circularly shift both axes so the bin with the largest summed energy moves to index 0
+        if align:
+            ref = np.argmax(np.sum(spec, axis=0))
+            B = np.roll(B, -ref, axis=0)
+            B = np.roll(B, -ref, axis=1)
+        return B
+
+
     def bihist_from_chroma(self, filename='test.wav', secondframedecomp=True):
         self.chroma, self.chromasr = s.get_smoothie_for_bihist(filename=filename, hopinsec=0.005)
         if secondframedecomp:
@@ -28,96 +129,17 @@
             if norigframes<win2:
                 nframes = 1
             else:
-                nframes = int(1+numpy.floor((norigframes-win2)/float(hop2)))
-            bihistframes = numpy.empty((nbins*nbins, nframes))
+                nframes = int(1+np.floor((norigframes-win2)/float(hop2)))
+            bihistframes = np.empty((nbins*nbins, nframes))
             for i in range(nframes):  # loop over all 8-sec frames
                 frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)]
                 bihist = self.bihistogram(frame)
-                bihist = numpy.reshape(bihist, -1)
+                bihist = np.reshape(bihist, -1)
                 bihistframes[:, i] = bihist
             self.bihist = bihistframes
         else:
-            self.bihist = numpy.reshape(self.bihistogram(), -1)
+            self.bihist = np.reshape(self.bihistogram(), -1)

-    def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None):
-        def hz_to_cents(freq_Hz, ref_Hz=32.703, n_cents=1200):
-            """ convert frequency values from Hz to cents
-                reference frequency at C1
-            """
-            freq_cents = numpy.round(n_cents * numpy.log2(freq_Hz/ref_Hz))
-            return freq_cents
-        def wrap_to_octave(cents, octave_length=1200):
-            """ wrap to a single octave 0-1200
-            """
-            octave_cents = cents % octave_length
-            return octave_cents
-
-        n_bins = 60
-        data = numpy.loadtxt(filename, delimiter=',')
-        times, freqs = (data[:, 0], data[:, 1])
-        self.chromasr = 1. / (times[1] - times[0])
-        #self.chromasr = 128.
-        if stop_sec is not None:
-            stop_idx = numpy.where(times < stop_sec)[0]
-            times, freqs = times[stop_idx], freqs[stop_idx]
-        freqs[freqs<=0] = numpy.nan
-        #melody = freqs[freqs>0]
-        melody = freqs
-        n_frames = len(melody)
-        melody_cents = hz_to_cents(melody, n_cents=n_bins)
-        melody_octave = wrap_to_octave(melody_cents, octave_length=n_bins)
-        melody_matrix = numpy.zeros((n_bins, n_frames))
-        for time, pitch in enumerate(melody_octave):
-            if not numpy.isnan(pitch):
-                melody_matrix[int(pitch), time] = 1
-        if secondframedecomp:
-            win2 = int(round(self.win2sec*self.chromasr))
-            hop2 = int(round(0.5*self.chromasr))
-            nbins, norigframes = melody_matrix.shape
-            if norigframes<win2:
-                nframes = 1
-                win2 = norigframes
-            else:
-                nframes = int(1+numpy.floor((norigframes-win2)/float(hop2)))
-            bihistframes = numpy.empty((nbins*nbins, nframes))
-            for i in range(nframes):  # loop over all 8-sec frames
-                frame = melody_matrix[:, (i*hop2):(i*hop2+win2)]
-                bihist = self.bihistogram(frame)
-                bihist = numpy.reshape(bihist, -1)
-                bihistframes[:, i] = bihist
-            self.bihist = bihistframes
-        else:
-            self.bihist = self.bihistogram(melody_matrix)
-        return self.bihist
-
-    def bihistogram(self, spec, winsec=0.5, align=True):
-        win = int(round(winsec*self.chromasr))
-        ker = numpy.concatenate([numpy.zeros((win, 1)), numpy.ones((win+1, 1))], axis=0)
-        spec = spec.T  # transpose to have franes as rows in convolution
-
-        # energy threshold
-        thr = 0.3*numpy.max(spec)
-        spec[spec < max(thr, 0)] = 0
-
-        # transitions via convolution
-        tra = scipy.signal.convolve2d(spec, ker, mode='same')
-        tra[spec > 0] = 0
-
-        # multiply with original
-        B = numpy.dot(tra.T, spec)
-
-        # normalize
-        mxB = numpy.max(B)
-        mnB = numpy.min(B)
-        if mxB != mnB:
-            B = (B - mnB)/float(mxB-mnB)
-
-        # circshift to highest?
-        if align:
-            ref = numpy.argmax(numpy.sum(spec, axis=0))
-            B = numpy.roll(B, -ref, axis=0)
-            B = numpy.roll(B, -ref, axis=1)
-        return B

     def bihist_from_precomp_chroma(self, align=False):
         win2 = int(round(self.win2sec*self.chromasr))
@@ -126,12 +148,12 @@
         if norigframes<win2:
             nframes = 1
         else:
-            nframes = int(1+numpy.floor((norigframes-win2)/float(hop2)))
-        bihistframes = numpy.empty((nbins*nbins, nframes))
+            nframes = int(1+np.floor((norigframes-win2)/float(hop2)))
+        bihistframes = np.empty((nbins*nbins, nframes))
         for i in range(nframes):  # loop over all 8-sec frames
             frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)]
             bihist = self.bihistogram(frame, align=align)
-            bihist = numpy.reshape(bihist, -1)
+            bihist = np.reshape(bihist, -1)
             bihistframes[:, i] = bihist
         self.bihist = bihistframes
--- a/scripts/load_features.py	Thu Sep 14 13:07:19 2017 +0100
+++ b/scripts/load_features.py	Thu Sep 14 14:16:29 2017 +0100
@@ -276,47 +276,4 @@
         n_stop = np.int(np.ceil(stop_sec * self.framessr2))
         pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])]
         return pbihist
-
-
-    # def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
-    #     if not os.path.exists(melodia_file):
-    #         return []
-    #     print 'extracting pitch bihist from melodia...'
-    #     pb = pbi.PitchBihist(win2sec=self.win2sec)
-    #     pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)
-    #     if nmfpb is True:
-    #         pbihist = self.nmfpitchbihist(pbihist)
-    #     pbihist = pd.DataFrame(pbihist.T)
-    #     if scale:
-    #         # scale all frames by mean and std of recording
-    #         pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
-    #     return pbihist
-
-
-    # def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
-    #     base = os.path.basename(melodia_file)
-    #     root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
-    #     root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'
-    #     root_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/'
-    #     if 'SampleAudio' in base:
-    #         root = root_SM
-    #     else:
-    #         root = root_BL
-    #     base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
-    #     print 'load precomputed pitch bihist', root
-    #     #if self.win2sec == 8:
-    #     #    pbihist = pd.read_csv(os.path.join(root, base))
-    #     #else:
-    #     if 1:
-    #         pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
-    #         if nmfpb is True:
-    #             pbihist = self.nmfpitchbihist(pbihist)
-    #         pbihist = pd.DataFrame(pbihist.T)
-    #     n_stop = np.int(np.ceil(stop_sec * self.framessr2))
-    #     pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :]
-    #     print pbihist.shape
-    #     if scale:
-    #         # scale all frames by mean and std of recording
-    #         pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
-    #     return pbihist
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_PitchBihist.py	Thu Sep 14 14:16:29 2017 +0100
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep  1 19:11:52 2017
+
+@author: mariapanteli
+"""
+
+import pytest
+
+import numpy as np
+
+import scripts.PitchBihist as PitchBihist
+
+
+pbi = PitchBihist.PitchBihist()
+
+
+def test_hz_to_cents():
+    freq_Hz = np.array([32.703, 65.406, 55, 110])
+    freq_cents = pbi.hz_to_cents(freq_Hz)
+    freq_cents_true = np.array([0, 1200, 900, 2100])
+    assert np.array_equal(freq_cents, freq_cents_true)
+
+
+def test_wrap_to_octave():
+    cents = np.array([900, 2100, 1200])
+    octave_cents = pbi.wrap_to_octave(cents)
+    octave_cents_true = np.array([900, 900, 0])
+    assert np.array_equal(octave_cents, octave_cents_true)
+
+
+def test_get_melody_from_file():
+    melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv'
+    melody = pbi.get_melody_from_file(melodia_file)
+    assert len(melody) < 12. * pbi.chromasr
+
+
+def test_get_melody_matrix():
+    melody = 440 * np.ones(1000)
+    melody_matrix = pbi.get_melody_matrix(melody)
+    n_frames = melody_matrix.shape[1]
+    assert np.array_equal(melody_matrix[45, :], np.ones(n_frames))
+
+
+def test_bihist_from_melodia():
+    melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv'
+    bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=False)
+    assert bihist.shape == (60, 60)
+
+
+def test_bihist_from_melodia_n_frames():
+    melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv'
+    bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=True)
+    dur_sec = 11.5  # duration of first file in metadata.csv is > 11 seconds
+    n_frames_true = np.round((dur_sec - pbi.win2sec) * 2)  # for .5 sec hop size
+    assert bihist.shape[1] == n_frames_true
+
+
+def test_bihistogram():
+    melody = 440 * np.ones(1000)
+    melody_matrix = pbi.get_melody_matrix(melody)
+    bihist = pbi.bihistogram(melody_matrix, align=False)
+    assert np.array_equal(bihist, np.zeros((60, 60)))
+
+
+def test_bihistogram_values():
+    melody = np.concatenate([440 * np.ones(500), 32.703 * np.ones(500)])
+    melody_matrix = pbi.get_melody_matrix(melody)
+    # melody transitions from A to C (bin 45/60 to bin 0/60)
+    bihist = pbi.bihistogram(melody_matrix, align=False)
+    # expect only element [45, 0] to be non-zero
+    assert bihist[45, 0] > 0 and (np.sum(bihist) - bihist[45, 0]) == 0
+
+
+