diff scripts/load_features.py @ 26:29b5ee381305 branch-tests

notebooks rerunning, and changes in load features for melodia
author mpanteli <m.x.panteli@gmail.com>
date Wed, 13 Sep 2017 17:33:48 +0100
parents 8dea2b8349c5
children e4736064d282
line wrap: on
line diff
--- a/scripts/load_features.py	Wed Sep 13 13:53:09 2017 +0100
+++ b/scripts/load_features.py	Wed Sep 13 17:33:48 2017 +0100
@@ -26,7 +26,7 @@
         self.framessr2 = self.framessr/float(self.hop2)
     
     
-    def get_op_mfcc_for_file(self, melspec_file=None, scale=True):
+    def get_op_mfcc_for_file(self, melspec_file=None, scale=True, stop_sec=30.0):
         op = []
         mfc = []
         if not os.path.exists(melspec_file):
@@ -34,7 +34,8 @@
         print 'extracting onset patterns and mfccs...'
         songframes = pd.read_csv(melspec_file, engine="c", header=None)
         songframes.iloc[np.where(np.isnan(songframes))] = 0
-        songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
+	n_stop = np.int(np.ceil(stop_sec * self.framessr))
+        songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
         melspec = songframes.get_values().T
         op = self.get_op_from_melspec(melspec, K=2)
         mfc = self.get_mfcc_from_melspec(melspec)
@@ -45,14 +46,15 @@
         return op, mfc
     
     
-    def get_chroma_for_file(self, chroma_file=None, scale=True):
+    def get_chroma_for_file(self, chroma_file=None, scale=True, stop_sec=30.0):
         ch = []
         if not os.path.exists(chroma_file):
             return ch
         print 'extracting chroma...'
         songframes = pd.read_csv(chroma_file, engine="c", header=None)
         songframes.iloc[np.where(np.isnan(songframes))] = 0
-        songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
+        n_stop = np.int(np.ceil(stop_sec * self.framessr))
+	songframes = songframes.iloc[0:min(len(songframes), n_stop), :]
         chroma = songframes.get_values().T
         ch = self.get_ave_chroma(chroma)
         if scale:
@@ -98,7 +100,7 @@
         return music_idx
     
     
-    def get_features(self, df, class_label='Country'):
+    def get_features(self, df, stop_sec=30.0, class_label='Country'):
         oplist = []
         mflist = []
         chlist = []
@@ -114,15 +116,17 @@
             if len(music_idx)==0:
                 # no music segments -> skip this file
                 continue
-            try:
-                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
-                ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
-                pb = self.get_pb_from_melodia(df['Melodia'].iloc[i])
-                #pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
+            if 1:
+                op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec)
+                ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec)
+                #pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
+                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
                 #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i])
-            except:
+            else:
                 continue
-            min_n_frames = np.min([len(op), len(mfcc), len(ch), len(pb)])  # ideally, features should have the same number of frames
+            n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+	    print n_stop, len(op), len(mfcc), len(ch), len(pb)
+	    min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)])  # ideally, features should have the same number of frames
             if min_n_frames==0:
                 # no features extracted -> skip this file
                 continue
@@ -222,13 +226,13 @@
         return newframes    
 
 
-    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+    def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
         pb = []
         if not os.path.exists(melodia_file):
             return pb
         print 'extracting pitch bihist from melodia...'
         pb = pbi.PitchBihist(win2sec=self.win2sec)
-        pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=90.0)
+        pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec)
         if nmfpb is True:
             pbihist = self.nmfpitchbihist(pbihist)
         pbihist = pd.DataFrame(pbihist.T)
@@ -238,7 +242,7 @@
         return pbihist
 
 
-    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True):
+    def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0):
         base = os.path.basename(melodia_file)    
         root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/'
         root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/'
@@ -247,14 +251,18 @@
             root = root_SM
         else:
             root = root_BL
+	    base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
         print 'load precomputed pitch bihist', root
-        if self.win2sec == 8:
-            pbihist = pd.read_csv(os.path.join(root, base)).iloc[1:,1:]
-        else:
+        #if self.win2sec == 8:
+        #    pbihist = pd.read_csv(os.path.join(root, base))
+        #else:
+	if 1:
             pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T
             if nmfpb is True:
                 pbihist = self.nmfpitchbihist(pbihist)
             pbihist = pd.DataFrame(pbihist.T)
+	n_stop = np.int(np.ceil(stop_sec * self.framessr2))
+	pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :]
         print pbihist.shape
         if scale:
             # scale all frames by mean and std of recording