comparison scripts/load_features.py @ 43:b1d9ba5f888e branch-tests

debugging tests
author Maria Panteli <m.x.panteli@gmail.com>
date Fri, 15 Sep 2017 16:17:55 +0100
parents 3b67cd634b9a
children 06e5711f9f62
comparison
legend: equal | deleted | inserted | replaced
left column: 41:57f53b0d1eaa | right column: 43:b1d9ba5f888e
101 bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values() 101 bounds = pd.read_csv(segmenter_file, header=None, delimiter=',').get_values()
102 music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2) 102 music_idx = self.get_music_idx_from_bounds(bounds, sr=self.framessr2)
103 return music_idx 103 return music_idx
104 104
105 105
106 def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): 106 def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True):
107 oplist = [] 107 oplist = []
108 mflist = [] 108 mflist = []
109 chlist = [] 109 chlist = []
110 pblist = [] 110 pblist = []
111 clabels = [] 111 clabels = []
114 for i in range(n_files): 114 for i in range(n_files):
115 if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])): 115 if not (os.path.exists(df['Melspec'].iloc[i]) and os.path.exists(df['Chroma'].iloc[i]) and os.path.exists(df['Melodia'].iloc[i])):
116 continue 116 continue
117 print 'file ' + str(i) + ' of ' + str(n_files) 117 print 'file ' + str(i) + ' of ' + str(n_files)
118 music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) 118 music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i])
119 if len(music_idx)==0: 119 #min_dur_sec=8.0
120 # no music segments -> skip this file 120 #min_n_frames = np.int(np.floor(min_dur_sec * self.framessr2))
121 continue 121 if len(music_idx)==0: # or len(music_idx)<min_n_frames:
122 try: 122 # no music segments or music segment too short -> skip this file
123 op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) 123 continue
124 ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) 124 if 1:
125 pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) 125 # allow feature extraction of longer segments (2*stop_sec)
126 # because some of it might be speech segments that are filtered out
127 stop_sec_feat = 2 * stop_sec
128 op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec_feat)
129 ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec_feat)
130 pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec_feat)
126 #if precomp_melody: 131 #if precomp_melody:
127 # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) 132 # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
128 #else: 133 #else:
129 # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) 134 # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec)
130 except: 135 else:
131 continue 136 continue
132 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) 137 n_stop = np.int(np.ceil(stop_sec * self.framessr2))
133 print n_stop, len(op), len(mfcc), len(ch), len(pb) 138 print n_stop, len(op), len(mfcc), len(ch), len(pb)
134 min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames 139 max_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames
135 if min_n_frames==0: 140 if max_n_frames==0:
136 # no features extracted -> skip this file 141 # no features extracted -> skip this file
137 continue 142 continue
138 music_idx = music_idx[music_idx<min_n_frames] 143 # music segment duration must be <= 30sec
144 music_idx = music_idx[music_idx<max_n_frames]
139 n_frames = len(music_idx) 145 n_frames = len(music_idx)
140 oplist.append(op.iloc[music_idx, :]) 146 oplist.append(op.iloc[music_idx, :])
141 mflist.append(mfcc.iloc[music_idx, :]) 147 mflist.append(mfcc.iloc[music_idx, :])
142 chlist.append(ch.iloc[music_idx, :]) 148 chlist.append(ch.iloc[music_idx, :])
143 pblist.append(pb.iloc[music_idx, :]) 149 pblist.append(pb.iloc[music_idx, :])
269 if 'SampleAudio' in base: 275 if 'SampleAudio' in base:
270 root = root_SM 276 root = root_SM
271 else: 277 else:
272 root = root_BL 278 root = root_BL
273 base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' 279 base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv'
280 bihist_file = os.path.join(root, base)
281 if not os.path.exists(bihist_file):
282 return pbihist
274 print 'load precomputed pitch bihist', root 283 print 'load precomputed pitch bihist', root
275 pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T 284 pbihist = np.loadtxt(bihist_file, delimiter=',').T
276 n_stop = np.int(np.ceil(stop_sec * self.framessr2)) 285 n_stop = np.int(np.ceil(stop_sec * self.framessr2))
277 pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] 286 pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])]
278 return pbihist 287 return pbihist
279 288