# HG changeset patch # User Maria Panteli # Date 1505488675 -3600 # Node ID b1d9ba5f888ea7e7fad9c43c2e9e1cbdb9619842 # Parent 57f53b0d1eaa088b179b51609394de0ba4966476 debugging tests diff -r 57f53b0d1eaa -r b1d9ba5f888e scripts/OPMellin.py --- a/scripts/OPMellin.py Fri Sep 15 12:27:11 2017 +0100 +++ b/scripts/OPMellin.py Fri Sep 15 16:17:55 2017 +0100 @@ -195,7 +195,7 @@ self.melspec = melspec self.melsr = melsr self.post_process_spec(log=False, sqrt=True, medianfilt=True) # sqrt seems to work better - self.onset_patterns(logfilter=False, center=False) + self.onset_patterns(center=False) self.post_process_op(medianfiltOP=True) self.mellin_transform() self.post_process_opmellin() diff -r 57f53b0d1eaa -r b1d9ba5f888e scripts/load_features.py --- a/scripts/load_features.py Fri Sep 15 12:27:11 2017 +0100 +++ b/scripts/load_features.py Fri Sep 15 16:17:55 2017 +0100 @@ -103,7 +103,7 @@ return music_idx - def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): + def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True): oplist = [] mflist = [] chlist = [] @@ -116,26 +116,32 @@ continue print 'file ' + str(i) + ' of ' + str(n_files) music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) - if len(music_idx)==0: - # no music segments -> skip this file + #min_dur_sec=8.0 + #min_n_frames = np.int(np.floor(min_dur_sec * self.framessr2)) + if len(music_idx)==0: # or len(music_idx) skip this file continue - try: - op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) - ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) - pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) + if 1: + # allow feature extraction of longer segments (2*stop_sec) + # because some of it might be speech segments that are filtered out + stop_sec_feat = 2 * stop_sec + op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec_feat) + ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec_feat) + pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec_feat) #if precomp_melody: # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) #else: # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) - except: + else: continue n_stop = np.int(np.ceil(stop_sec * self.framessr2)) print n_stop, len(op), len(mfcc), len(ch), len(pb) - min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames - if min_n_frames==0: + max_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames + if max_n_frames==0: # no features extracted -> skip this file continue - music_idx = music_idx[music_idx np.sum(pbihist[:, 60:120].ravel()) def test_get_pb_for_file_nmf(): - pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=True, scale=False) + pbihist = feat_loader.get_pb_for_file(TEST_MELODIA_FILE, nmfpb=True, scale=False) assert pbihist.shape[1] == 240 def test_get_features(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) df = df.iloc[:1, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + print df.head() + print os.getcwd() + ddf = pd.read_csv(df['Melodia'].iloc[0]) + print ddf.head() + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') assert len(np.unique(data_list[-1])) == 1 def test_get_features_n_files(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) n_files = 3 df = df.iloc[:n_files, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') assert len(np.unique(data_list[-1])) == n_files def test_get_features_n_frames(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) df = df.iloc[:1, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') dur_sec = 11.5 # duration of first file in metadata.csv is > 11 seconds n_frames_true = np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2)