Mercurial > hg > plosone_underreview
changeset 43:b1d9ba5f888e branch-tests
debugging tests
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 15 Sep 2017 16:17:55 +0100 |
parents | 57f53b0d1eaa |
children | 06e5711f9f62 |
files | scripts/OPMellin.py scripts/load_features.py tests/test_load_features.py |
diffstat | 3 files changed, 42 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/OPMellin.py Fri Sep 15 12:27:11 2017 +0100
+++ b/scripts/OPMellin.py Fri Sep 15 16:17:55 2017 +0100
@@ -195,7 +195,7 @@
         self.melspec = melspec
         self.melsr = melsr
         self.post_process_spec(log=False, sqrt=True, medianfilt=True) # sqrt seems to work better
-        self.onset_patterns(logfilter=False, center=False)
+        self.onset_patterns(center=False)
         self.post_process_op(medianfiltOP=True)
         self.mellin_transform()
         self.post_process_opmellin()
--- a/scripts/load_features.py Fri Sep 15 12:27:11 2017 +0100 +++ b/scripts/load_features.py Fri Sep 15 16:17:55 2017 +0100 @@ -103,7 +103,7 @@ return music_idx - def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): + def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True): oplist = [] mflist = [] chlist = [] @@ -116,26 +116,32 @@ continue print 'file ' + str(i) + ' of ' + str(n_files) music_idx = self.get_music_idx_for_file(df['Speech'].iloc[i]) - if len(music_idx)==0: - # no music segments -> skip this file + #min_dur_sec=8.0 + #min_n_frames = np.int(np.floor(min_dur_sec * self.framessr2)) + if len(music_idx)==0: # or len(music_idx)<min_n_frames: + # no music segments or music segment too short -> skip this file continue - try: - op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) - ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) - pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) + if 1: + # allow feature extraction of longer segments (2*stop_sec) + # because some of it might be speech segments that are filtered out + stop_sec_feat = 2 * stop_sec + op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec_feat) + ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec_feat) + pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec_feat) #if precomp_melody: # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) #else: # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) - except: + else: continue n_stop = np.int(np.ceil(stop_sec * self.framessr2)) print n_stop, len(op), len(mfcc), len(ch), len(pb) - min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames - if min_n_frames==0: + max_n_frames = 
np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames + if max_n_frames==0: # no features extracted -> skip this file continue - music_idx = music_idx[music_idx<min_n_frames] + # music segment duration must be <= 30sec + music_idx = music_idx[music_idx<max_n_frames] n_frames = len(music_idx) oplist.append(op.iloc[music_idx, :]) mflist.append(mfcc.iloc[music_idx, :]) @@ -271,8 +277,11 @@ else: root = root_BL base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' + bihist_file = os.path.join(root, base) + if not os.path.exists(bihist_file): + return pbihist print 'load precomputed pitch bihist', root - pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T + pbihist = np.loadtxt(bihist_file, delimiter=',').T n_stop = np.int(np.ceil(stop_sec * self.framessr2)) pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] return pbihist
--- a/tests/test_load_features.py Fri Sep 15 12:27:11 2017 +0100 +++ b/tests/test_load_features.py Fri Sep 15 16:17:55 2017 +0100 @@ -14,6 +14,13 @@ feat_loader = load_features.FeatureLoader(win2sec=8) +#TEST_METADATA_FILE = '../data/sample_dataset/metadata.csv' +TEST_METADATA_FILE = os.path.join(os.path.dirname(__file__), os.path.pardir, + 'data', 'sample_dataset', 'metadata.csv') +#TEST_METADATA_FILE = 'data/sample_dataset/metadata.csv' +#TEST_MELODIA_FILE = 'data/sample_dataset/Melodia/mel_1_2_1.csv' +TEST_MELODIA_FILE = os.path.join(os.path.dirname(__file__), os.path.pardir, + 'data', 'sample_dataset', 'Melodia', 'mel_1_2_1.csv') def test_get_music_idx_from_bounds(): bounds = np.array([['0', '10.5', 'm']]) @@ -213,45 +220,49 @@ def test_get_pb_for_file_n_bins(): - pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=False, scale=False) + pbihist = feat_loader.get_pb_for_file(TEST_MELODIA_FILE, nmfpb=False, scale=False) assert pbihist.shape[1] == 3600 def test_get_pb_for_file_align(): - pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=False, scale=False) + pbihist = feat_loader.get_pb_for_file(TEST_MELODIA_FILE, nmfpb=False, scale=False) pbihist = pbihist.get_values() assert np.sum(pbihist[:, :60].ravel()) > np.sum(pbihist[:, 60:120].ravel()) def test_get_pb_for_file_nmf(): - pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=True, scale=False) + pbihist = feat_loader.get_pb_for_file(TEST_MELODIA_FILE, nmfpb=True, scale=False) assert pbihist.shape[1] == 240 def test_get_features(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) df = df.iloc[:1, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + print df.head() + print os.getcwd() + ddf = pd.read_csv(df['Melodia'].iloc[0]) + print ddf.head() + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') assert 
len(np.unique(data_list[-1])) == 1 def test_get_features_n_files(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) n_files = 3 df = df.iloc[:n_files, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') assert len(np.unique(data_list[-1])) == n_files def test_get_features_n_frames(): - df = pd.read_csv('data/sample_dataset/metadata.csv') + df = pd.read_csv(TEST_METADATA_FILE) df = df.iloc[:1, :] os.chdir('data/') - data_list = feat_loader.get_features(df) + data_list = feat_loader.get_features(df, precomp_melody=False) os.chdir('..') dur_sec = 11.5 # duration of first file in metadata.csv is > 11 seconds n_frames_true = np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2)