Mercurial > hg > plosone_underreview

--- a/scripts/classification.py	Thu Sep 21 17:35:57 2017 +0100
+++ b/scripts/classification.py	Thu Sep 21 17:36:16 2017 +0100
@@ -23,6 +23,13 @@
     return X, Y, Yaudio


+def feat_inds_from_pickle(filename):
+    """Load a pickled dataset and return per-feature labels and sizes.
+
+    The pickle stored at ``filename`` must unpack into exactly three items,
+    ``(X_list, Y, Yaudio)``; only ``X_list`` is used here.
+
+    Returns a tuple ``(feat_labels, feat_inds)`` where ``feat_labels`` is
+    ``['rhy', 'mel', 'mfc', 'chr']`` and ``feat_inds[i]`` is
+    ``len(X_list[i])`` for the label at the same position.
+
+    Errors from ``open`` / ``pickle.load`` propagate unchanged; a list-like
+    ``X_list`` with fewer than four entries raises ``IndexError``.
+    """
+    feat_labels = ['rhy', 'mel', 'mfc', 'chr']
+    # Use a context manager so the file handle is closed deterministically.
+    # NOTE: pickle.load can execute arbitrary code -- only pass trusted files.
+    with open(filename, 'rb') as f:
+        X_list, _, _ = pickle.load(f)
+    # NOTE(review): len() measures the first axis of each entry. If the
+    # entries are (n_frames, n_dims) arrays this is a frame count, not a
+    # feature width -- confirm against how the caller consumes feat_inds.
+    feat_inds = [len(X_list[i]) for i in range(len(feat_labels))]
+    return feat_labels, feat_inds
+
+
 def get_train_test_indices(audiolabs):
     trainset, valset, testset = map_and_average.load_train_val_test_sets()
     trainaudiolabels, testaudiolabels = trainset[2], testset[2]
@@ -53,15 +60,16 @@
         X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y)
         X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test)
         df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
-        df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
+        df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
         df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True)
         df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
     return df_results


-def classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=" "):
+def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "):
     n_dim = X_train.shape[1]
-    feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim)
+    #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim)
+    feat_labels, feat_inds = feat_inds_from_pickle(filename)
     #df_results = pd.DataFrame()
     # first the classification with all features together
     df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
--- a/scripts/map_and_average.py	Thu Sep 21 17:35:57 2017 +0100
+++ b/scripts/map_and_average.py	Thu Sep 21 17:36:16 2017 +0100
@@ -73,6 +73,22 @@
     return trainset, valset, testset


+def limit_to_n_seconds(dataset, n_sec=30.0, win_sec=8.0):
+    """Truncate every recording in ``dataset`` to its first few frames.
+
+    ``dataset`` is a sequence ``(X, Y, Yaudio)`` of arrays that share their
+    first axis: ``X`` is indexed as ``X[rows, :]``, ``Y`` and ``Yaudio`` as
+    ``Y[rows]``. Rows with the same ``Yaudio`` value form one recording.
+
+    For each recording at most ``floor((n_sec - win_sec) * 2.0)`` rows are
+    kept, taken in their original order; 2.0 is the frame rate (frames per
+    second) hard-coded in this function. A non-positive budget keeps no rows.
+
+    Returns a list ``[X_new, Y_new, Yaudio_new]`` with recordings ordered as
+    ``np.unique(Yaudio)`` orders them.
+    """
+    X, Y, Yaudio = dataset
+    frame_sr = 2.0
+    # Builtin int replaces np.int, which was removed in NumPy 1.24. Clamp at
+    # zero so a negative budget cannot turn the slice below into
+    # "drop the last k rows".
+    max_n_frames = max(0, int(np.floor((n_sec - win_sec) * frame_sr)))
+    # Collect the row positions to keep, recording by recording; slicing is
+    # a no-op for recordings already within the budget.
+    keep = [np.where(Yaudio == audio)[0][:max_n_frames]
+            for audio in np.unique(Yaudio)]
+    # np.concatenate rejects an empty list, so handle an empty dataset here.
+    keep = np.concatenate(keep) if keep else np.empty(0, dtype=int)
+    return [X[keep, :], Y[keep], Yaudio[keep]]
+
+
 def get_feat_inds(n_dim=840):
     '''assume frame with 840 features and return indices for each feature
     '''
--- a/tests/test_map_and_average.py	Thu Sep 21 17:35:57 2017 +0100
+++ b/tests/test_map_and_average.py	Thu Sep 21 17:36:16 2017 +0100
@@ -35,4 +35,13 @@
     audiolabels = np.array(['a', 'a', 'b', 'b', 'b'])
     feat, audio, labels = map_and_average.averageframes(features, audiolabels, classlabels)
     feat_true = np.array([[0, 1.5], [1, 1]])
-    assert np.array_equal(feat, feat_true)
\ No newline at end of file
+    assert np.array_equal(feat, feat_true)
+
+
+def test_limit_to_n_seconds():
+    """limit_to_n_seconds keeps only the leading frames of each recording."""
+    X = np.random.randn(10, 3)
+    Y = np.random.randn(10)
+    Yaudio = np.concatenate([np.repeat('a', 7), np.repeat('b', 3)])
+    # (3.0 - 0.5) seconds at 2 frames per second -> at most 5 frames each,
+    # so 'a' is cut from 7 to 5 frames and 'b' keeps all 3.
+    Xn, Yn, Yaudion = map_and_average.limit_to_n_seconds([X, Y, Yaudio], n_sec=3.0, win_sec=0.5)
+    Yaudion_true = np.concatenate([np.repeat('a', 5), np.repeat('b', 3)])
+    kept_rows = np.array([0, 1, 2, 3, 4, 7, 8, 9])
+    # Separate assertions so a failure names the array that is wrong, and
+    # compare contents rather than only lengths.
+    assert np.array_equal(Yaudion_true, Yaudion)
+    assert np.array_equal(Xn, X[kept_rows, :])
+    assert np.array_equal(Yn, Y[kept_rows])
\ No newline at end of file