Mercurial > hg > plosone_underreview
changeset 62:4425a4918102 branch-tests
fixed indices for feature components
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Thu, 21 Sep 2017 17:35:07 +0100 |
parents | ac3fcd42e7bd |
children | e83ecc296669 861fe1b57672 |
files | scripts/classification.py scripts/map_and_average.py tests/test_map_and_average.py |
diffstat | 3 files changed, 38 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/classification.py Thu Sep 21 15:25:20 2017 +0100 +++ b/scripts/classification.py Thu Sep 21 17:35:07 2017 +0100 @@ -8,6 +8,7 @@ import pandas as pd import pickle from sklearn import metrics +from sklearn.model_selection import train_test_split import map_and_average import util_feature_learning @@ -22,6 +23,13 @@ return X, Y, Yaudio +def feat_inds_from_pickle(filename): + X_list, Y, Yaudio = pickle.load(open(filename,'rb')) + feat_inds = [len(X_list[0]), len(X_list[1]), len(X_list[2]), len(X_list[3])] + feat_labels = ['rhy', 'mel', 'mfc', 'chr'] + return feat_labels, feat_inds + + def get_train_test_indices(audiolabs): trainset, valset, testset = map_and_average.load_train_val_test_sets() trainaudiolabels, testaudiolabels = trainset[2], testset[2] @@ -52,15 +60,16 @@ X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) - df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=transform_label) + df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) return df_results -def classify_each_feature(X_train, Y_train, X_test, Y_test, transform_label=" "): +def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "): n_dim = X_train.shape[1] - feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) + #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) + feat_labels, feat_inds = feat_inds_from_pickle(filename) #df_results = pd.DataFrame() # first the classification with all features together df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
--- a/scripts/map_and_average.py Thu Sep 21 15:25:20 2017 +0100 +++ b/scripts/map_and_average.py Thu Sep 21 17:35:07 2017 +0100 @@ -73,6 +73,22 @@ return trainset, valset, testset +def limit_to_n_seconds(dataset, n_sec=30.0, win_sec=8.0): + X, Y, Yaudio = dataset + uniq_audio, uniq_counts = np.unique(Yaudio, return_counts=True) + frame_sr = 2.0 + max_n_frames = np.int(np.floor((n_sec - win_sec) * frame_sr)) + X_new, Y_new, Yaudio_new = [], [], [] + for audio in uniq_audio: + idx = np.where(Yaudio==audio)[0] + if len(idx) > max_n_frames: + idx = idx[:max_n_frames] + X_new.append(X[idx, :]) + Y_new.append(Y[idx]) + Yaudio_new.append(Yaudio[idx]) + return [np.concatenate(X_new), np.concatenate(Y_new), np.concatenate(Yaudio_new)] + + def get_feat_inds(n_dim=840): '''assume frame with 840 features and return indices for each feature '''
--- a/tests/test_map_and_average.py Thu Sep 21 15:25:20 2017 +0100 +++ b/tests/test_map_and_average.py Thu Sep 21 17:35:07 2017 +0100 @@ -35,4 +35,13 @@ audiolabels = np.array(['a', 'a', 'b', 'b', 'b']) feat, audio, labels = map_and_average.averageframes(features, audiolabels, classlabels) feat_true = np.array([[0, 1.5], [1, 1]]) - assert np.array_equal(feat, feat_true) \ No newline at end of file + assert np.array_equal(feat, feat_true) + + +def test_limit_to_n_seconds(): + X = np.random.randn(10, 3) + Y = np.random.randn(10) + Yaudio = np.concatenate([np.repeat('a', 7), np.repeat('b', 3)]) + Xn, Yn, Yaudion = map_and_average.limit_to_n_seconds([X, Y, Yaudio], n_sec=3.0, win_sec=0.5) + Yaudion_true = np.concatenate([np.repeat('a', 5), np.repeat('b', 3)]) + assert np.array_equal(Yaudion_true, Yaudion) and len(Xn)==len(Yn) and len(Yn)==len(Yaudion) \ No newline at end of file