plosone_underreview: diff scripts/classification.py @ 65:9b10b688c2ac (branch: branch-tests)
results for 30 seconds
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Thu, 21 Sep 2017 20:11:43 +0100 |
parents | e83ecc296669 |
children | 98fc06ba2938 |
```diff
--- a/scripts/classification.py	Thu Sep 21 17:36:16 2017 +0100
+++ b/scripts/classification.py	Thu Sep 21 20:11:43 2017 +0100
@@ -16,6 +16,7 @@
 
 FILENAMES = map_and_average.OUTPUT_FILES
 TRANSFORM_LABELS = ['LDA', 'PCA', 'NMF', 'SSNMF', 'NA']
+RANDOM_STATE = 12345
 
 def load_data_from_pickle(filename):
     X_list, Y, Yaudio = pickle.load(open(filename,'rb'))
@@ -25,7 +26,11 @@
 
 def feat_inds_from_pickle(filename):
     X_list, Y, Yaudio = pickle.load(open(filename,'rb'))
-    feat_inds = [len(X_list[0]), len(X_list[1]), len(X_list[2]), len(X_list[3])]
+    len_inds = np.array([X_list[0].shape[1], X_list[1].shape[1],
+                         X_list[2].shape[1], X_list[3].shape[1]])
+    cum_sum = np.concatenate([[0], np.cumsum(len_inds)])
+    feat_inds = [np.arange(cum_sum[i], cum_sum[i+1]) for i in range(len(X_list))]
+    #feat_inds = [X_list[0].shape[1], X_list[1].shape[1], X_list[2].shape[1], X_list[3].shape[1]]
     feat_labels = ['rhy', 'mel', 'mfc', 'chr']
     return feat_labels, feat_inds
 
@@ -57,11 +62,13 @@
         print filename
         X, Y, Yaudio = load_data_from_pickle(filename)
         #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
-        X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y)
-        X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test)
-        df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
-        df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
-        df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True)
+        X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
+        X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test)
+        #df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
+        #df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
+        #df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True)
+        #df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
+        df_result = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
         df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
     return df_results
 
@@ -71,12 +78,13 @@
     #feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim)
     feat_labels, feat_inds = feat_inds_from_pickle(filename)
     #df_results = pd.DataFrame()
+    feat_learner = util_feature_learning.Transformer()
     # first the classification with all features together
     df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
     # then append for each feature separately
     for i in range(len(feat_inds)):
         df_result = feat_learner.classify(X_train[:, feat_inds[i]], Y_train,
-                                        X_test[:, feat_inds[i]], Y_test)
+                                        X_test[:, feat_inds[i]], Y_test, transform_label=transform_label)
         df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True)
     return df_results
 
@@ -112,8 +120,8 @@
     X, Y, Yaudio = load_data_from_pickle(filename)
     #traininds, testinds = get_train_test_indices(Yaudio)
     #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
-    X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y)
-    X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test)
+    X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
+    X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test)
     if output_data:
         _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True)
     else:
```
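For orientation, the diff makes two behavioural changes: `feat_inds_from_pickle` now returns the column indices of each feature block within the concatenated feature matrix (computed from a cumulative sum of block widths) instead of the block lengths, and every `train_test_split` call is seeded through the shared `RANDOM_STATE` constant so the 60/20/20 stratified split is reproducible. The sketch below reproduces both steps in isolation; the array shapes, class labels, and the `sklearn.model_selection` import path are illustrative assumptions, not code taken from this repository.

```python
# Minimal sketch (not the repository's code): per-feature column indices from
# block widths, plus the seeded 60/20/20 stratified split.
import numpy as np
from sklearn.model_selection import train_test_split

RANDOM_STATE = 12345

# Pretend the four feature blocks (rhy, mel, mfc, chr) have these widths.
X_list = [np.random.rand(100, w) for w in (8, 4, 6, 2)]
Y = np.repeat(['a', 'b', 'c', 'd'], 25)

# Column indices of each block inside the horizontally concatenated matrix.
len_inds = np.array([x.shape[1] for x in X_list])
cum_sum = np.concatenate([[0], np.cumsum(len_inds)])
feat_inds = [np.arange(cum_sum[i], cum_sum[i + 1]) for i in range(len(X_list))]

X = np.hstack(X_list)

# 60% train, then split the remaining 40% in half: 20% validation, 20% test.
X_train, X_val_test, Y_train, Y_val_test = train_test_split(
    X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(
    X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE,
    stratify=Y_val_test)

print([inds[[0, -1]] for inds in feat_inds])   # first/last column of each block
print(X_train.shape, X_val.shape, X_test.shape)
```

Calling the split twice, first 60/40 and then 50/50 on the hold-out, yields the 60/20/20 train/validation/test partition used in both edited functions, and reusing the same `random_state` keeps that partition identical across runs.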