comparison scripts/classification.py @ 65:9b10b688c2ac branch-tests

results for 30 seconds
author mpanteli <m.x.panteli@gmail.com>
date Thu, 21 Sep 2017 20:11:43 +0100
parents e83ecc296669
children 98fc06ba2938
comparison
equal deleted inserted replaced
64:e83ecc296669 65:9b10b688c2ac
14 import util_feature_learning 14 import util_feature_learning
15 15
16 16
# Input files, taken from map_and_average; the code in this file indexes into
# this sequence and passes the chosen entry to load_data_from_pickle.
FILENAMES = map_and_average.OUTPUT_FILES

# Transform names; zipped one-to-one with the list of files being classified.
TRANSFORM_LABELS = [
    'LDA',
    'PCA',
    'NMF',
    'SSNMF',
    'NA',
]

# Seed handed to train_test_split as random_state so splits are repeatable.
RANDOM_STATE = 12345
19 20
def load_data_from_pickle(filename):
    """Load the pickled feature blocks and labels stored in `filename`.

    The pickle is expected to hold a 3-tuple ``(X_list, Y, Yaudio)``. The
    arrays in ``X_list`` are joined along axis 1 into a single matrix.

    Parameters
    ----------
    filename : path of the pickle file to read.

    Returns
    -------
    X : concatenation of the arrays in ``X_list`` along axis 1.
    Y, Yaudio : the second and third pickled objects, returned unchanged.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # The context manager closes the file handle even if unpickling fails.
    with open(filename, 'rb') as f:
        X_list, Y, Yaudio = pickle.load(f)
    X = np.concatenate(X_list, axis=1)
    return X, Y, Yaudio
24 25
25 26
def feat_inds_from_pickle(filename):
    """Return feature labels and the column indices of each feature block.

    The pickle is expected to hold a 3-tuple whose first item is a list of
    2-D arrays (one per feature block). Because the blocks are concatenated
    along axis 1 elsewhere in this file, block ``i`` occupies a contiguous
    range of columns whose width is ``X_list[i].shape[1]``.

    Parameters
    ----------
    filename : path of the pickle file to read.

    Returns
    -------
    feat_labels : the fixed list ``['rhy', 'mel', 'mfc', 'chr']``.
    feat_inds : list with one integer index array per entry of ``X_list``,
        giving that block's column positions in the concatenated matrix.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        X_list, _, _ = pickle.load(f)
    # Take the width of every block rather than assuming exactly four, so the
    # number of index arrays always matches len(X_list).
    widths = [x.shape[1] for x in X_list]
    cum_sum = np.concatenate([[0], np.cumsum(widths)])
    feat_inds = [np.arange(start, stop)
                 for start, stop in zip(cum_sum[:-1], cum_sum[1:])]
    feat_labels = ['rhy', 'mel', 'mfc', 'chr']
    return feat_labels, feat_inds
31 36
32 37
33 def get_train_test_indices(audiolabs): 38 def get_train_test_indices(audiolabs):
55 #traininds, testinds = get_train_test_indices(Yaudio) 60 #traininds, testinds = get_train_test_indices(Yaudio)
56 for filename, transform_label in zip(file_list, TRANSFORM_LABELS): 61 for filename, transform_label in zip(file_list, TRANSFORM_LABELS):
57 print filename 62 print filename
58 X, Y, Yaudio = load_data_from_pickle(filename) 63 X, Y, Yaudio = load_data_from_pickle(filename)
59 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) 64 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
60 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) 65 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
61 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) 66 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test)
62 df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label) 67 #df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test, transform_label=transform_label)
63 df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label) 68 #df_result_feat = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
64 df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True) 69 #df_result = pd.concat([df_result, df_result_feat], axis=1, ignore_index=True)
70 #df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
71 df_result = classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=transform_label)
65 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) 72 df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
66 return df_results 73 return df_results
67 74
68 75
def classify_each_feature(X_train, Y_train, X_test, Y_test, filename, transform_label=" "):
    """Classify with all features together, then with each feature block alone.

    Parameters
    ----------
    X_train, Y_train : training data and labels.
    X_test, Y_test : test data and labels.
    filename : pickle file passed to feat_inds_from_pickle to obtain the
        column indices of each feature block.
    transform_label : label forwarded to every ``classify`` call.

    Returns
    -------
    The result of the all-features classification concatenated column-wise
    (``axis=1``, columns renumbered by ``ignore_index=True``) with the result
    for each feature block, in the order of the blocks.
    """
    _, feat_inds = feat_inds_from_pickle(filename)
    feat_learner = util_feature_learning.Transformer()
    # first the classification with all features together
    df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test,
                                       transform_label=transform_label)
    # then append for each feature separately
    for inds in feat_inds:
        df_result = feat_learner.classify(X_train[:, inds], Y_train,
                                          X_test[:, inds], Y_test,
                                          transform_label=transform_label)
        df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True)
    return df_results
82 90
83 91
84 def plot_CF(CF, labels=None, figurename=None): 92 def plot_CF(CF, labels=None, figurename=None):
110 filename = FILENAMES[feat_learning_i] 118 filename = FILENAMES[feat_learning_i]
111 print filename 119 print filename
112 X, Y, Yaudio = load_data_from_pickle(filename) 120 X, Y, Yaudio = load_data_from_pickle(filename)
113 #traininds, testinds = get_train_test_indices(Yaudio) 121 #traininds, testinds = get_train_test_indices(Yaudio)
114 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) 122 #X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
115 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=12345, stratify=Y) 123 X_train, X_val_test, Y_train, Y_val_test = train_test_split(X, Y, train_size=0.6, random_state=RANDOM_STATE, stratify=Y)
116 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=12345, stratify=Y_val_test) 124 X_val, X_test, Y_val, Y_test = train_test_split(X_val_test, Y_val_test, train_size=0.5, random_state=RANDOM_STATE, stratify=Y_val_test)
117 if output_data: 125 if output_data:
118 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True) 126 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True)
119 else: 127 else:
120 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False) 128 _, CF = confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)
121 return CF 129 return CF