"""Run the LDA mapping step for one numbered set of dataset pickles.

Usage: pass a single integer N on the command line.

The script points ``mapper.INPUT_FILES`` at the ``*_N.pickle`` variants of
``load_dataset.OUTPUT_FILES``, calls ``mapper.lda_map_and_average_frames``
with ``min_variance=0.99``, and writes the result through
``mapper.write_output`` to the ``*_N.pickle`` variants of the mapper's
original ``OUTPUT_FILES``.
"""
import numpy as np
import pandas as pd
import sys
sys.path.append('../')  # make the sibling ``scripts`` package importable
import scripts.load_dataset as load_dataset
import scripts.map_and_average as mapper
import scripts.classification as classification
import scripts.outliers as outliers


def _with_iteration_suffix(filenames, iteration):
    """Return ``filenames`` with ``_<iteration>`` inserted before ``.pickle``.

    Everything from the first ``.pickle`` occurrence onwards is dropped and
    replaced by ``_<iteration>.pickle`` (same rule the script always used).
    """
    suffix = '_' + str(iteration) + '.pickle'
    return [name.split('.pickle')[0] + suffix for name in filenames]


#df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)
OUTPUT_FILES = load_dataset.OUTPUT_FILES
n_iters = 1  # only referenced by the disabled loop below; kept for reference

# Fail with a readable message instead of a bare IndexError/ValueError.
if len(sys.argv) < 2:
    sys.exit("usage: %s N   (N = integer iteration index)" % sys.argv[0])
try:
    n = int(sys.argv[1])
except ValueError:
    sys.exit("iteration index must be an integer, got %r" % sys.argv[1])

# Capture the mapper's default output names before they are overwritten below.
MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES

#for n in range(n_iters):
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("iteration %d" % n)

print("mapping...")
# The mapper reads its inputs from module-level INPUT_FILES, so it must be
# redirected before lda_map_and_average_frames is called.
mapper.INPUT_FILES = _with_iteration_suffix(OUTPUT_FILES, n)
_, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)
# Likewise, write_output uses module-level OUTPUT_FILES as its destinations.
mapper.OUTPUT_FILES = _with_iteration_suffix(MAPPER_OUTPUT_FILES, n)
# Only the LDA data and the two label outputs are written; the other slots
# are passed as empty lists.
mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)

# --- Disabled downstream steps (kept for reference) ---

#X = np.concatenate(ldadata_list, axis=1)

## classification and confusion
#print "classifying..."
#traininds, testinds = classification.get_train_test_indices(Yaudio)
#X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)
#accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)
#print accuracy

## outliers
#print "detecting outliers..."
#df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)
#outliers.print_most_least_outliers_topN(df_global, N=10)

## write output
#print "writing file"
#df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)