#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn

Loads a feature dataset from a .yaml file, extracts a PCA configuration
from it and reports which features are kept.
"""

# This file does the cluster estimation and the removal of outliers
# NOTE(review): only the PCA configuration step is implemented so far; see
# the TODO at the end of the file.

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances


def mse(A, B):
    """Return the mean squared error between the array-likes A and B."""
    return ((array(A) - array(B)) ** 2).mean()


def _format_captions(captions):
    """Return the captions one per line, each prefixed with the [II] tag."""
    # Explicit join instead of string-replacing the list's repr, which broke
    # on unicode reprs and on names containing "[" or "'".
    return "\n".join("[II]\t %s" % caption for caption in captions)


if __name__ == "__main__":
    if len(argv) != 2:
        print("[EE] Wrong number of arguments")
        print("[II] Correct syntax is:")
        # The program name is substituted here; the original printed a
        # literal "%s" and had lost the argument placeholder.
        print("[II] \t%s <infile>" % argv[0])
        print("[II] where <infile> is a .yaml file containing the")
        print("[II] features of the dataset (try output2_stage/fulltraining-last.yaml)")
        exit(-1)

    infile = argv[1]

    features_pool = YamlInput(filename=infile)()

    # Every descriptor except those in the 'metadata' namespace is a feature.
    feature_captions = [
        name for name in features_pool.descriptorNames()
        if name.split('.')[0] != 'metadata'
    ]

    print("[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]))
    print("[II] %d Features Available: " % len(feature_captions))

    if not feature_captions:
        # Without this guard the matrix allocation below fails with an
        # IndexError on feature_captions[0].
        print("[EE] No features found in %s" % infile)
        exit(-1)

    print(_format_captions(feature_captions))

    # One row per feature; the row length is taken from the first feature.
    nfeatures_in = len(feature_captions)
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    for row, caption in enumerate(feature_captions):
        features_vector[row, :] = features_pool[caption].T

    print("[II] Extracting PCA configuration ")

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print("[II] Optimal number of PCs to keep: %d" % q)

    # Index through an array so `featurelist` can select several captions at once.
    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print("[II] Decided to keep %d features:" % len(features_to_keep))
    print(_format_captions(features_to_keep))

    # TODO: finish writing kernel,q and feature file to a yaml output and incorporate
    # labelling using the csvs.