Mercurial > hg > chourdakisreiss2016

#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

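# This file is meant to do the cluster estimation and the removal of outliers.
# NOTE(review): the code below currently only loads the feature pool, extracts
# a PCA configuration and prints the features that are kept; the clustering /
# outlier-removal step does not appear to be implemented here yet.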

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances

def mse(A, B):
    """Return the mean squared error between the array-likes *A* and *B*.

    Both arguments are converted with ``array`` before the element-wise
    difference is squared and averaged over all elements.
    """
    difference = array(A) - array(B)
    squared = difference ** 2
    return squared.mean()

if __name__ == "__main__":
    # Command-line entry point: load a feature pool from a YAML file, ask
    # extract_pca_configuration_from_data() (presumably provided by the
    # star-import of the local `pca` module -- confirm) for a configuration,
    # and report which features are kept.
    #
    # The prints are written as single-argument parenthesised calls, which
    # behave exactly like the equivalent Python 2 print statements.
    if len(argv) != 2:
        print("[EE] Wrong number of arguments")
        print("[II] Correct syntax is:")
        # Substitute the script name; without the format argument the usage
        # line was printed with a literal "%s".
        print("[II] \t%s <training_file>" % argv[0])
        print("[II] where <training_file> is a .yaml file containing the")
        print("[II] features of the dataset (try output2_stage/fulltraining-last.yaml)")
        exit(-1)

    infile = argv[1]

    features_pool = YamlInput(filename=infile)()

    # Every descriptor whose first dotted component is not 'metadata' is
    # treated as a feature.  Building a new list (instead of removing entries
    # while looping) does not rely on descriptorNames() handing back a fresh
    # list on every call.
    feature_captions = [
        name for name in features_pool.descriptorNames()
        if name.split('.')[0] != 'metadata'
    ]

    print("[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]))
    print("[II] %d Features Available: " % len(feature_captions))

    # One feature name per line, each prefixed with the "[II]\t " log tag.
    # Joined directly from the names rather than by string-replacing the
    # repr() of the list, which mangled names containing quotes or brackets.
    print("\n".join("[II]\t " + str(name) for name in feature_captions))

    nfeatures_in = len(feature_captions)
    # One row per feature; the number of columns is the length of the first
    # feature's data in the pool.
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    for row, name in enumerate(feature_captions):
        features_vector[row, :] = features_pool[name].T

    print("[II] Extracting PCA configuration ")

    # `kernel` is not used further in this script yet (see the TODO below).
    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print("[II] Optimal number of PCs to keep: %d" % q)

    # `featurelist` is used as an index into the caption array to pick the
    # names of the retained features.
    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print("[II] Decided to keep %d features:" % len(features_to_keep))
    print("\n".join("[II]\t " + str(name) for name in features_to_keep))

    # TODO: finish writing kernel,q and feature file to a yaml output and incorporate
    # labelling using the csvs.
author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Sat, 30 Sep 2017 13:25:50 +0100
parents	246d5546657c
children