Mercurial > hg > chourdakisreiss2016
view experiment-reverb/code/supervised_training.py @ 2:c87a9505f294 tip
Added LICENSE for code, removed .wav files
author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
---|---|
date | Sat, 30 Sep 2017 13:25:50 +0100 |
parents | 246d5546657c |
children |
line wrap: on
line source
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn

Command-line script: loads a feature pool from a .yaml training file,
stacks every non-metadata descriptor into a matrix and asks the ``pca``
module for a PCA configuration (kernel, number of components to keep and
the indices of the features to keep), reporting the results on stdout.
"""

# This file does the cluster estimation and the removal of outliers

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *
from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances


def mse(A, B):
    """Return the mean squared error between the array-likes A and B."""
    return ((array(A) - array(B)) ** 2).mean()


def _format_captions(captions):
    """Return one '[II]\\t <caption>' line per caption, newline-separated.

    Built with an explicit join rather than by rewriting ``str(list)``, so
    the output does not depend on how the elements' repr happens to look
    (plain str, unicode, numpy string scalars, ...).
    """
    return "\n".join("[II]\t %s" % caption for caption in captions)


def main():
    """Run the PCA feature-selection report for the file named in argv[1].

    Exits with status -1 when the argument count is wrong or when the
    training file contains no non-metadata descriptors.
    """
    # NOTE: every print below takes a single parenthesised argument so the
    # statement behaves identically under Python 2 and Python 3.
    if len(argv) != 2:
        print("[EE] Wrong number of arguments")
        print("[II] Correct syntax is:")
        # The program name was previously never interpolated, leaving a
        # literal '%s' in the usage message.
        print("[II] \t%s <training_file>" % argv[0])
        print("[II] where <training_file> is a .yaml file containing the")
        print("[II] features of the dataset (try output2_stage/fulltraining-last.yaml)")
        exit(-1)

    infile = argv[1]

    features_pool = YamlInput(filename=infile)()

    # Descriptors under the 'metadata' namespace are not features.
    feature_captions = [
        name for name in features_pool.descriptorNames()
        if name.split('.')[0] != 'metadata'
    ]

    print("[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]))
    print("[II] %d Features Available: " % len(feature_captions))
    print(_format_captions(feature_captions))

    nfeatures_in = len(feature_captions)
    if nfeatures_in == 0:
        # Without this guard the matrix allocation below fails with an
        # uninformative IndexError on feature_captions[0].
        print("[EE] No features found in %s" % infile)
        exit(-1)

    # One row per feature; the column count is taken from the first feature.
    # assumes all descriptors have the same length -- TODO confirm
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    for row, caption in enumerate(feature_captions):
        features_vector[row, :] = features_pool[caption].T

    print("[II] Extracting PCA configuration ")

    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print("[II] Optimal number of PCs to keep: %d" % q)

    # Index through a numpy array so that featurelist can select captions.
    feature_captions_array = array(feature_captions)
    features_to_keep = list(feature_captions_array[featurelist])

    print("[II] Decided to keep %d features:" % len(features_to_keep))
    print(_format_captions(features_to_keep))

    # TODO: finish writing kernel,q and feature file to a yaml output and incorporate
    # labelling using the csvs.


if __name__ == "__main__":
    main()