chourdakisreiss2016: experiment-reverb/code/supervised

annotate experiment-reverb/code/supervised_training.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files

author	Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date	Sat, 30 Sep 2017 13:25:50 +0100
parents	246d5546657c
children

rev	line source
e@0	1 #!/usr/bin/python2
e@0	2 # -- coding: utf-8 --
e@0	3 """
e@0	4 Created on Thu Apr 23 11:53:17 2015
e@0	5
e@0	6 @author: mmxgn
e@0	7 """
e@0	8
e@0	9 # This file does the cluster estimation and the removal of outliers
e@0	10
e@0	11 from sys import argv, exit
e@0	12 from essentia.standard import YamlInput, YamlOutput
e@0	13 from essentia import Pool
e@0	14 from pca import *
e@0	15
e@0	16 from numpy import *
e@0	17 from sklearn import cluster
e@0	18 from sklearn.metrics import pairwise_distances
e@0	19
def mse(A, B):
    """Return the mean squared error between *A* and *B*.

    Both arguments are converted with ``array()``, so lists, tuples,
    scalars and arrays are accepted; the usual numpy broadcasting rules
    apply to the subtraction.
    """
    difference = array(A) - array(B)
    return (difference ** 2).mean()
e@0	21
def _format_caption_list(captions):
    """Return *captions* as one line per entry, each tagged "[II]<TAB> ".

    Joins the entries directly rather than string-replacing the ``str()`` of
    the list, so the result does not depend on how the list repr is quoted:
    names containing quotes or brackets are rendered intact. An empty
    sequence yields an empty string.
    """
    return "\n".join("[II]\t %s" % caption for caption in captions)


if __name__ == "__main__":
    # Exactly one argument is expected: the path of the training YAML file.
    if len(argv) != 2:
        print("[EE] Wrong number of arguments")
        print("[II] Correct syntax is:")
        # Interpolate the script name; the "%s" placeholder used to be
        # printed verbatim because the format string was never applied.
        print("[II] \t%s <training_file>" % argv[0])
        print("[II] where <training_file> is a .yaml file containing the")
        print("[II] features of the dataset (try output2_stage/fulltraining-last.yaml)")
        exit(-1)

    infile = argv[1]

    # Read the descriptors stored in the YAML file.
    features_pool = YamlInput(filename=infile)()

    # Every descriptor outside the "metadata" namespace is treated as a feature.
    feature_captions = [
        name for name in features_pool.descriptorNames()
        if name.split('.')[0] != 'metadata'
    ]

    print("[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]))
    print("[II] %d Features Available: " % len(feature_captions))
    print(_format_caption_list(feature_captions))

    # Without at least one feature there is nothing to size the matrix from;
    # stop with a clear message instead of an IndexError further down.
    if not feature_captions:
        print("[EE] No features found in %s" % infile)
        exit(-1)

    # Build a (features x observations) matrix, one row per feature. The
    # column count is taken from the first feature.
    # NOTE(review): assumes every feature holds the same number of values --
    # a mismatch makes the row assignment below raise; confirm upstream.
    nfeatures_in = len(feature_captions)
    nobservations = len(features_pool[feature_captions[0]])
    features_vector = zeros((nfeatures_in, nobservations))

    for row, caption in enumerate(feature_captions):
        features_vector[row, :] = features_pool[caption].T

    print("[II] Extracting PCA configuration ")

    # extract_pca_configuration_from_data comes from the project-local pca
    # module; featurelist is used below to index into the caption array.
    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print("[II] Optimal number of PCs to keep: %d" % q)

    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print("[II] Decided to keep %d features:" % len(features_to_keep))
    print(_format_caption_list(features_to_keep))

    # TODO: finish writing kernel,q and feature file to a yaml output and incorporate
    # labelling using the csvs.
e@0	75

Mercurial > hg > chourdakisreiss2016

annotate experiment-reverb/code/supervised_training.py @ 2:c87a9505f294 tip