view experiment-reverb/code/supervised_training.py @ 2:c87a9505f294 tip

Added LICENSE for code, removed .wav files
author Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk>
date Sat, 30 Sep 2017 13:25:50 +0100
parents 246d5546657c
children
line wrap: on
line source
#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 23 11:53:17 2015

@author: mmxgn
"""

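# This script loads the features of a dataset from a .yaml file and extracts a
# PCA configuration (number of components and features to keep) from them.
# NOTE(review): the original header announced cluster estimation and removal
# of outliers; the code visible in this file does not perform either yet.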

from sys import argv, exit
from essentia.standard import YamlInput, YamlOutput
from essentia import Pool
from pca import *

from numpy import *
from sklearn import cluster
from sklearn.metrics import pairwise_distances

def mse(A, B):
    """Return the mean squared error between two array-likes.

    Both arguments are converted with ``array`` and subtracted element-wise
    (NumPy broadcasting rules apply); the squared differences are then
    averaged over all elements.

    Defined with ``def`` rather than as a lambda bound to a name, so that the
    function carries a docstring and a meaningful ``__name__`` in tracebacks.
    """
    difference = array(A) - array(B)
    return (difference ** 2).mean()

def format_caption_list(captions):
    """Return the given names one per line, each behind the "[II]" log tag.

    The names are joined directly instead of being carved out of the text of
    ``str(list)``, so the result does not depend on how the elements are
    ``repr``-ed (plain ``str``, ``unicode``, NumPy string scalars) nor on the
    names being free of quotes and brackets.  An empty sequence yields an
    empty string.
    """
    return "\n".join("[II]\t %s" % caption for caption in captions)


if __name__ == "__main__":
    # Every print below takes one parenthesised argument, which Python 2 (the
    # interpreter named in the shebang) treats exactly like the statement form.
    if len(argv) != 2:
        print("[EE] Wrong number of arguments")
        print("[II] Correct syntax is:")
        # The script name has to be interpolated here; without the operand
        # the usage line showed a literal "%s".
        print("[II] \t%s <training_file>" % argv[0])
        print("[II] where <training_file> is a .yaml file containing the")
        print("[II] features of the dataset (try output2_stage/fulltraining-last.yaml)")
        exit(-1)

    infile = argv[1]

    # Running the configured YamlInput algorithm returns the pool stored in
    # the file.
    features_pool = YamlInput(filename=infile)()

    # Descriptors under the "metadata" namespace are bookkeeping entries, not
    # features, so they are left out of the caption list.
    feature_captions = [
        name for name in features_pool.descriptorNames()
        if name.split('.')[0] != 'metadata'
    ]

    print("[II] Loaded training data from %s (%s) " % (infile, features_pool['metadata.date'][0]))
    print("[II] %d Features Available: " % len(feature_captions))

    print(format_caption_list(feature_captions))

    nfeatures_in = len(feature_captions)

    # Sizing the matrix needs a first feature; stop with a readable message
    # instead of an IndexError when the pool holds metadata only.
    if nfeatures_in == 0:
        print("[EE] No features found in %s" % infile)
        exit(-1)

    # One row per feature.  The column count is taken from the first feature;
    # assumes every descriptor stores the same number of values - TODO confirm.
    features_vector = zeros((nfeatures_in, len(features_pool[feature_captions[0]])))

    for i, caption in enumerate(feature_captions):
        features_vector[i, :] = features_pool[caption].T

    print("[II] Extracting PCA configuration ")

    # q is reported below as the number of principal components to keep and
    # featurelist is used to index the captions; kernel is not used further
    # in this file (presumably the PCA projection - verify against pca.py).
    kernel, q, featurelist = extract_pca_configuration_from_data(features_vector)

    print("[II] Optimal number of PCs to keep: %d" % q)

    # Captions go through an ndarray so that featurelist can select several
    # of them in one indexing operation.
    feature_captions_array = array(feature_captions)

    features_to_keep = list(feature_captions_array[featurelist])
    print("[II] Decided to keep %d features:" % len(features_to_keep))
    print(format_caption_list(features_to_keep))

    # TODO: finish writing kernel, q and the feature list to a yaml output
    # and incorporate labelling using the csvs.