# HG changeset patch
# User Paulo Chiliguano
# Date 1437857476 -3600
# Node ID 2e3c57fba632ede0ea2b4c15f8f1624f4f9a63f5
# Parent c63dac455296239fcc5164fb424d286eacd84baf
Convolutional Neural Network code
Scratch code for the Estimation of Distribution Algorithm
diff -r c63dac455296 -r 2e3c57fba632 Code/convolutional_mlp.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/convolutional_mlp.py Sat Jul 25 21:51:16 2015 +0100
@@ -0,0 +1,372 @@
+"""This tutorial introduces the LeNet5 neural network architecture
+using Theano. LeNet5 is a convolutional neural network, good for
+classifying images. This tutorial shows how to build the architecture,
+and comes with all the hyper-parameters you need to reproduce the
+paper's MNIST results.
+
+
+This implementation simplifies the model in the following ways:
+
+ - LeNetConvPool doesn't implement location-specific gain and bias parameters
+ - LeNetConvPool implements max-pooling rather than average pooling
+ - Digit classification is implemented with a logistic regression rather than
+ an RBF network
+ - LeNet5 did not use fully-connected convolutions at the second layer
+
+References:
+ - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
+ Gradient-Based Learning Applied to Document
+ Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
+ http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
+
+"""
+import os
+import sys
+import timeit
+
+import numpy
+
+import theano
+import theano.tensor as T
+from theano.tensor.signal import downsample
+from theano.tensor.nnet import conv
+
+from logistic_sgd import LogisticRegression, load_data
+from mlp import HiddenLayer
+
+# Rectified Linear Unit (ReLU)
+# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
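+# relu(x) = max(x, 0) applied elementwise; used below in place of T.tanh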
+def relu(x):
+ return T.switch(x<0, 0, x)
+
+
+
+class LeNetConvPoolLayer(object):
+ """Pool Layer of a convolutional network """
+
+ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
+ """
+ Allocate a LeNetConvPoolLayer with shared variable internal parameters.
+
+ :type rng: numpy.random.RandomState
+ :param rng: a random number generator used to initialize weights
+
+ :type input: theano.tensor.dtensor4
+ :param input: symbolic image tensor, of shape image_shape
+
+ :type filter_shape: tuple or list of length 4
+ :param filter_shape: (number of filters, num input feature maps,
+ filter height, filter width)
+
+ :type image_shape: tuple or list of length 4
+ :param image_shape: (batch size, num input feature maps,
+ image height, image width)
+
+ :type poolsize: tuple or list of length 2
+ :param poolsize: the downsampling (pooling) factor (#rows, #cols)
+ """
+
+ assert image_shape[1] == filter_shape[1]
+ self.input = input
+
+ # there are "num input feature maps * filter height * filter width"
+ # inputs to each hidden unit
+ fan_in = numpy.prod(filter_shape[1:])
+ # each unit in the lower layer receives a gradient from:
+ # "num output feature maps * filter height * filter width" /
+ # pooling size
+ fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
+ numpy.prod(poolsize))
+ # initialize weights with random weights
+ W_bound = numpy.sqrt(6. / (fan_in + fan_out))
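+ # e.g. for the first layer built in evaluate_lenet5 below
+ # (filter_shape=(20, 1, 11, 11), poolsize=(2, 2)):
+ # fan_in = 1*11*11 = 121, fan_out = 20*11*11/4 = 605,
+ # so W_bound = sqrt(6/726) ~= 0.091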
+ self.W = theano.shared(
+ numpy.asarray(
+ rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
+ dtype=theano.config.floatX
+ ),
+ borrow=True
+ )
+
+ # the bias is a 1D tensor -- one bias per output feature map
+ b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
+ self.b = theano.shared(value=b_values, borrow=True)
+
+ # convolve input feature maps with filters
+ conv_out = conv.conv2d(
+ input=input,
+ filters=self.W,
+ filter_shape=filter_shape,
+ image_shape=image_shape
+ )
+
+ # downsample each feature map individually, using maxpooling
+ pooled_out = downsample.max_pool_2d(
+ input=conv_out,
+ ds=poolsize,
+ ignore_border=True
+ )
+
+ # add the bias term. Since the bias is a vector (1D array), we first
+ # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
+ # thus be broadcasted across mini-batches and feature map
+ # width & height
+ #self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+ self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
+
+ # store parameters of this layer
+ self.params = [self.W, self.b]
+
+ # keep track of model input
+ self.input = input
+
+
+def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
+ dataset='mnist.pkl.gz',
+ nkerns=[20, 50, 70], batch_size=100):
+ """ Demonstrates lenet on MNIST dataset
+
+ :type learning_rate: float
+ :param learning_rate: learning rate used (factor for the stochastic
+ gradient)
+
+ :type n_epochs: int
+ :param n_epochs: maximal number of epochs to run the optimizer
+
+ :type dataset: string
+ :param dataset: path to the dataset used for training / testing (note: load_data currently ignores this and loads gtzan.pkl)
+
+ :type nkerns: list of ints
+ :param nkerns: number of kernels on each layer
+ """
+
+ rng = numpy.random.RandomState(23455)
+
+ datasets = load_data(dataset)
+
+ train_set_x, train_set_y = datasets[0]
+ valid_set_x, valid_set_y = datasets[1]
+ test_set_x, test_set_y = datasets[2]
+
+ # compute number of minibatches for training, validation and testing
+ n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+ n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+ n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+
+ n_train_batches /= batch_size
+ n_valid_batches /= batch_size
+ n_test_batches /= batch_size
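+ # integer division (Python 2): any incomplete final minibatch is dropped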
+
+ # allocate symbolic variables for the data
+ index = T.lscalar() # index to a [mini]batch
+
+ # start-snippet-1
+ x = T.matrix('x') # the data is presented as rasterized images
+ y = T.ivector('y') # the labels are presented as 1D vector of
+ # [int] labels
+
+ ######################
+ # BUILD ACTUAL MODEL #
+ ######################
+ print '... building the model'
+
+ # Reshape matrix of rasterized spectrograms of shape (batch_size, 1206 * 128)
+ # to a 4D tensor, compatible with our LeNetConvPoolLayer
+ # (1206, 128) is the size of each log-mel spectrogram (frames x mel bands).
+ #layer0_input = x.reshape((batch_size, 1, 28, 28))
+ layer0_input = x.reshape((batch_size, 1, 1206, 128))
+ # Construct the first convolutional pooling layer:
+ # filtering reduces the image size to (1206-11+1, 128-11+1) = (1196, 118)
+ # maxpooling reduces this further to (1196/2, 118/2) = (598, 59)
+ # 4D output tensor is thus of shape (batch_size, nkerns[0], 598, 59)
+ layer0 = LeNetConvPoolLayer(
+ rng,
+ input=layer0_input,
+ #image_shape=(batch_size, 1, 28, 28),
+ image_shape=(batch_size, 1, 1206, 128),
+ #filter_shape=(nkerns[0], 1, 5, 5),
+ filter_shape=(nkerns[0], 1, 11, 11),
+ poolsize=(2, 2)
+ )
+
+ # Construct the second convolutional pooling layer
+ # filtering reduces the image size to (598-5+1, 59-5+1) = (594, 55)
+ # maxpooling reduces this further to (594/2, 55/2) = (297, 27)
+ # 4D output tensor is thus of shape (batch_size, nkerns[1], 297, 27)
+ layer1 = LeNetConvPoolLayer(
+ rng,
+ input=layer0.output,
+ #image_shape=(batch_size, nkerns[0], 12, 12),
+ image_shape=(batch_size, nkerns[0], 598, 59),
+ #filter_shape=(nkerns[1], nkerns[0], 5, 5),
+ filter_shape=(nkerns[1], nkerns[0], 5, 5),
+ poolsize=(2, 2)
+ )
+
+ # Construct the third convolutional pooling layer
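+ # filtering reduces the image size to (297-5+1, 27-5+1) = (293, 23)
+ # poolsize (1, 1) leaves it unchanged, so the 4D output tensor is of
+ # shape (batch_size, nkerns[2], 293, 23)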
+ layer2 = LeNetConvPoolLayer(
+ rng,
+ input=layer1.output,
+ image_shape=(batch_size, nkerns[1], 297, 27),
+ filter_shape=(nkerns[2], nkerns[1], 5, 5),
+ poolsize=(1, 1)
+ )
+
+ # the HiddenLayer being fully-connected, it operates on 2D matrices of
+ # shape (batch_size, num_pixels) (i.e. a matrix of rasterized examples).
+ # This will generate a matrix of shape (batch_size, nkerns[2] * 293 * 23),
+ # or (100, 70 * 293 * 23) = (100, 471730) with the default values.
+ layer3_input = layer2.output.flatten(2)
+
+ # construct a fully-connected layer with ReLU activation
+ layer3 = HiddenLayer(
+ rng,
+ input=layer3_input,
+ #n_in=nkerns[1] * 4 * 4,
+ n_in=nkerns[2] * 293 * 23,
+ #n_out=500,
+ n_out=500,
+ #activation=T.tanh
+ activation=relu
+ )
+
+ # classify the values of the fully-connected ReLU layer
+ #layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+ layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)
+
+ # the cost we minimize during training is the NLL of the model
+ cost = layer4.negative_log_likelihood(y)
+
+ # create a function to compute the mistakes that are made by the model
+ test_model = theano.function(
+ [index],
+ layer4.errors(y),
+ givens={
+ x: test_set_x[index * batch_size: (index + 1) * batch_size],
+ y: test_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+
+ validate_model = theano.function(
+ [index],
+ layer4.errors(y),
+ givens={
+ x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+ y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+
+ # create a list of all model parameters to be fit by gradient descent
+ params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
+
+ # create a list of gradients for all model parameters
+ grads = T.grad(cost, params)
+
+ # train_model is a function that updates the model parameters by
+ # SGD Since this model has many parameters, it would be tedious to
+ # manually create an update rule for each model parameter. We thus
+ # create the updates list by automatically looping over all
+ # (params[i], grads[i]) pairs.
+ updates = [
+ (param_i, param_i - learning_rate * grad_i)
+ for param_i, grad_i in zip(params, grads)
+ ]
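+ # e.g. with the default learning_rate=0.1 each parameter moves by one
+ # tenth of its gradient on every minibatch: param <- param - 0.1 * grad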
+
+ train_model = theano.function(
+ [index],
+ cost,
+ updates=updates,
+ givens={
+ x: train_set_x[index * batch_size: (index + 1) * batch_size],
+ y: train_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+ # end-snippet-1
+
+ ###############
+ # TRAIN MODEL #
+ ###############
+ print '... training'
+ # early-stopping parameters
+ patience = 10000 # look at this many examples regardless
+ patience_increase = 2 # wait this much longer when a new best is
+ # found
+ improvement_threshold = 0.995 # a relative improvement of this much is
+ # considered significant
+ validation_frequency = min(n_train_batches, patience / 2)
+ # go through this many
+ # minibatches before checking the network
+ # on the validation set; in this case we
+ # check every epoch
+
+ best_validation_loss = numpy.inf
+ best_iter = 0
+ test_score = 0.
+ start_time = timeit.default_timer()
+
+ epoch = 0
+ done_looping = False
+
+ while (epoch < n_epochs) and (not done_looping):
+ epoch = epoch + 1
+ for minibatch_index in xrange(n_train_batches):
+
+ iter = (epoch - 1) * n_train_batches + minibatch_index
+
+ if iter % 100 == 0:
+ print 'training @ iter = ', iter
+ cost_ij = train_model(minibatch_index)
+
+ if (iter + 1) % validation_frequency == 0:
+
+ # compute zero-one loss on validation set
+ validation_losses = [validate_model(i) for i
+ in xrange(n_valid_batches)]
+ this_validation_loss = numpy.mean(validation_losses)
+ print('epoch %i, minibatch %i/%i, validation error %f %%' %
+ (epoch, minibatch_index + 1, n_train_batches,
+ this_validation_loss * 100.))
+
+ # if we got the best validation score until now
+ if this_validation_loss < best_validation_loss:
+
+ #improve patience if loss improvement is good enough
+ if this_validation_loss < best_validation_loss * \
+ improvement_threshold:
+ patience = max(patience, iter * patience_increase)
+
+ # save best validation score and iteration number
+ best_validation_loss = this_validation_loss
+ best_iter = iter
+
+ # test it on the test set
+ test_losses = [
+ test_model(i)
+ for i in xrange(n_test_batches)
+ ]
+ test_score = numpy.mean(test_losses)
+ print((' epoch %i, minibatch %i/%i, test error of '
+ 'best model %f %%') %
+ (epoch, minibatch_index + 1, n_train_batches,
+ test_score * 100.))
+
+ if patience <= iter:
+ done_looping = True
+ break
+
+ end_time = timeit.default_timer()
+ print('Optimization complete.')
+ print('Best validation score of %f %% obtained at iteration %i, '
+ 'with test performance %f %%' %
+ (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+ print >> sys.stderr, ('The code for file ' +
+ os.path.split(__file__)[1] +
+ ' ran for %.2fm' % ((end_time - start_time) / 60.))
+
+if __name__ == '__main__':
+ evaluate_lenet5()
+
+
+def experiment(state, channel):
+ evaluate_lenet5(state.learning_rate, dataset=state.dataset)
+
diff -r c63dac455296 -r 2e3c57fba632 Code/eda.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/eda.py Sat Jul 25 21:51:16 2015 +0100
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jul 22 17:42:09 2015
+
+@author: paulochiliguano
+"""
+
+import numpy as np
+from sklearn import mixture
+
+#User-item dictionary
+users = {"Angelica": {"SOAJJPC12AB017D63F": 3.5, "SOAKIXJ12AC3DF7152": 2.0,
+ "SOAKPFH12A8C13BA4A": 4.5, "SOAGTJW12A6701F1F5": 5.0,
+ "SOAKWCK12A8C139F81": 1.5, "SOAKNZI12A58A79CAC": 2.5,
+ "SOAJZEP12A8C14379B": 2.0},
+ "Bill":{"SOAJJPC12AB017D63F": 2.0, "SOAKIXJ12AC3DF7152": 3.5,
+ "SOAHQFM12A8C134B65": 4.0, "SOAGTJW12A6701F1F5": 2.0,
+ "SOAKWCK12A8C139F81": 3.5, "SOAJZEP12A8C14379B": 3.0},
+ "Chan": {"SOAJJPC12AB017D63F": 5.0, "SOAKIXJ12AC3DF7152": 1.0,
+ "SOAHQFM12A8C134B65": 1.0, "SOAKPFH12A8C13BA4A": 3.0,
+ "SOAGTJW12A6701F1F5": 5, "SOAKWCK12A8C139F81": 1.0},
+ "Dan": {"SOAJJPC12AB017D63F": 3.0, "SOAKIXJ12AC3DF7152": 4.0,
+ "SOAHQFM12A8C134B65": 4.5, "SOAGTJW12A6701F1F5": 3.0,
+ "SOAKWCK12A8C139F81": 4.5, "SOAKNZI12A58A79CAC": 4.0,
+ "SOAJZEP12A8C14379B": 2.0},
+ "Hailey": {"SOAKIXJ12AC3DF7152": 4.0, "SOAHQFM12A8C134B65": 1.0,
+ "SOAKPFH12A8C13BA4A": 4.0, "SOAKNZI12A58A79CAC": 4.0,
+ "SOAJZEP12A8C14379B": 1.0},
+ "Jordyn": {"SOAKIXJ12AC3DF7152": 4.5, "SOAHQFM12A8C134B65": 4.0,
+ "SOAKPFH12A8C13BA4A": 5.0, "SOAGTJW12A6701F1F5": 5.0,
+ "SOAKWCK12A8C139F81": 4.5, "SOAKNZI12A58A79CAC": 4.0,
+ "SOAJZEP12A8C14379B": 4.0},
+ "Sam": {"SOAJJPC12AB017D63F": 5.0, "SOAKIXJ12AC3DF7152": 2.0,
+ "SOAKPFH12A8C13BA4A": 3.0, "SOAGTJW12A6701F1F5": 5.0,
+ "SOAKWCK12A8C139F81": 4.0, "SOAKNZI12A58A79CAC": 5.0},
+ "Veronica": {"SOAJJPC12AB017D63F": 3.0, "SOAKPFH12A8C13BA4A": 5.0,
+ "SOAGTJW12A6701F1F5": 4.0, "SOAKWCK12A8C139F81": 2.5,
+ "SOAKNZI12A58A79CAC": 3.0}
+ }
+
+items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1],
+ "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1],
+ "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1],
+ "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1],
+ "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1],
+ "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1],
+ "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1]}
+
+profile = {"Profile0": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile1": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile2": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile3": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile4": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile5": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile6": [2.5, 4, 3.5, 3, 5, 4, 1],
+ "Profile7": [2.5, 4, 3.5, 3, 5, 4, 1]}
+
+
+np.random.seed(1)
+g = mixture.GMM(n_components=7)
+# Generate random observations with two modes centered on 0
+# and 10 to use for training.
+obs = np.concatenate((np.random.randn(100, 1), 10 + np.random.randn(300, 1)))
+g.fit(obs)
+np.round(g.weights_, 2)
+np.round(g.means_, 2)
+np.round(g.covars_, 2)
+g.predict([[0], [2], [9], [10]])
+np.round(g.score([[0], [2], [9], [10]]), 2)
+# Refit the model on new data (initial parameters remain the
+# same), this time with an even split between the two modes.
+g.fit(20 * [[0]] + 20 * [[10]])
+np.round(g.weights_, 2)
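+
+# A minimal sketch (hypothetical, not used above) of how a fitted GMM could
+# drive one Estimation of Distribution Algorithm step: sample candidate
+# profiles, keep the best ones under some fitness measure, refit the model.
+# `candidates`, `fitness` and `elite` are illustrative names only; in the
+# recommender, fitness would come from predicted ratings, not g.score.
+#candidates = g.sample(100)                      # draw candidate profiles
+#fitness = g.score(candidates)                   # placeholder fitness values
+#elite = candidates[np.argsort(fitness)[-20:]]   # keep the top 20
+#g.fit(elite)                                    # re-estimate the distribution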
diff -r c63dac455296 -r 2e3c57fba632 Code/feature_extraction.py
--- a/Code/feature_extraction.py Tue Jul 21 11:54:30 2015 +0100
+++ b/Code/feature_extraction.py Sat Jul 25 21:51:16 2015 +0100
@@ -21,7 +21,7 @@
#cmd = ' '.join(cmd)
#print cmd
#raw_audio = numpy.fromstring(subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True).communicate()[0],dtype='uint16')
- audioFile, sr = librosa.load(filename, sr=22050, mono=True, duration=3)
+ audioFile, sr = librosa.load(filename, sr=22050, mono=True, duration=28)
#random.randint(0,audioFile.size)
#max_amp = 2.**(int(bits_per_sample)-1)
#raw_audio = (raw_audio- max_amp)/max_amp
@@ -30,7 +30,9 @@
def calc_specgram(x,fs,winSize,):
S = librosa.feature.melspectrogram(y=x, sr=fs, n_mels=128, S=None, n_fft=winSize, hop_length=512)
log_S = librosa.logamplitude(S, ref_power=np.max)
+ log_S = np.transpose(log_S) # (frames, mel bands): ~1206 x 128 for a 28 s clip at hop 512
#spec = SpecGram(x,fs,winSize)
+ #return spec.specMat
return log_S
@@ -90,7 +92,7 @@
def extract_features(self,):
for i in xrange(1,self.num_files):
filename = self.filenames[i]
- print 'Filename: ',filename
+ #print 'Filename: ',filename
x = read_wav(filename)
spec_x = calc_specgram(x,22050,1024)
spec_x = make_4tensor(spec_x)
@@ -102,5 +104,5 @@
self.h5.close()
if __name__ == '__main__':
- test = FeatExtraction('/media/paulo/5409-57C0')
+ test = FeatExtraction('/home/paulo/Downloads')
diff -r c63dac455296 -r 2e3c57fba632 Code/latent_vectors.py
--- a/Code/latent_vectors.py Tue Jul 21 11:54:30 2015 +0100
+++ b/Code/latent_vectors.py Sat Jul 25 21:51:16 2015 +0100
@@ -13,17 +13,26 @@
import wmf
# Read songID of downloaded audio clips
-with open('/homes/pchilguano/dataset/audio_files.txt', 'rb') as input1:
+with open('/homes/pchilguano/dataset/ten_songs.txt', 'rb') as input1:
available = list(csv.reader(input1))
chain1 = list(itertools.chain(*available))
# Sparse user-item matrix
result = pd.DataFrame()
for chunk in pd.read_csv('/homes/pchilguano/dataset/train_triplets_wo_mismatches.csv', low_memory = False, delim_whitespace=False, chunksize=10000, names=['user','song','plays'], header=None):
- chunk = chunk[chunk.song.isin(chain1)]
- result = result.append(chunk.pivot(index='user', columns='song', values='plays')
- , ignore_index=True)
+ chunk = chunk[chunk.song.isin(chain1)]
+ result = result.append(chunk, ignore_index=True)
+ #result = result.append(chunk.pivot(index='user', columns='song', values='plays'))
print (result.shape)
+
+
+# Build a user -> {song: play count} dictionary; compute the intermediate
+# dict once instead of rebuilding it on every loop iteration
+user_rows = result.set_index('user').T.to_dict()
+final = {}
+for a in user_rows:
+ final[a] = {user_rows[a]["song"]: user_rows[a]["plays"]}
+
+#dict((k, v.dropna().to_dict()) for k, v in pd.compat.iteritems(result))
+
sresult = result.to_sparse()
sresult.to_pickle('/homes/pchilguano/dataset/taste_profile_sparse.pkl')
diff -r c63dac455296 -r 2e3c57fba632 Code/logistic_sgd.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/logistic_sgd.py Sat Jul 25 21:51:16 2015 +0100
@@ -0,0 +1,481 @@
+"""
+This tutorial introduces logistic regression using Theano and stochastic
+gradient descent.
+
+Logistic regression is a probabilistic, linear classifier. It is parametrized
+by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
+done by projecting data points onto a set of hyperplanes, the distance to
+which is used to determine a class membership probability.
+
+Mathematically, this can be written as:
+
+.. math::
+ P(Y=i|x, W,b) &= softmax_i(W x + b) \\
+ &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
+
+
+The output of the model or prediction is then done by taking the argmax of
+the vector whose i'th element is P(Y=i|x).
+
+.. math::
+
+ y_{pred} = argmax_i P(Y=i|x,W,b)
+
+
+This tutorial presents a stochastic gradient descent optimization method
+suitable for large datasets.
+
+
+References:
+
+ - textbooks: "Pattern Recognition and Machine Learning" -
+ Christopher M. Bishop, section 4.3.2
+
+"""
+__docformat__ = 'restructuredtext en'
+
+import cPickle
+import gzip
+import os
+import sys
+import timeit
+
+import numpy
+
+import theano
+import theano.tensor as T
+
+
+class LogisticRegression(object):
+ """Multi-class Logistic Regression Class
+
+ The logistic regression is fully described by a weight matrix :math:`W`
+ and bias vector :math:`b`. Classification is done by projecting data
+ points onto a set of hyperplanes, the distance to which is used to
+ determine a class membership probability.
+ """
+
+ def __init__(self, input, n_in, n_out):
+ """ Initialize the parameters of the logistic regression
+
+ :type input: theano.tensor.TensorType
+ :param input: symbolic variable that describes the input of the
+ architecture (one minibatch)
+
+ :type n_in: int
+ :param n_in: number of input units, the dimension of the space in
+ which the datapoints lie
+
+ :type n_out: int
+ :param n_out: number of output units, the dimension of the space in
+ which the labels lie
+
+ """
+ # start-snippet-1
+ # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
+ self.W = theano.shared(
+ value=numpy.zeros(
+ (n_in, n_out),
+ dtype=theano.config.floatX
+ ),
+ name='W',
+ borrow=True
+ )
+ # initialize the biases b as a vector of n_out 0s
+ self.b = theano.shared(
+ value=numpy.zeros(
+ (n_out,),
+ dtype=theano.config.floatX
+ ),
+ name='b',
+ borrow=True
+ )
+
+ # symbolic expression for computing the matrix of class-membership
+ # probabilities
+ # Where:
+ # W is a matrix where column-k represents the separation hyperplane for
+ # class-k
+ # x is a matrix where row-j represents input training sample-j
+ # b is a vector where element-k represents the free parameter of
+ # hyperplane-k
+ #self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
+ self.p_y_given_x = relu(T.dot(input, self.W) + self.b)
+ #print(self.p_y_given_x)
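+ # Note: unlike softmax, relu outputs are not normalised probabilities,
+ # and T.log in negative_log_likelihood gives -inf wherever the
+ # activation is exactly zero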
+
+ # symbolic description of how to compute prediction as class whose
+ # probability is maximal
+ self.y_pred = T.argmax(self.p_y_given_x, axis=1)
+ # end-snippet-1
+
+ # parameters of the model
+ self.params = [self.W, self.b]
+
+ # keep track of model input
+ self.input = input
+
+ def negative_log_likelihood(self, y):
+ """Return the mean of the negative log-likelihood of the prediction
+ of this model under a given target distribution.
+
+ .. math::
+
+ \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
+ \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
+ \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
+ \ell (\theta=\{W,b\}, \mathcal{D})
+
+ :type y: theano.tensor.TensorType
+ :param y: corresponds to a vector that gives for each example the
+ correct label
+
+ Note: we use the mean instead of the sum so that
+ the learning rate is less dependent on the batch size
+ """
+ # start-snippet-2
+ # y.shape[0] is (symbolically) the number of rows in y, i.e.,
+ # number of examples (call it n) in the minibatch
+ # T.arange(y.shape[0]) is a symbolic vector which will contain
+ # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
+ # Log-Probabilities (call it LP) with one row per example and
+ # one column per class LP[T.arange(y.shape[0]),y] is a vector
+ # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
+ # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
+ # the mean (across minibatch examples) of the elements in v,
+ # i.e., the mean log-likelihood across the minibatch.
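+ # Example: for a minibatch of 2 examples with
+ # LP = [[-0.1, -2.3, -3.0], [-1.5, -0.2, -2.7]] and y = [0, 1],
+ # LP[T.arange(2), y] = [-0.1, -0.2], whose mean is -0.15, so the
+ # returned cost is 0.15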
+ return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
+ # end-snippet-2
+
+ def errors(self, y):
+ """Return a float representing the number of errors in the minibatch
+ over the total number of examples of the minibatch ; zero one
+ loss over the size of the minibatch
+
+ :type y: theano.tensor.TensorType
+ :param y: corresponds to a vector that gives for each example the
+ correct label
+ """
+
+ # check if y has same dimension of y_pred
+ if y.ndim != self.y_pred.ndim:
+ raise TypeError(
+ 'y should have the same shape as self.y_pred',
+ ('y', y.type, 'y_pred', self.y_pred.type)
+ )
+ # check if y is of the correct datatype
+ if y.dtype.startswith('int'):
+ # the T.neq operator returns a vector of 0s and 1s, where 1
+ # represents a mistake in prediction
+ return T.mean(T.neq(self.y_pred, y))
+ else:
+ raise NotImplementedError()
+
+
+def load_data(dataset):
+ ''' Loads the dataset
+
+ :type dataset: string
+ :param dataset: the path to the dataset (here MNIST)
+ '''
+
+ #############
+ # LOAD DATA #
+ #############
+
+ # Download the MNIST dataset if it is not present
+ '''data_dir, data_file = os.path.split(dataset)
+ if data_dir == "" and not os.path.isfile(dataset):
+ # Check if dataset is in the data directory.
+ new_path = os.path.join(
+ os.path.split(__file__)[0],
+ "..",
+ "data",
+ dataset
+ )
+ if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
+ dataset = new_path
+
+ if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
+ import urllib
+ origin = (
+ 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
+ )
+ print 'Downloading data from %s' % origin
+ urllib.urlretrieve(origin, dataset)
+
+ print '... loading data'
+ '''
+ # Load the dataset
+ #f = gzip.open(dataset, 'rb')
+ #train_set, valid_set, test_set = cPickle.load(f)
+ #f.close()
+ f = open('/homes/pchilguano/deep_learning/gtzan.pkl', 'rb')
+ train_set, valid_set, test_set = cPickle.load(f)
+ f.close()
+ #train_set, valid_set, test_set format: tuple(input, target)
+ #input is a numpy.ndarray of 2 dimensions (a matrix)
+ #whose rows correspond to an example. target is a
+ #numpy.ndarray of 1 dimension (a vector) that has the same length as
+ #the number of rows in the input. It should give the target
+ #to the example with the same index in the input.
+
+ def shared_dataset(data_xy, borrow=True):
+ """ Function that loads the dataset into shared variables
+
+ The reason we store our dataset in shared variables is to allow
+ Theano to copy it into the GPU memory (when code is run on GPU).
+ Since copying data into the GPU is slow, copying a minibatch everytime
+ is needed (the default behaviour if the data is not in a shared
+ variable) would lead to a large decrease in performance.
+ """
+ data_x, data_y = data_xy
+ shared_x = theano.shared(numpy.asarray(data_x,
+ dtype=theano.config.floatX),
+ borrow=borrow)
+ shared_y = theano.shared(numpy.asarray(data_y,
+ dtype=theano.config.floatX),
+ borrow=borrow)
+ # When storing data on the GPU it has to be stored as floats
+ # therefore we will store the labels as ``floatX`` as well
+ # (``shared_y`` does exactly that). But during our computations
+ # we need them as ints (we use labels as index, and if they are
+ # floats it doesn't make sense) therefore instead of returning
+ # ``shared_y`` we will have to cast it to int. This little hack
+ # lets us get around this issue
+ return shared_x, T.cast(shared_y, 'int32')
+
+ test_set_x, test_set_y = shared_dataset(test_set)
+ valid_set_x, valid_set_y = shared_dataset(valid_set)
+ train_set_x, train_set_y = shared_dataset(train_set)
+
+ rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
+ (test_set_x, test_set_y)]
+ return rval
+
+
+def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
+ dataset='mnist.pkl.gz',
+ batch_size=600):
+ """
+ Demonstrate stochastic gradient descent optimization of a log-linear
+ model
+
+ This is demonstrated on MNIST.
+
+ :type learning_rate: float
+ :param learning_rate: learning rate used (factor for the stochastic
+ gradient)
+
+ :type n_epochs: int
+ :param n_epochs: maximal number of epochs to run the optimizer
+
+ :type dataset: string
+ :param dataset: the path of the MNIST dataset file from
+ http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
+
+ """
+ datasets = load_data(dataset)
+
+ train_set_x, train_set_y = datasets[0]
+ valid_set_x, valid_set_y = datasets[1]
+ test_set_x, test_set_y = datasets[2]
+
+ # compute number of minibatches for training, validation and testing
+ n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+ n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
+ n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+
+ ######################
+ # BUILD ACTUAL MODEL #
+ ######################
+ print '... building the model'
+
+ # allocate symbolic variables for the data
+ index = T.lscalar() # index to a [mini]batch
+
+ # generate symbolic variables for input (x and y represent a
+ # minibatch)
+ x = T.matrix('x') # data, presented as rasterized images
+ y = T.ivector('y') # labels, presented as 1D vector of [int] labels
+
+ # construct the logistic regression class
+ # Each MNIST image has size 28*28
+ classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
+
+ # the cost we minimize during training is the negative log likelihood of
+ # the model in symbolic format
+ cost = classifier.negative_log_likelihood(y)
+
+ # compiling a Theano function that computes the mistakes that are made by
+ # the model on a minibatch
+ test_model = theano.function(
+ inputs=[index],
+ outputs=classifier.errors(y),
+ givens={
+ x: test_set_x[index * batch_size: (index + 1) * batch_size],
+ y: test_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+
+ validate_model = theano.function(
+ inputs=[index],
+ outputs=classifier.errors(y),
+ givens={
+ x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+ y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+
+ # compute the gradient of cost with respect to theta = (W,b)
+ g_W = T.grad(cost=cost, wrt=classifier.W)
+ g_b = T.grad(cost=cost, wrt=classifier.b)
+
+ # start-snippet-3
+ # specify how to update the parameters of the model as a list of
+ # (variable, update expression) pairs.
+ updates = [(classifier.W, classifier.W - learning_rate * g_W),
+ (classifier.b, classifier.b - learning_rate * g_b)]
+
+ # compiling a Theano function `train_model` that returns the cost, but in
+ # the same time updates the parameter of the model based on the rules
+ # defined in `updates`
+ train_model = theano.function(
+ inputs=[index],
+ outputs=cost,
+ updates=updates,
+ givens={
+ x: train_set_x[index * batch_size: (index + 1) * batch_size],
+ y: train_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+ # end-snippet-3
+
+ ###############
+ # TRAIN MODEL #
+ ###############
+ print '... training the model'
+ # early-stopping parameters
+ patience = 5000 # look at this many examples regardless
+ patience_increase = 2 # wait this much longer when a new best is
+ # found
+ improvement_threshold = 0.995 # a relative improvement of this much is
+ # considered significant
+ validation_frequency = min(n_train_batches, patience / 2)
+ # go through this many
+ # minibatches before checking the network
+ # on the validation set; in this case we
+ # check every epoch
+
+ best_validation_loss = numpy.inf
+ test_score = 0.
+ start_time = timeit.default_timer()
+
+ done_looping = False
+ epoch = 0
+ while (epoch < n_epochs) and (not done_looping):
+ epoch = epoch + 1
+ for minibatch_index in xrange(n_train_batches):
+
+ minibatch_avg_cost = train_model(minibatch_index)
+ # iteration number
+ iter = (epoch - 1) * n_train_batches + minibatch_index
+
+ if (iter + 1) % validation_frequency == 0:
+ # compute zero-one loss on validation set
+ validation_losses = [validate_model(i)
+ for i in xrange(n_valid_batches)]
+ this_validation_loss = numpy.mean(validation_losses)
+
+ print(
+ 'epoch %i, minibatch %i/%i, validation error %f %%' %
+ (
+ epoch,
+ minibatch_index + 1,
+ n_train_batches,
+ this_validation_loss * 100.
+ )
+ )
+
+ # if we got the best validation score until now
+ if this_validation_loss < best_validation_loss:
+ #improve patience if loss improvement is good enough
+ if this_validation_loss < best_validation_loss * \
+ improvement_threshold:
+ patience = max(patience, iter * patience_increase)
+
+ best_validation_loss = this_validation_loss
+ # test it on the test set
+
+ test_losses = [test_model(i)
+ for i in xrange(n_test_batches)]
+ test_score = numpy.mean(test_losses)
+
+ print(
+ (
+ ' epoch %i, minibatch %i/%i, test error of'
+ ' best model %f %%'
+ ) %
+ (
+ epoch,
+ minibatch_index + 1,
+ n_train_batches,
+ test_score * 100.
+ )
+ )
+
+ # save the best model
+ with open('best_model.pkl', 'w') as f:
+ cPickle.dump(classifier, f)
+
+ if patience <= iter:
+ done_looping = True
+ break
+
+ end_time = timeit.default_timer()
+ print(
+ (
+ 'Optimization complete with best validation score of %f %%,'
+ 'with test performance %f %%'
+ )
+ % (best_validation_loss * 100., test_score * 100.)
+ )
+ print 'The code ran for %d epochs, with %f epochs/sec' % (
+ epoch, 1. * epoch / (end_time - start_time))
+ print >> sys.stderr, ('The code for file ' +
+ os.path.split(__file__)[1] +
+ ' ran for %.1fs' % ((end_time - start_time)))
+
+
+def predict():
+ """
+ An example of how to load a trained model and use it
+ to predict labels.
+ """
+
+ # load the saved model
+ classifier = cPickle.load(open('best_model.pkl'))
+
+ # compile a predictor function
+ predict_model = theano.function(
+ inputs=[classifier.input],
+ outputs=classifier.y_pred)
+
+ # We can test it on some examples from the test set
+ dataset='mnist.pkl.gz'
+ datasets = load_data(dataset)
+ test_set_x, test_set_y = datasets[2]
+ test_set_x = test_set_x.get_value()
+
+ predicted_values = predict_model(test_set_x[:10])
+ print ("Predicted values for the first 10 examples in test set:")
+ print predicted_values
+
+
+# Rectified Linear Unit (ReLU)
+# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
+# Defined before the __main__ block so that it already exists when
+# LogisticRegression (which calls it) is instantiated by
+# sgd_optimization_mnist() below.
+def relu(x):
+ return T.switch(x < 0, 0, x)
+
+
+if __name__ == '__main__':
+ sgd_optimization_mnist()
diff -r c63dac455296 -r 2e3c57fba632 Code/make_lists.py
--- a/Code/make_lists.py Tue Jul 21 11:54:30 2015 +0100
+++ b/Code/make_lists.py Sat Jul 25 21:51:16 2015 +0100
@@ -1,6 +1,6 @@
import numpy
-#import numpy.random as random
+import numpy.random as random
import os
import pickle
import sys
@@ -41,13 +41,13 @@
"""
Generates lists
"""
- audio_path = os.path.join(gtzan_path,'preview_clip')
+ audio_path = os.path.join(gtzan_path,'audio')
out_path = os.path.join(gtzan_path,'lists')
files_list = []
for ext in ['.au', '.mp3', '.wav']:
files = U.getFiles(audio_path, ext)
files_list.extend(files)
- #random.shuffle(files_list)
+ random.shuffle(files_list)
if not os.path.exists(out_path):
os.makedirs(out_path)
diff -r c63dac455296 -r 2e3c57fba632 Code/mlp.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/mlp.py Sat Jul 25 21:51:16 2015 +0100
@@ -0,0 +1,412 @@
+"""
+This tutorial introduces the multilayer perceptron using Theano.
+
+ A multilayer perceptron is a logistic regressor where,
+instead of feeding the input to the logistic regression, you insert an
+intermediate layer, called the hidden layer, that has a nonlinear
+activation function (usually tanh or sigmoid). One can use many such
+hidden layers, making the architecture deep. The tutorial will also tackle
+the problem of MNIST digit classification.
+
+.. math::
+
+ f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),
+
+References:
+
+ - textbooks: "Pattern Recognition and Machine Learning" -
+ Christopher M. Bishop, section 5
+
+"""
+__docformat__ = 'restructuredtext en'
+
+
+import os
+import sys
+import timeit
+
+import numpy
+
+import theano
+import theano.tensor as T
+
+
+from logistic_sgd import LogisticRegression, load_data
+
+
+# start-snippet-1
+class HiddenLayer(object):
+ def __init__(self, rng, input, n_in, n_out, W=None, b=None,
+ activation=T.tanh):
+ """
+ Typical hidden layer of an MLP: units are fully-connected and have a
+ sigmoidal activation function. The weight matrix W is of shape (n_in, n_out)
+ and the bias vector b is of shape (n_out,).
+
+ NOTE : The nonlinearity used here is tanh
+
+ Hidden unit activation is given by: tanh(dot(input,W) + b)
+
+ :type rng: numpy.random.RandomState
+ :param rng: a random number generator used to initialize weights
+
+ :type input: theano.tensor.dmatrix
+ :param input: a symbolic tensor of shape (n_examples, n_in)
+
+ :type n_in: int
+ :param n_in: dimensionality of input
+
+ :type n_out: int
+ :param n_out: number of hidden units
+
+ :type activation: theano.Op or function
+ :param activation: Non linearity to be applied in the hidden
+ layer
+ """
+ self.input = input
+ # end-snippet-1
+
+ # `W` is initialized with `W_values` which is uniformly sampled
+ # from -sqrt(6./(n_in+n_out)) to sqrt(6./(n_in+n_out))
+ # for a tanh activation function
+ # the output of uniform is converted using asarray to dtype
+ # theano.config.floatX so that the code is runnable on GPU
+ # Note : optimal initialization of weights is dependent on the
+ # activation function used (among other things).
+ # For example, results presented in [Xavier10] suggest that you
+ # should use 4 times larger initial weights for sigmoid
+ # compared to tanh
+ # We have no info for other functions, so we use the same as
+ # tanh.
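+ # e.g. with the defaults used in test_mlp below (n_in = 28*28 = 784,
+ # n_out = n_hidden = 500) the bound is sqrt(6/1284) ~= 0.068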
+ if W is None:
+ W_values = numpy.asarray(
+ rng.uniform(
+ low=-numpy.sqrt(6. / (n_in + n_out)),
+ high=numpy.sqrt(6. / (n_in + n_out)),
+ size=(n_in, n_out)
+ ),
+ dtype=theano.config.floatX
+ )
+ if activation == theano.tensor.nnet.sigmoid:
+ W_values *= 4
+
+ W = theano.shared(value=W_values, name='W', borrow=True)
+
+ if b is None:
+ b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
+ b = theano.shared(value=b_values, name='b', borrow=True)
+
+ self.W = W
+ self.b = b
+
+ lin_output = T.dot(input, self.W) + self.b
+ self.output = (
+ lin_output if activation is None
+ else activation(lin_output)
+ )
+ # parameters of the model
+ self.params = [self.W, self.b]
+
+
+# start-snippet-2
+class MLP(object):
+ """Multi-Layer Perceptron Class
+
+ A multilayer perceptron is a feedforward artificial neural network model
+ that has one or more layers of hidden units and nonlinear activations.
+ Intermediate layers usually use tanh or the
+ sigmoid function (defined here by a ``HiddenLayer`` class) while the
+ top layer is a softmax layer (defined here by a ``LogisticRegression``
+ class).
+ """
+
+ def __init__(self, rng, input, n_in, n_hidden, n_out):
+ """Initialize the parameters for the multilayer perceptron
+
+ :type rng: numpy.random.RandomState
+ :param rng: a random number generator used to initialize weights
+
+ :type input: theano.tensor.TensorType
+ :param input: symbolic variable that describes the input of the
+ architecture (one minibatch)
+
+ :type n_in: int
+ :param n_in: number of input units, the dimension of the space in
+ which the datapoints lie
+
+ :type n_hidden: int
+ :param n_hidden: number of hidden units
+
+ :type n_out: int
+ :param n_out: number of output units, the dimension of the space in
+ which the labels lie
+
+ """
+
+ # Since we are dealing with a one hidden layer MLP, this will translate
+ # into a HiddenLayer with a tanh activation function connected to the
+ # LogisticRegression layer; the activation function can be replaced by
+ # sigmoid or any other nonlinear function
+ self.hiddenLayer = HiddenLayer(
+ rng=rng,
+ input=input,
+ n_in=n_in,
+ n_out=n_hidden,
+ activation=T.tanh
+ )
+
+ # The logistic regression layer gets as input the hidden units
+ # of the hidden layer
+ self.logRegressionLayer = LogisticRegression(
+ input=self.hiddenLayer.output,
+ n_in=n_hidden,
+ n_out=n_out
+ )
+ # end-snippet-2 start-snippet-3
+ # L1 norm ; one regularization option is to enforce L1 norm to
+ # be small
+ self.L1 = (
+ abs(self.hiddenLayer.W).sum()
+ + abs(self.logRegressionLayer.W).sum()
+ )
+
+ # square of L2 norm ; one regularization option is to enforce
+ # square of L2 norm to be small
+ self.L2_sqr = (
+ (self.hiddenLayer.W ** 2).sum()
+ + (self.logRegressionLayer.W ** 2).sum()
+ )
+
+ # negative log likelihood of the MLP is given by the negative
+ # log likelihood of the output of the model, computed in the
+ # logistic regression layer
+ self.negative_log_likelihood = (
+ self.logRegressionLayer.negative_log_likelihood
+ )
+ # same holds for the function computing the number of errors
+ self.errors = self.logRegressionLayer.errors
+
+ # the parameters of the model are the parameters of the two layers it is
+ # made out of
+ self.params = self.hiddenLayer.params + self.logRegressionLayer.params
+ # end-snippet-3
+
+ # keep track of model input
+ self.input = input
+
+
+def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
+ dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
+ """
+ Demonstrate stochastic gradient descent optimization for a multilayer
+ perceptron
+
+ This is demonstrated on MNIST.
+
+ :type learning_rate: float
+ :param learning_rate: learning rate used (factor for the stochastic
+ gradient)
+
+ :type L1_reg: float
+ :param L1_reg: L1-norm's weight when added to the cost (see
+ regularization)
+
+ :type L2_reg: float
+ :param L2_reg: L2-norm's weight when added to the cost (see
+ regularization)
+
+ :type n_epochs: int
+ :param n_epochs: maximal number of epochs to run the optimizer
+
+ :type dataset: string
+ :param dataset: the path of the MNIST dataset file from
+ http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
+
+
+ """
+ datasets = load_data(dataset)
+
+ train_set_x, train_set_y = datasets[0]
+ valid_set_x, valid_set_y = datasets[1]
+ test_set_x, test_set_y = datasets[2]
+
+ # compute number of minibatches for training, validation and testing
+ n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+ n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
+ n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
+
+ ######################
+ # BUILD ACTUAL MODEL #
+ ######################
+ print '... building the model'
+
+ # allocate symbolic variables for the data
+ index = T.lscalar() # index to a [mini]batch
+ x = T.matrix('x') # the data is presented as rasterized images
+ y = T.ivector('y') # the labels are presented as 1D vector of
+ # [int] labels
+
+ rng = numpy.random.RandomState(1234)
+
+ # construct the MLP class
+ classifier = MLP(
+ rng=rng,
+ input=x,
+ n_in=28 * 28,
+ n_hidden=n_hidden,
+ n_out=10
+ )
+
+ # start-snippet-4
+ # the cost we minimize during training is the negative log likelihood of
+ # the model plus the regularization terms (L1 and L2); cost is expressed
+ # here symbolically
+ cost = (
+ classifier.negative_log_likelihood(y)
+ + L1_reg * classifier.L1
+ + L2_reg * classifier.L2_sqr
+ )
+ # end-snippet-4
+
+ # compiling a Theano function that computes the mistakes that are made
+ # by the model on a minibatch
+ test_model = theano.function(
+ inputs=[index],
+ outputs=classifier.errors(y),
+ givens={
+ x: test_set_x[index * batch_size:(index + 1) * batch_size],
+ y: test_set_y[index * batch_size:(index + 1) * batch_size]
+ }
+ )
+
+ validate_model = theano.function(
+ inputs=[index],
+ outputs=classifier.errors(y),
+ givens={
+ x: valid_set_x[index * batch_size:(index + 1) * batch_size],
+ y: valid_set_y[index * batch_size:(index + 1) * batch_size]
+ }
+ )
+
+ # start-snippet-5
+ # compute the gradient of cost with respect to theta (stored in params)
+ # the resulting gradients will be stored in a list gparams
+ gparams = [T.grad(cost, param) for param in classifier.params]
+
+ # specify how to update the parameters of the model as a list of
+ # (variable, update expression) pairs
+
+ # given two lists of the same length, A = [a1, a2, a3, a4] and
+ # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
+ # element is a pair formed from the two lists :
+ # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
+ updates = [
+ (param, param - learning_rate * gparam)
+ for param, gparam in zip(classifier.params, gparams)
+ ]
+
+ # compiling a Theano function `train_model` that returns the cost, but
+ # in the same time updates the parameter of the model based on the rules
+ # defined in `updates`
+ train_model = theano.function(
+ inputs=[index],
+ outputs=cost,
+ updates=updates,
+ givens={
+ x: train_set_x[index * batch_size: (index + 1) * batch_size],
+ y: train_set_y[index * batch_size: (index + 1) * batch_size]
+ }
+ )
+ # end-snippet-5
+
+ ###############
+ # TRAIN MODEL #
+ ###############
+ print '... training'
+
+ # early-stopping parameters
+ patience = 10000 # look at this many examples regardless
+ patience_increase = 2 # wait this much longer when a new best is
+ # found
+ improvement_threshold = 0.995 # a relative improvement of this much is
+ # considered significant
+ validation_frequency = min(n_train_batches, patience / 2)
+ # go through this many
+ # minibatches before checking the network
+ # on the validation set; in this case we
+ # check every epoch
+
+ best_validation_loss = numpy.inf
+ best_iter = 0
+ test_score = 0.
+ start_time = timeit.default_timer()
+
+ epoch = 0
+ done_looping = False
+
+ while (epoch < n_epochs) and (not done_looping):
+ epoch = epoch + 1
+ for minibatch_index in xrange(n_train_batches):
+
+ minibatch_avg_cost = train_model(minibatch_index)
+ # iteration number
+ iter = (epoch - 1) * n_train_batches + minibatch_index
+
+ if (iter + 1) % validation_frequency == 0:
+ # compute zero-one loss on validation set
+ validation_losses = [validate_model(i) for i
+ in xrange(n_valid_batches)]
+ this_validation_loss = numpy.mean(validation_losses)
+
+ print(
+ 'epoch %i, minibatch %i/%i, validation error %f %%' %
+ (
+ epoch,
+ minibatch_index + 1,
+ n_train_batches,
+ this_validation_loss * 100.
+ )
+ )
+
+ # if we got the best validation score until now
+ if this_validation_loss < best_validation_loss:
+ #improve patience if loss improvement is good enough
+ if (
+ this_validation_loss < best_validation_loss *
+ improvement_threshold
+ ):
+ patience = max(patience, iter * patience_increase)
+
+ best_validation_loss = this_validation_loss
+ best_iter = iter
+
+ # test it on the test set
+ test_losses = [test_model(i) for i
+ in xrange(n_test_batches)]
+ test_score = numpy.mean(test_losses)
+
+ print((' epoch %i, minibatch %i/%i, test error of '
+ 'best model %f %%') %
+ (epoch, minibatch_index + 1, n_train_batches,
+ test_score * 100.))
+
+ if patience <= iter:
+ done_looping = True
+ break
+
+ end_time = timeit.default_timer()
+ print(('Optimization complete. Best validation score of %f %% '
+ 'obtained at iteration %i, with test performance %f %%') %
+ (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+ print >> sys.stderr, ('The code for file ' +
+ os.path.split(__file__)[1] +
+ ' ran for %.2fm' % ((end_time - start_time) / 60.))
+
+
+if __name__ == '__main__':
+ test_mlp()
+
+# Rectified Linear Unit (ReLU); can be passed as `activation` to HiddenLayer
+# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
+def relu(x):
+ return T.switch(x < 0, 0, x)
diff -r c63dac455296 -r 2e3c57fba632 Code/prepare_dataset.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Code/prepare_dataset.py Sat Jul 25 21:51:16 2015 +0100
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jul 23 21:55:58 2015
+
+@author: paulochiliguano
+"""
+
+
+import tables
+import numpy as np
+import cPickle
+
+filename = '/homes/pchilguano/deep_learning/features/feats.h5'
+with tables.openFile(filename, 'r') as f:
+ features = f.root.x.read()
+ #filenames = f.root.filenames.read()
+
+#initial_shape = features.shape[1:]
+n_per_example = np.prod(features.shape[1:-1])
+number_of_features = features.shape[-1]
+flat_data = features.view()
+flat_data.shape = (features.shape[0], -1)
+#flat_targets = filenames.repeat(n_per_example)
+
+#genre = np.asarray([line.strip().split('\t')[1] for line in open(filename,'r').readlines()])
+
+
+filename = '/homes/pchilguano/deep_learning/lists/ground_truth.txt'
+with open(filename, 'r') as f:
+ tag_set = set()
+ for line in f:
+ tag = line.strip().split('\t')[1]
+ tag_set.add(tag)
+
+tag_dict = dict([(item, index) for index, item in enumerate(sorted(tag_set))])
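+# e.g. assuming ground_truth.txt holds the ten GTZAN genre labels, this gives
+# {'blues': 0, 'classical': 1, 'country': 2, ..., 'rock': 9}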
+with open(filename, 'r') as f:
+ target = np.asarray([])
+ mp3_dict = {}
+ for line in f:
+ tag = line.strip().split('\t')[1]
+ target = np.append(target, tag_dict[tag])
+
+train_input, valid_input, test_input = np.array_split(flat_data, [flat_data.shape[0]*4/5, flat_data.shape[0]*9/10])
+train_target, valid_target, test_target = np.array_split(target, [target.shape[0]*4/5, target.shape[0]*9/10])
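+# splitting at 4/5 and 9/10 of the examples gives an 80/10/10
+# train/validation/test split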
+
+f = open('/homes/pchilguano/deep_learning/gtzan.pkl', 'wb')
+cPickle.dump(((train_input, train_target), (valid_input, valid_target), (test_input, test_target)), f, protocol=cPickle.HIGHEST_PROTOCOL)
+f.close()