# HG changeset patch
# User Paulo Chiliguano
# Date 1438021477 -3600
# Node ID 68b8b088f50a4a05c95968d5ebc92d32cb7de563
# Parent 2e3c57fba632ede0ea2b4c15f8f1624f4f9a63f5
Code for pre-training
diff -r 2e3c57fba632 -r 68b8b088f50a Code/convolutional_mlp.py
--- a/Code/convolutional_mlp.py Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/convolutional_mlp.py Mon Jul 27 19:24:37 2015 +0100
@@ -35,12 +35,17 @@
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
-# Rectifier Linear Unit
+# Paulo: Additional libraries
+import cPickle
+from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+
+# Paulo: Rectifier Linear Unit
# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
- return T.switch(x<0, 0, x)
+ return T.maximum(0.,x)
-
+# Paulo: Random Streams
+srng = RandomStreams()
class LeNetConvPoolLayer(object):
"""Pool Layer of a convolutional network """
@@ -106,7 +111,16 @@
ds=poolsize,
ignore_border=True
)
-
+
+ # Paulo: dropout
+ # Source: https://github.com/Newmu/Theano-Tutorials/blob/master/5_convolutional_net.py
+ retain_prob = 1 - 0.25
+ pooled_out *= srng.binomial(
+ pooled_out.shape,
+ p=retain_prob,
+ dtype=theano.config.floatX)
+ pooled_out /= retain_prob
+
# add the bias term. Since the bias is a vector (1D array), we first
# reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
# thus be broadcasted across mini-batches and feature map
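
The hunk above applies inverted dropout to the pooled feature maps: a binary mask is drawn from MRG_RandomStreams.binomial with keep probability 1 - 0.25, and the surviving activations are rescaled by 1/retain_prob. A minimal, self-contained sketch of the same masking (assumes only Theano; the helper name apply_dropout is illustrative, not part of the patch):

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

srng = RandomStreams(seed=1234)

def apply_dropout(activations, drop_prob=0.25):
    # keep each unit with probability retain_prob, zero it otherwise
    retain_prob = 1. - drop_prob
    mask = srng.binomial(activations.shape, p=retain_prob,
                         dtype=theano.config.floatX)
    # rescaling by retain_prob keeps the expected activation unchanged (inverted dropout)
    return activations * mask / retain_prob

x = T.matrix('x')
dropout_fn = theano.function([x], apply_dropout(x))
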
@@ -123,7 +137,7 @@
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
dataset='mnist.pkl.gz',
- nkerns=[20, 50, 70], batch_size=100):
+ nkerns=[256, 256], batch_size=20):
""" Demonstrates lenet on MNIST dataset
:type learning_rate: float
@@ -174,7 +188,7 @@
# to a 4D tensor, compatible with our LeNetConvPoolLayer
# (28, 28) is the size of MNIST images.
#layer0_input = x.reshape((batch_size, 1, 28, 28))
- layer0_input = x.reshape((batch_size, 1, 1206, 128))
+ layer0_input = x.reshape((batch_size, 1, 1204, 513))
# Construct the first convolutional pooling layer:
# filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
# maxpooling reduces this further to (24/2, 24/2) = (12, 12)
@@ -183,10 +197,11 @@
rng,
input=layer0_input,
#image_shape=(batch_size, 1, 28, 28),
- image_shape=(batch_size, 1, 1206, 128),
+ image_shape=(batch_size, 1, 1204, 513),
#filter_shape=(nkerns[0], 1, 5, 5),
- filter_shape=(nkerns[0], 1, 11, 11),
- poolsize=(2, 2)
+ filter_shape=(nkerns[0], 1, 4, 513),
+ #poolsize=(2, 2)
+ poolsize=(4, 1)
)
# Construct the second convolutional pooling layer
@@ -197,50 +212,52 @@
rng,
input=layer0.output,
#image_shape=(batch_size, nkerns[0], 12, 12),
- image_shape=(batch_size, nkerns[0], 598, 59),
+ image_shape=(batch_size, nkerns[0], 300, 1),
#filter_shape=(nkerns[1], nkerns[0], 5, 5),
- filter_shape=(nkerns[1], nkerns[0], 5, 5),
- poolsize=(2, 2)
+ filter_shape=(nkerns[1], nkerns[0], 4, 1),
+ #poolsize=(2, 2)
+ poolsize=(2, 1)
)
# Construct the third convolutional pooling layer
+ '''
layer2 = LeNetConvPoolLayer(
rng,
input=layer1.output,
- image_shape=(batch_size, nkerns[1], 297, 27),
+ image_shape=(batch_size, nkerns[1], 296, 123),
filter_shape=(nkerns[2], nkerns[1], 5, 5),
poolsize=(1, 1)
)
-
+ '''
# the HiddenLayer being fully-connected, it operates on 2D matrices of
# shape (batch_size, num_pixels) (i.e matrix of rasterized images).
# This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
# or (500, 50 * 4 * 4) = (500, 800) with the default values.
- layer3_input = layer2.output.flatten(2)
+ layer2_input = layer1.output.flatten(2)
# construct a fully-connected sigmoidal layer
- layer3 = HiddenLayer(
+ layer2 = HiddenLayer(
rng,
- input=layer3_input,
+ input=layer2_input,
#n_in=nkerns[1] * 4 * 4,
- n_in=nkerns[2] * 293 * 23,
+ n_in=nkerns[1] * 148 * 1,
#n_out=500,
- n_out=500,
+ n_out=513,
#activation=T.tanh
activation=relu
)
# classify the values of the fully-connected sigmoidal layer
#layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
- layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)
+ layer3 = LogisticRegression(input=layer2.output, n_in=513, n_out=10)
# the cost we minimize during training is the NLL of the model
- cost = layer4.negative_log_likelihood(y)
+ cost = layer3.negative_log_likelihood(y)
# create a function to compute the mistakes that are made by the model
test_model = theano.function(
[index],
- layer4.errors(y),
+ layer3.errors(y),
givens={
x: test_set_x[index * batch_size: (index + 1) * batch_size],
y: test_set_y[index * batch_size: (index + 1) * batch_size]
@@ -249,15 +266,29 @@
validate_model = theano.function(
[index],
- layer4.errors(y),
+ layer3.errors(y),
givens={
x: valid_set_x[index * batch_size: (index + 1) * batch_size],
y: valid_set_y[index * batch_size: (index + 1) * batch_size]
}
)
-
+
+ # Paulo: Set best params for MLP pre-training
+ f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'rb')
+ #params3 = cPickle.load(f)
+ params0, params1, params2, params3 = cPickle.load(f)
+ f.close()
+ #layer0.W.set_value(params0[0])
+ #layer0.b.set_value(params0[1])
+ layer1.W.set_value(params1[0])
+ layer1.b.set_value(params1[1])
+ layer2.W.set_value(params2[0])
+ layer2.b.set_value(params2[1])
+ layer3.W.set_value(params3[0])
+ layer3.b.set_value(params3[1])
+
# create a list of all model parameters to be fit by gradient descent
- params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
+ params = layer3.params + layer2.params + layer1.params + layer0.params
# create a list of gradients for all model parameters
grads = T.grad(cost, params)
@@ -349,7 +380,12 @@
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
test_score * 100.))
-
+ # Paulo: Get best parameters for MLP
+ best_params0 = [param.get_value().copy() for param in layer0.params]
+ best_params1 = [param.get_value().copy() for param in layer1.params]
+ best_params2 = [param.get_value().copy() for param in layer2.params]
+ best_params3 = [param.get_value().copy() for param in layer3.params]
+
if patience <= iter:
done_looping = True
break
@@ -362,7 +398,11 @@
print >> sys.stderr, ('The code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.))
-
+ # Paulo: Save best params for MLP
+ f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'wb')
+ cPickle.dump((best_params0, best_params1, best_params2, best_params3), f, protocol=cPickle.HIGHEST_PROTOCOL)
+ f.close()
+
if __name__ == '__main__':
evaluate_lenet5()
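
Taken together, the convolutional_mlp.py changes implement the pre-training round trip: after each improvement the layer parameters are copied out of their Theano shared variables, pickled to best_params.pkl, and on a later run pushed back into the layers with set_value(). A minimal, self-contained sketch of that cycle with stand-in shared variables (the path, shapes, and names here are illustrative, not the ones used above):

import cPickle
import numpy as np
import theano

# stand-ins for one layer's weight matrix and bias vector
W = theano.shared(np.zeros((4, 3), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='b')
params = [W, b]

# after training: snapshot the current values and pickle them
best_params = [p.get_value().copy() for p in params]
f = open('best_params_demo.pkl', 'wb')
cPickle.dump(best_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()

# on the next run: load the snapshot and restore it before training starts
f = open('best_params_demo.pkl', 'rb')
saved = cPickle.load(f)
f.close()
W.set_value(saved[0])
b.set_value(saved[1])
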
diff -r 2e3c57fba632 -r 68b8b088f50a Code/eda.py
--- a/Code/eda.py Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/eda.py Mon Jul 27 19:24:37 2015 +0100
@@ -5,6 +5,8 @@
@author: paulochiliguano
"""
+
+import random
import numpy as np
from sklearn import mixture
@@ -38,15 +40,15 @@
"SOAKNZI12A58A79CAC": 3.0}
}
-items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1],
- "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1],
- "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1],
- "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1],
- "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1],
- "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1],
- "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1],
- "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1]}
-
+items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1, 5, 4, 1],
+ "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1, 5, 4, 1],
+ "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1, 5, 4, 1],
+ "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1, 5, 4, 1],
+ "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
+ "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
+ "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
+ "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]}
+'''
profile = {"Profile0": [2.5, 4, 3.5, 3, 5, 4, 1],
"Profile1": [2.5, 4, 3.5, 3, 5, 4, 1],
"Profile2": [2.5, 4, 3.5, 3, 5, 4, 1],
@@ -55,9 +57,19 @@
"Profile5": [2.5, 4, 3.5, 3, 5, 4, 1],
"Profile6": [2.5, 4, 3.5, 3, 5, 4, 1],
"Profile7": [2.5, 4, 3.5, 3, 5, 4, 1]}
-
-
+'''
+'''
+Generate M individuals uniformly
+'''
+np.random.seed(len(users))
+M = np.random.uniform(1, 5, len(users) * len(items.values()[0]))
+M.shape = (-1, len(items.values()[0]))
+profile = {}
+i = 0
+for row in M.tolist():
+ profile["Profile" + str(i)] = M.tolist()[i]
+ i = i + 1
np.random.seed(1)
g = mixture.GMM(n_components=7)
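
The profile-generation block above seeds numpy with the number of users and fills an (n_users x n_features) matrix with values drawn uniformly from [1, 5), one row per generated profile. A compact numpy-only restatement of the same idea (sizes here are illustrative; eda.py derives them from users and items):

import numpy as np

n_users = 8                      # illustrative; eda.py uses len(users)
n_features = 10                  # length of the extended item vectors above
np.random.seed(n_users)
M = np.random.uniform(1, 5, (n_users, n_features))
profile = {"Profile%d" % i: row.tolist() for i, row in enumerate(M)}
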
diff -r 2e3c57fba632 -r 68b8b088f50a Code/logistic_sgd.py
--- a/Code/logistic_sgd.py Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/logistic_sgd.py Mon Jul 27 19:24:37 2015 +0100
@@ -99,9 +99,8 @@
# x is a matrix where row-j represents input training sample-j
# b is a vector where element-k represent the free parameter of
# hyperplane-k
- #self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
- self.p_y_given_x = relu(T.dot(input, self.W) + self.b)
- #print(self.p_y_given_x)
+ self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
+ #self.p_y_given_x = relu(T.dot(input, self.W) + self.b)
# symbolic description of how to compute prediction as class whose
# probability is maximal
@@ -204,11 +203,11 @@
urllib.urlretrieve(origin, dataset)
print '... loading data'
- '''
+
# Load the dataset
- #f = gzip.open(dataset, 'rb')
- #train_set, valid_set, test_set = cPickle.load(f)
- #f.close()
+ f = gzip.open(dataset, 'rb')
+ train_set, valid_set, test_set = cPickle.load(f)
+ f.close()'''
f = file('/homes/pchilguano/deep_learning/gtzan.pkl', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
f.close()
@@ -478,4 +477,4 @@
# Rectifier Linear Unit
#Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
- return T.switch(x<0, 0, x)
+ return T.maximum(0.,x)
diff -r 2e3c57fba632 -r 68b8b088f50a Code/mlp.py
--- a/Code/mlp.py Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/mlp.py Mon Jul 27 19:24:37 2015 +0100
@@ -409,4 +409,4 @@
# Rectifier Linear Unit
#Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
- return T.switch(x<0, 0, x)
+ return T.maximum(0.,x)
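
The same one-line change is applied in convolutional_mlp.py, logistic_sgd.py and mlp.py: the rectifier is rewritten from T.switch(x < 0, 0, x) to T.maximum(0., x). Both expressions compute max(0, x) element-wise; a small check (assumes Theano and numpy are available, values are illustrative):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
relu_switch = theano.function([x], T.switch(x < 0, 0, x))
relu_max = theano.function([x], T.maximum(0., x))

data = np.array([[-1.5, 0., 2.]], dtype=theano.config.floatX)
assert np.allclose(relu_switch(data), relu_max(data))
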
diff -r 2e3c57fba632 -r 68b8b088f50a Code/prepare_dataset.py
--- a/Code/prepare_dataset.py Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/prepare_dataset.py Mon Jul 27 19:24:37 2015 +0100
@@ -9,6 +9,7 @@
import tables
import numpy as np
import cPickle
+import sklearn.preprocessing as preprocessing
filename = '/homes/pchilguano/deep_learning/features/feats.h5'
with tables.openFile(filename, 'r') as f:
@@ -19,12 +20,14 @@
n_per_example = np.prod(features.shape[1:-1])
number_of_features = features.shape[-1]
flat_data = features.view()
-flat_data.shape = (features.shape[0], -1)
+flat_data.shape = (-1, number_of_features)
+scaler = preprocessing.StandardScaler().fit(flat_data)
+flat_data = scaler.transform(flat_data)
+#flat_data.shape = (features.shape[0], -1)
#flat_targets = filenames.repeat(n_per_example)
#genre = np.asarray([line.strip().split('\t')[1] for line in open(filename,'r').readlines()])
-
filename = '/homes/pchilguano/deep_learning/lists/ground_truth.txt'
with open(filename, 'r') as f:
tag_set = set()
@@ -34,7 +37,7 @@
tag_dict = dict([(item, index) for index, item in enumerate(sorted(tag_set))])
with open(filename, 'r') as f:
- target = np.asarray([])
+ target = np.asarray([], dtype='int32')
mp3_dict = {}
for line in f:
tag = line.strip().split('\t')[1]
@@ -46,3 +49,12 @@
f = file('/homes/pchilguano/deep_learning/gtzan.pkl', 'wb')
cPickle.dump(((train_input, train_target), (valid_input, valid_target), (test_input, test_target)), f, protocol=cPickle.HIGHEST_PROTOCOL)
f.close()
+
+flat_target = target.repeat(n_per_example)
+
+train_input, valid_input, test_input = np.array_split(flat_data, [flat_data.shape[0]*4/5, flat_data.shape[0]*9/10])
+train_target, valid_target, test_target = np.array_split(flat_target, [flat_target.shape[0]*4/5, flat_target.shape[0]*9/10])
+
+f = file('/homes/pchilguano/deep_learning/gtzan_logistic.pkl', 'wb')
+cPickle.dump(((train_input, train_target), (valid_input, valid_target), (test_input, test_target)), f, protocol=cPickle.HIGHEST_PROTOCOL)
+f.close()
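
prepare_dataset.py now standardises every feature column with scikit-learn's StandardScaler before the frames are split 80/10/10 into training, validation and test sets and pickled. A minimal sketch of that pipeline on dummy data (the array shape and the print statement are illustrative only):

import numpy as np
import sklearn.preprocessing as preprocessing

features = np.random.rand(1000, 513).astype('float32')     # dummy feature frames
scaler = preprocessing.StandardScaler().fit(features)       # zero mean, unit variance per column
features = scaler.transform(features)

n = features.shape[0]
train, valid, test = np.array_split(features, [n * 4 / 5, n * 9 / 10])
print train.shape, valid.shape, test.shape                   # (800, 513) (100, 513) (100, 513)
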