hybrid-music-recommender-using-content-based-and-social-information
changeset 16:68b8b088f50a
Code for pre-training
author   | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date     | Mon, 27 Jul 2015 19:24:37 +0100
parents  | 2e3c57fba632
children | ee13c193c76e
files    | Code/convolutional_mlp.py Code/eda.py Code/logistic_sgd.py Code/mlp.py Code/prepare_dataset.py
diffstat | 5 files changed, 113 insertions(+), 50 deletions(-)
--- a/Code/convolutional_mlp.py	Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/convolutional_mlp.py	Mon Jul 27 19:24:37 2015 +0100
@@ -35,12 +35,17 @@
 from logistic_sgd import LogisticRegression, load_data
 from mlp import HiddenLayer
 
-# Rectifier Linear Unit
+# Paulo: Additional libraries
+import cPickle
+from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+
+# Paulo: Rectifier Linear Unit
 # Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
 def relu(x):
-    return T.switch(x<0, 0, x)
+    return T.maximum(0.,x)
 
-
+# Paulo: Random Streams
+srng = RandomStreams()
 
 class LeNetConvPoolLayer(object):
     """Pool Layer of a convolutional network """
@@ -106,7 +111,16 @@
             ds=poolsize,
             ignore_border=True
         )
-
+
+        # Paulo: dropout
+        # Source: https://github.com/Newmu/Theano-Tutorials/blob/master/5_convolutional_net.py
+        retain_prob = 1 - 0.25
+        pooled_out *= srng.binomial(
+            pooled_out.shape,
+            p=retain_prob,
+            dtype=theano.config.floatX)
+        pooled_out /= retain_prob
+
         # add the bias term. Since the bias is a vector (1D array), we first
         # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
         # thus be broadcasted across mini-batches and feature map
@@ -123,7 +137,7 @@
 
 def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                     dataset='mnist.pkl.gz',
-                    nkerns=[20, 50, 70], batch_size=100):
+                    nkerns=[256, 256], batch_size=20):
     """ Demonstrates lenet on MNIST dataset
 
     :type learning_rate: float
@@ -174,7 +188,7 @@
     # to a 4D tensor, compatible with our LeNetConvPoolLayer
     # (28, 28) is the size of MNIST images.
     #layer0_input = x.reshape((batch_size, 1, 28, 28))
-    layer0_input = x.reshape((batch_size, 1, 1206, 128))
+    layer0_input = x.reshape((batch_size, 1, 1204, 513))
     # Construct the first convolutional pooling layer:
     # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
     # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
@@ -183,10 +197,11 @@
         rng,
         input=layer0_input,
         #image_shape=(batch_size, 1, 28, 28),
-        image_shape=(batch_size, 1, 1206, 128),
+        image_shape=(batch_size, 1, 1204, 513),
         #filter_shape=(nkerns[0], 1, 5, 5),
-        filter_shape=(nkerns[0], 1, 11, 11),
-        poolsize=(2, 2)
+        filter_shape=(nkerns[0], 1, 4, 513),
+        #poolsize=(2, 2)
+        poolsize=(4, 1)
     )
 
     # Construct the second convolutional pooling layer
@@ -197,50 +212,52 @@
         rng,
         input=layer0.output,
         #image_shape=(batch_size, nkerns[0], 12, 12),
-        image_shape=(batch_size, nkerns[0], 598, 59),
+        image_shape=(batch_size, nkerns[0], 300, 1),
         #filter_shape=(nkerns[1], nkerns[0], 5, 5),
-        filter_shape=(nkerns[1], nkerns[0], 5, 5),
-        poolsize=(2, 2)
+        filter_shape=(nkerns[1], nkerns[0], 4, 1),
+        #poolsize=(2, 2)
+        poolsize=(2, 1)
     )
 
     # Construct the third convolutional pooling layer
+    '''
     layer2 = LeNetConvPoolLayer(
         rng,
         input=layer1.output,
-        image_shape=(batch_size, nkerns[1], 297, 27),
+        image_shape=(batch_size, nkerns[1], 296, 123),
         filter_shape=(nkerns[2], nkerns[1], 5, 5),
         poolsize=(1, 1)
     )
-
+    '''
     # the HiddenLayer being fully-connected, it operates on 2D matrices of
     # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
     # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
-    layer3_input = layer2.output.flatten(2)
+    layer2_input = layer1.output.flatten(2)
 
     # construct a fully-connected sigmoidal layer
-    layer3 = HiddenLayer(
+    layer2 = HiddenLayer(
         rng,
-        input=layer3_input,
+        input=layer2_input,
         #n_in=nkerns[1] * 4 * 4,
-        n_in=nkerns[2] * 293 * 23,
+        n_in=nkerns[1] * 148 * 1,
         #n_out=500,
-        n_out=500,
+        n_out=513,
         #activation=T.tanh
         activation=relu
     )
 
     # classify the values of the fully-connected sigmoidal layer
     #layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
-    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)
+    layer3 = LogisticRegression(input=layer2.output, n_in=513, n_out=10)
 
     # the cost we minimize during training is the NLL of the model
-    cost = layer4.negative_log_likelihood(y)
+    cost = layer3.negative_log_likelihood(y)
 
     # create a function to compute the mistakes that are made by the model
     test_model = theano.function(
         [index],
-        layer4.errors(y),
+        layer3.errors(y),
         givens={
             x: test_set_x[index * batch_size: (index + 1) * batch_size],
             y: test_set_y[index * batch_size: (index + 1) * batch_size]
@@ -249,15 +266,29 @@
 
     validate_model = theano.function(
         [index],
-        layer4.errors(y),
+        layer3.errors(y),
         givens={
             x: valid_set_x[index * batch_size: (index + 1) * batch_size],
             y: valid_set_y[index * batch_size: (index + 1) * batch_size]
         }
     )
-
+
+    # Paulo: Set best param for MLP pre-training
+    f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'rb')
+    #params3 = cPickle.load(f)
+    params0, params1, params2, params3 = cPickle.load(f)
+    f.close()
+    #layer0.W.set_value(params0[0])
+    #layer0.b.set_value(params0[1])
+    layer1.W.set_value(params1[0])
+    layer1.b.set_value(params1[1])
+    layer2.W.set_value(params2[0])
+    layer2.b.set_value(params2[1])
+    layer3.W.set_value(params3[0])
+    layer3.b.set_value(params3[1])
+
     # create a list of all model parameters to be fit by gradient descent
-    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
+    params = layer3.params + layer2.params + layer1.params + layer0.params
 
     # create a list of gradients for all model parameters
     grads = T.grad(cost, params)
@@ -349,7 +380,12 @@
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
                            test_score * 100.))
-
+                    # Paulo: Get best parameters for MLP
+                    best_params0 = [param.get_value().copy() for param in layer0.params]
+                    best_params1 = [param.get_value().copy() for param in layer1.params]
+                    best_params2 = [param.get_value().copy() for param in layer2.params]
+                    best_params3 = [param.get_value().copy() for param in layer3.params]
+
             if patience <= iter:
                 done_looping = True
                 break
@@ -362,7 +398,11 @@
     print >> sys.stderr, ('The code for file ' +
                           os.path.split(__file__)[1] +
                           ' ran for %.2fm' % ((end_time - start_time) / 60.))
-
+    # Paulo: Save best param for MLP
+    f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'wb')
+    cPickle.dump((best_params0, best_params1, best_params2, best_params3), f, protocol=cPickle.HIGHEST_PROTOCOL)
+    f.close()
+
 if __name__ == '__main__':
     evaluate_lenet5()
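
The dropout added to LeNetConvPoolLayer follows the multiplicative-mask pattern from the linked Newmu tutorial: each unit is kept with probability retain_prob and the survivors are scaled up by 1/retain_prob, so the expected activation is unchanged ("inverted dropout"). A minimal standalone sketch of that pattern, assuming only Theano; the dropout function name, seed and test input are illustrative, not from the repository:

# Sketch of the inverted-dropout pattern used in the patch above.
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

srng = RandomStreams(seed=42)  # illustrative seed

def dropout(activations, p_drop=0.25):
    # Keep each unit with probability 1 - p_drop, zero the rest, and
    # rescale so the expected value of the output matches the input.
    retain_prob = 1. - p_drop
    mask = srng.binomial(activations.shape, p=retain_prob,
                         dtype=theano.config.floatX)
    return activations * mask / retain_prob

x = T.matrix('x')
f = theano.function([x], dropout(T.maximum(0., x)))  # ReLU, then dropout

One caveat visible in the patch: the mask is applied unconditionally inside the layer, so the random noise is also active when test_model and validate_model are evaluated; gating the mask on a training flag would disable it at evaluation time.
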
--- a/Code/eda.py	Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/eda.py	Mon Jul 27 19:24:37 2015 +0100
@@ -5,6 +5,8 @@
 @author: paulochiliguano
 """
 
+
+import random
 import numpy as np
 from sklearn import mixture
 
@@ -38,15 +40,15 @@
          "SOAKNZI12A58A79CAC": 3.0}
 }
 
-items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1],
-         "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1],
-         "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1],
-         "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1],
-         "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1],
-         "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1],
-         "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1],
-         "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1]}
-
+items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1, 5, 4, 1],
+         "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1, 5, 4, 1],
+         "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1, 5, 4, 1],
+         "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1, 5, 4, 1],
+         "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
+         "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
+         "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
+         "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]}
+'''
 profile = {"Profile0": [2.5, 4, 3.5, 3, 5, 4, 1],
            "Profile1": [2.5, 4, 3.5, 3, 5, 4, 1],
            "Profile2": [2.5, 4, 3.5, 3, 5, 4, 1],
@@ -55,9 +57,19 @@
            "Profile5": [2.5, 4, 3.5, 3, 5, 4, 1],
            "Profile6": [2.5, 4, 3.5, 3, 5, 4, 1],
            "Profile7": [2.5, 4, 3.5, 3, 5, 4, 1]}
-
-
+'''
+'''
+Generate M individuals uniformly
+'''
+np.random.seed(len(users))
+M = np.random.uniform(1, 5, len(users) * len(items.values()[0]))
+M.shape = (-1, len(items.values()[0]))
+profile = {}
+i = 0
+for row in M.tolist():
+    profile["Profile" + str(i)] = M.tolist()[i]
+    i = i + 1
 
 np.random.seed(1)
 g = mixture.GMM(n_components=7)
 
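
The eda.py change discards the hard-coded profile dictionary and instead draws len(users) profiles uniformly from [1, 5], one value per item feature. A sketch of the same construction written more directly; the sizes are illustrative stand-ins (the patch derives them from users and items), and enumerate() replaces the manual counter so M.tolist() is not re-evaluated on every loop iteration as in the patch:

# Equivalent, more direct construction of the random user profiles.
import numpy as np

n_users = 8        # stands in for len(users)
n_features = 10    # stands in for len(items.values()[0])

np.random.seed(n_users)
M = np.random.uniform(1, 5, size=(n_users, n_features))

# One profile per user, keyed Profile0..Profile7, each a list of floats.
profile = {"Profile%d" % i: row for i, row in enumerate(M.tolist())}
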
--- a/Code/logistic_sgd.py	Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/logistic_sgd.py	Mon Jul 27 19:24:37 2015 +0100
@@ -99,9 +99,8 @@
         # x is a matrix where row-j represents input training sample-j
         # b is a vector where element-k represent the free parameter of
         # hyperplane-k
-        #self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
-        self.p_y_given_x = relu(T.dot(input, self.W) + self.b)
-        #print(self.p_y_given_x)
+        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
+        #self.p_y_given_x = relu(T.dot(input, self.W) + self.b)
 
         # symbolic description of how to compute prediction as class whose
         # probability is maximal
@@ -204,11 +203,11 @@
         urllib.urlretrieve(origin, dataset)
 
     print '... loading data'
-    '''
+
     # Load the dataset
-    #f = gzip.open(dataset, 'rb')
-    #train_set, valid_set, test_set = cPickle.load(f)
-    #f.close()
+    f = gzip.open(dataset, 'rb')
+    train_set, valid_set, test_set = cPickle.load(f)
+    f.close()'''
     f = file('/homes/pchilguano/deep_learning/gtzan.pkl', 'rb')
     train_set, valid_set, test_set = cPickle.load(f)
     f.close()
@@ -478,4 +477,4 @@
 # Rectifier Linear Unit
 #Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
 def relu(x):
-    return T.switch(x<0, 0, x)
+    return T.maximum(0.,x)
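
The first hunk reverts the output layer from relu back to softmax. That matters because the model's cost is the negative log-likelihood, which treats p_y_given_x as a probability distribution over classes; a ReLU output is neither normalised nor strictly positive, so its logarithm is ill-defined. The standard formulation, as in the deeplearning.net tutorial this file is based on:

# Negative log-likelihood over a minibatch, assuming p_y_given_x is the
# softmax output (rows sum to 1) and y holds the integer class labels.
import theano.tensor as T

def negative_log_likelihood(p_y_given_x, y):
    # Mean of -log P(correct class | input); the fancy index picks out,
    # for each row, the probability assigned to its true label y.
    return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
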
--- a/Code/mlp.py	Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/mlp.py	Mon Jul 27 19:24:37 2015 +0100
@@ -409,4 +409,4 @@
 # Rectifier Linear Unit
 #Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
 def relu(x):
-    return T.switch(x<0, 0, x)
+    return T.maximum(0.,x)
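
The same one-line ReLU rewrite lands in mlp.py. T.switch(x < 0, 0, x) and T.maximum(0., x) compute identical values; T.maximum simply yields a more compact graph. A quick equivalence check, as a sketch assuming a working Theano install:

# Verify the two ReLU formulations agree numerically.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
f_switch = theano.function([x], T.switch(x < 0, 0, x))
f_max = theano.function([x], T.maximum(0., x))

v = np.asarray([-2., 0., 3.], dtype=theano.config.floatX)
assert np.allclose(f_switch(v), f_max(v))
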
--- a/Code/prepare_dataset.py	Sat Jul 25 21:51:16 2015 +0100
+++ b/Code/prepare_dataset.py	Mon Jul 27 19:24:37 2015 +0100
@@ -9,6 +9,7 @@
 import tables
 import numpy as np
 import cPickle
+import sklearn.preprocessing as preprocessing
 
 filename = '/homes/pchilguano/deep_learning/features/feats.h5'
 with tables.openFile(filename, 'r') as f:
@@ -19,12 +20,14 @@
 n_per_example = np.prod(features.shape[1:-1])
 number_of_features = features.shape[-1]
 flat_data = features.view()
-flat_data.shape = (features.shape[0], -1)
+flat_data.shape = (-1, number_of_features)
+scaler = preprocessing.StandardScaler().fit(flat_data)
+flat_data = scaler.transform(flat_data)
+#flat_data.shape = (features.shape[0], -1)
 
 #flat_targets = filenames.repeat(n_per_example)
 #genre = np.asarray([line.strip().split('\t')[1] for line in open(filename,'r').readlines()])
-
 filename = '/homes/pchilguano/deep_learning/lists/ground_truth.txt'
 with open(filename, 'r') as f:
     tag_set = set()
@@ -34,7 +37,7 @@
 tag_dict = dict([(item, index) for index, item in enumerate(sorted(tag_set))])
 
 with open(filename, 'r') as f:
-    target = np.asarray([])
+    target = np.asarray([], dtype='int32')
     mp3_dict = {}
     for line in f:
         tag = line.strip().split('\t')[1]
@@ -46,3 +49,12 @@
 f = file('/homes/pchilguano/deep_learning/gtzan.pkl', 'wb')
 cPickle.dump(((train_input, train_target), (valid_input, valid_target), (test_input, test_target)), f, protocol=cPickle.HIGHEST_PROTOCOL)
 f.close()
+
+flat_target = target.repeat(n_per_example)
+
+train_input, valid_input, test_input = np.array_split(flat_data, [flat_data.shape[0]*4/5, flat_data.shape[0]*9/10])
+train_target, valid_target, test_target = np.array_split(flat_target, [flat_target.shape[0]*4/5, flat_target.shape[0]*9/10])
+
+f = file('/homes/pchilguano/deep_learning/gtzan_logistic.pkl', 'wb')
+cPickle.dump(((train_input, train_target), (valid_input, valid_target), (test_input, test_target)), f, protocol=cPickle.HIGHEST_PROTOCOL)
+f.close()
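
prepare_dataset.py now standardises every feature to zero mean and unit variance with scikit-learn before pickling, and writes a second, frame-level dataset (gtzan_logistic.pkl) split 80/10/10 with np.array_split. The scaling-and-split pattern in isolation, on synthetic data; the shapes are illustrative (the real script reads feats.h5), and only the 513-feature width comes from the diff:

# Standardise features, then cut consecutive train/valid/test chunks.
import numpy as np
import sklearn.preprocessing as preprocessing

X = np.random.rand(1000, 513)                        # frames x features
y = np.random.randint(0, 10, 1000).astype('int32')   # one label per frame

scaler = preprocessing.StandardScaler().fit(X)   # per-feature mean and std
X = scaler.transform(X)

# np.array_split takes cut indices: [80%, 90%] yields three chunks.
n = X.shape[0]
X_train, X_valid, X_test = np.array_split(X, [n * 4 // 5, n * 9 // 10])
y_train, y_valid, y_test = np.array_split(y, [n * 4 // 5, n * 9 // 10])

As in the patch, the scaler is fit on the full matrix before splitting, so validation and test statistics leak into the normalisation; fitting it on the training portion alone would avoid that.
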