Mercurial repository: hybrid-music-recommender-using-content-based-and-social-information
comparison: Code/genre_classification/learning/convolutional_mlp.py @ 24:68a62ca32441
summary:    Organized python scripts
author:     Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date:       Sat, 15 Aug 2015 19:16:17 +0100
changesets: 23:45e6f85d0ba4 -> 24:68a62ca32441

1 """This tutorial introduces the LeNet5 neural network architecture | |
2 using Theano. LeNet5 is a convolutional neural network, good for | |
3 classifying images. This tutorial shows how to build the architecture, | |
4 and comes with all the hyper-parameters you need to reproduce the | |
5 paper's MNIST results. | |
6 | |
7 | |
8 This implementation simplifies the model in the following ways: | |
9 | |
10 - LeNetConvPool doesn't implement location-specific gain and bias parameters | |
11 - LeNetConvPool doesn't implement pooling by average, it implements pooling | |
12 by max. | |
13 - Digit classification is implemented with a logistic regression rather than | |
14 an RBF network | |
15 - LeNet5 was not fully-connected convolutions at second layer | |
16 | |
17 References: | |
18 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner: | |
19 Gradient-Based Learning Applied to Document | |
20 Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998. | |
21 http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf | |
22 | |
23 """ | |
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer

# Paulo Chiliguano: Additional libraries
import cPickle
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

# Paulo Chiliguano: Rectifier Linear Unit
# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
    return T.maximum(0., x)
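# relu(x) computes the elementwise rectifier max(0, x); the scalar 0. is
# broadcast over the input tensor.  It is used below in place of the
# commented-out T.tanh activations in both LeNetConvPoolLayer and HiddenLayer.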

# Paulo: Random Streams
srng = RandomStreams(seed=234)

class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
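        # Worked example for the first-layer configuration used in this
        # script (filter_shape = (nkerns[0], 1, 8, 1), poolsize = (4, 1),
        # nkerns[0] = 32 as passed from __main__):
        #   fan_in  = 1 * 8 * 1 = 8
        #   fan_out = 32 * 8 * 1 / 4 = 64
        #   W_bound = sqrt(6 / (8 + 64)) ~= 0.29
        # i.e. the usual uniform "Xavier/Glorot" initialisation range.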

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # Paulo: dropout
        # Source: https://github.com/Newmu/Theano-Tutorials/blob/master/5_convolutional_net.py
        retain_prob = 1 - 0.20
        pooled_out *= srng.binomial(
            pooled_out.shape,
            p=retain_prob,
            dtype=theano.config.floatX)
        pooled_out /= retain_prob
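        # Each pooled activation is kept with probability retain_prob = 0.8
        # and zeroed otherwise; dividing by retain_prob keeps the expected
        # activation unchanged ("inverted dropout"), so no rescaling is needed
        # at prediction time.  Note that, as written, the same noisy
        # expression is also used when this layer is evaluated through the
        # validation and test functions below.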

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        #self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input


def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized inputs of shape (batch_size, 130 * 128)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (130, 128) is the size of each input patch ((28, 28) for MNIST).
    #layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0_input = x.reshape((batch_size, 1, 130, 128))
    # Construct the first convolutional pooling layer:
    # filtering reduces the input size to (130-8+1, 128-1+1) = (123, 128)
    # maxpooling reduces this further to (123//4, 128//1) = (30, 128)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 30, 128)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        #image_shape=(batch_size, 1, 28, 28),
        image_shape=(batch_size, 1, 130, 128),
        #filter_shape=(nkerns[0], 1, 5, 5),
        filter_shape=(nkerns[0], 1, 8, 1),
        #poolsize=(2, 2)
        poolsize=(4, 1)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the input size to (30-8+1, 128-1+1) = (23, 128)
    # maxpooling reduces this further to (23//4, 128//1) = (5, 128)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 128)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        #image_shape=(batch_size, nkerns[0], 12, 12),
        image_shape=(batch_size, nkerns[0], 30, 128),
        #filter_shape=(nkerns[1], nkerns[0], 5, 5),
        filter_shape=(nkerns[1], nkerns[0], 8, 1),
        #poolsize=(2, 2)
        poolsize=(4, 1)
    )
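    # The 130 x 128 input size, the (30, 128) and (5, 128) feature-map sizes,
    # and layer2's n_in below are hard-coded to match the filter and pooling
    # shapes chosen above; if filter_shape or poolsize change, these values
    # (and the dataset's 130 * 128 = 16640-dimensional feature rows) must be
    # updated by hand.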

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized inputs).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 5 * 128),
    # or (10, 32 * 5 * 128) = (10, 20480) with the values passed in __main__.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with ReLU activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        #n_in=nkerns[1] * 4 * 4,
        n_in=nkerns[1] * 5 * 128,
        n_out=500,
        #n_out=100,
        #activation=T.tanh
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    #layer4 = LogisticRegression(input=layer3.output, n_in=50, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Paulo: Set best param for MLP pre-training
    f = file('/homes/pchilguano/msc_project/dataset/genre_classification/\
best_params.pkl', 'rb')
    params0, params1, params2, params3 = cPickle.load(f)
    f.close()
    layer0.W.set_value(params0[0])
    layer0.b.set_value(params0[1])
    layer1.W.set_value(params1[0])
    layer1.b.set_value(params1[1])
    layer2.W.set_value(params2[0])
    layer2.b.set_value(params2[1])
    layer3.W.set_value(params3[0])
    layer3.b.set_value(params3[1])
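    # The best_params.pkl read above is the same file written by the
    # cPickle.dump at the end of this function, so the weights of all four
    # layers from a previous run serve as a warm start before training.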

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    #params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
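    # With the batch_size=10 used in __main__, n_train_batches is the number
    # of training examples // 10, so validation runs every
    # min(n_train_batches, 500) minibatches.  Whenever the validation loss
    # improves by more than 0.5% relative to the previous best
    # (improvement_threshold = 0.995), patience is extended to at least twice
    # the current iteration count; training stops once iter reaches patience.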

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    # Paulo: Get best parameters for MLP
                    best_params0 = [param.get_value().copy() for param in layer0.params]
                    best_params1 = [param.get_value().copy() for param in layer1.params]
                    best_params2 = [param.get_value().copy() for param in layer2.params]
                    best_params3 = [param.get_value().copy() for param in layer3.params]
                    #best_params4 = [param.get_value().copy() for param in layer4.params]

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # Paulo: Save best param for MLP
    f = file('/homes/pchilguano/msc_project/dataset/genre_classification/\
best_params.pkl', 'wb')
    cPickle.dump(
        (best_params0, best_params1, best_params2, best_params3),
        f,
        protocol=cPickle.HIGHEST_PROTOCOL
    )
    f.close()

if __name__ == '__main__':
    evaluate_lenet5(
        learning_rate=0.01,
        n_epochs=200,
        dataset='/homes/pchilguano/msc_project/dataset/gtzan/features/\
gtzan_3sec_2.pkl',
        nkerns=[32, 32],
        batch_size=10
    )
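    # Assumed dataset layout: gtzan_3sec_2.pkl is expected to follow the same
    # (train, valid, test) structure that load_data returns, with each row a
    # flattened 130 x 128 feature patch (presumably one 3-second GTZAN
    # excerpt) and an integer genre label in 0..9; this is inferred from the
    # reshape and n_out=10 above, not stated explicitly in this file.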

def experiment(state, channel):
    evaluate_lenet5(state.learning_rate, dataset=state.dataset)