hybrid-music-recommender-using-content-based-and-social-information
Code/genre_classification/classification/convolutional_mlp_7digital.py @ 24:68a62ca32441

changeset 24: "Organized python scripts"
author: Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date:   Sat, 15 Aug 2015 19:16:17 +0100
"""This tutorial introduces the LeNet5 neural network architecture
using Theano. LeNet5 is a convolutional neural network, good for
classifying images. This tutorial shows how to build the architecture,
and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.


This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters
 - LeNetConvPool doesn't implement pooling by average, it implements pooling
   by max.
 - Digit classification is implemented with a logistic regression rather than
   an RBF network
 - Unlike LeNet5, the convolutions at the second layer are fully connected

References:
 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
   Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer

# Paulo: Additional libraries
import cPickle
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

# Paulo: Rectifier Linear Unit
# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
    return T.maximum(0., x)

# Paulo: Random Streams
srng = RandomStreams()
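# (MRG_RandomStreams is Theano's MRG31k3p-based random number generator,
#  usable on GPU; it is used below to sample the binomial dropout masks.)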

class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
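        # (this is the Glorot/Xavier uniform initialisation bound,
        #  sqrt(6 / (fan_in + fan_out)))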
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # Paulo: dropout
        # Source: https://github.com/Newmu/Theano-Tutorials/blob/master/5_convolutional_net.py
        retain_prob = 1 - 0.20
        pooled_out *= srng.binomial(
            pooled_out.shape,
            p=retain_prob,
            dtype=theano.config.floatX)
        pooled_out /= retain_prob
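        # (dividing by retain_prob keeps the expected value of each pooled
        #  activation equal to its value without dropout -- the usual
        #  "inverted dropout" rescaling)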

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        #self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

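# Worked shape example for the configuration used below: a LeNetConvPoolLayer
# with image_shape=(10, 1, 130, 128), filter_shape=(32, 1, 8, 1) and
# poolsize=(4, 1) yields an output of shape (10, 32, 30, 128), since the
# convolution gives (130 - 8 + 1, 128) = (123, 128) and max-pooling with
# ignore_border=True gives (123 // 4, 128 // 1) = (30, 128).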
'''
def evaluate_lenet5(learning_rate=0.01, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[32, 32], batch_size=10):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    #layer0_input = x.reshape((batch_size, 1, 28, 28))
    layer0_input = x.reshape((batch_size, 1, 130, 128))
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        #image_shape=(batch_size, 1, 28, 28),
        image_shape=(batch_size, 1, 130, 128),
        #filter_shape=(nkerns[0], 1, 5, 5),
        filter_shape=(nkerns[0], 1, 8, 1),
        #poolsize=(2, 2)
        poolsize=(4, 1)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        #image_shape=(batch_size, nkerns[0], 12, 12),
        image_shape=(batch_size, nkerns[0], 30, 128),
        #filter_shape=(nkerns[1], nkerns[0], 5, 5),
        filter_shape=(nkerns[1], nkerns[0], 8, 1),
        #poolsize=(2, 2)
        poolsize=(4, 1)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        #n_in=nkerns[1] * 4 * 4,
        n_in=nkerns[1] * 5 * 128,
        n_out=500,
        #n_out=100,
        #activation=T.tanh
        activation=relu
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    #layer4 = LogisticRegression(input=layer3.output, n_in=50, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Paulo: Set best param for MLP pre-training
    f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'rb')
    params0, params1, params2, params3 = cPickle.load(f)
    f.close()
    layer0.W.set_value(params0[0])
    layer0.b.set_value(params0[1])
    layer1.W.set_value(params1[0])
    layer1.b.set_value(params1[1])
    layer2.W.set_value(params2[0])
    layer2.b.set_value(params2[1])
    layer3.W.set_value(params3[0])
    layer3.b.set_value(params3[1])

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    #params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                 # go through this many
                                 # minibatches before checking the network
                                 # on the validation set; in this case we
                                 # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    # Paulo: Get best parameters for MLP
                    best_params0 = [param.get_value().copy() for param in layer0.params]
                    best_params1 = [param.get_value().copy() for param in layer1.params]
                    best_params2 = [param.get_value().copy() for param in layer2.params]
                    best_params3 = [param.get_value().copy() for param in layer3.params]
                    #best_params4 = [param.get_value().copy() for param in layer4.params]

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # Paulo: Save best param for MLP
    f = file('/homes/pchilguano/deep_learning/best_params.pkl', 'wb')
    cPickle.dump((best_params0, best_params1, best_params2, best_params3), f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
'''
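# Note: evaluate_lenet5 above (now commented out) was the training routine: it
# fits the network with minibatch SGD and early stopping and dumps the best
# parameters to best_params.pkl, which genres_lenet5 below reloads to run the
# trained network in prediction mode.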
def genres_lenet5(dataset, nkerns=[32, 32], batch_size=10):
    """
    :type dataset: string
    :param dataset: path to the pickled array of rasterized audio patches to classify

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    f = file(dataset, 'rb')
    data_x = cPickle.load(f)
    f.close()

    test_set_x = theano.shared(
        numpy.asarray(
            data_x,
            dtype=theano.config.floatX
        ),
        borrow=True
    )

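    # (data_x is expected to hold one rasterized 130x128 patch per row, i.e. an
    #  array of shape (n_clips, 130 * 128); each minibatch is later reshaped to
    #  (batch_size, 1, 130, 128) before entering the first convolutional layer)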
    #datasets = load_data(dataset)

    #train_set_x, train_set_y = datasets[0]
    #valid_set_x, valid_set_y = datasets[1]
    #test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]

    #n_train_batches /= batch_size
    #n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    #y = T.ivector('y')  # the labels are presented as 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape the matrix of rasterized patches of shape (batch_size, 130 * 128)
    # to a 4D tensor of shape (batch_size, 1, 130, 128), compatible with our
    # LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 130, 128))
    # Construct the first convolutional pooling layer:
    # filtering reduces the input size to (130-8+1, 128) = (123, 128)
    # maxpooling with (4, 1) reduces this further to (30, 128)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 30, 128)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 130, 128),
        filter_shape=(nkerns[0], 1, 8, 1),
        poolsize=(4, 1)
    )

    # Construct the second convolutional pooling layer:
    # filtering reduces the input size to (30-8+1, 128) = (23, 128)
    # maxpooling with (4, 1) reduces this further to (5, 128)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 128)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 30, 128),
        filter_shape=(nkerns[1], nkerns[0], 8, 1),
        poolsize=(4, 1)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 5 * 128),
    # or (10, 32 * 5 * 128) = (10, 20480) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with ReLU activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 128,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    # cost = layer3.negative_log_likelihood(y)
    '''
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    '''
    # Genre soft classification
    test_model = theano.function(
        [index],
        layer3.p_y_given_x,
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )
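    # (for each clip in the minibatch, p_y_given_x is the softmax output of the
    #  logistic regression layer: a probability distribution over the 10 genre
    #  classes, used below as the track's content-based feature vector)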

    # Paulo: Set best parameters
    f = file('/homes/pchilguano/msc_project/dataset/genre_classification/\
best_params.pkl', 'rb')
    params0, params1, params2, params3 = cPickle.load(f)
    f.close()
    layer0.W.set_value(params0[0])
    layer0.b.set_value(params0[1])
    layer1.W.set_value(params1[0])
    layer1.b.set_value(params1[1])
    layer2.W.set_value(params2[0])
    layer2.b.set_value(params2[1])
    layer3.W.set_value(params3[0])
    layer3.b.set_value(params3[1])

    # Probabilities
    print "Computing probabilities..."
    start_time = timeit.default_timer()
    genre_prob_batch = [test_model(i).tolist() for i in xrange(n_test_batches)]
    end_time = timeit.default_timer()
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    genre_prob = [item for sublist in genre_prob_batch for item in sublist]

    filename = '/homes/pchilguano/msc_project/dataset/7digital/lists/\
audio_files.txt'
    with open(filename, 'r') as f:
        songID = [line.strip().split('/')[-1][:-4] for line in f]

    items = dict(zip(songID, genre_prob))
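    # (zip pairs song IDs with probability vectors positionally, so
    #  audio_files.txt is assumed to list the tracks in the same order as the
    #  pickled feature file; clips beyond the last complete minibatch are not
    #  classified and therefore get no dictionary entry)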
    print "Saving songs feature vectors in dictionary..."
    f = file('/homes/pchilguano/msc_project/dataset/genre_classification/\
genre_prob.pkl', 'wb')
    cPickle.dump(items, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()

    '''
    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                 # go through this many
                                 # minibatches before checking the network
                                 # on the validation set; in this case we
                                 # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    # Paulo: Get best parameters for MLP
                    best_params0 = [param.get_value().copy() for param in layer0.params]
                    best_params1 = [param.get_value().copy() for param in layer1.params]
                    best_params2 = [param.get_value().copy() for param in layer2.params]
                    best_params3 = [param.get_value().copy() for param in layer3.params]

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # Paulo: Save best param for MLP
    f = file('/homes/pchilguano/deep_learning/genre_prob.pkl', 'wb')
    cPickle.dump((best_params0, best_params1, best_params2, best_params3), f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    '''
if __name__ == '__main__':
    #evaluate_lenet5()
    genres_lenet5(
        dataset='/homes/pchilguano/msc_project/dataset/7digital/features/\
feats.pkl'
    )

#def experiment(state, channel):
#    evaluate_lenet5(state.learning_rate, dataset=state.dataset)
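# Usage note (given the author's hard-coded paths above): running this module
# directly loads feats.pkl, pushes each clip through the pretrained network,
# and writes genre_prob.pkl, a dictionary mapping song IDs (parsed from the
# audio file names) to 10-dimensional genre probability vectors.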