"""
This tutorial introduces the multilayer perceptron using Theano.

A multilayer perceptron is a logistic regressor where, instead of
feeding the input to the logistic regression directly, you insert an
intermediate layer, called the hidden layer, that has a nonlinear
activation function (usually tanh or sigmoid). One can use many such
hidden layers, making the architecture deep. The tutorial will also tackle
the problem of MNIST digit classification.

.. math::

    f(x) = G(b^{(2)} + W^{(2)}(s(b^{(1)} + W^{(1)} x))),

References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 5

"""

__docformat__ = 'restructuredtext en'


import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T


from logistic_sgd import LogisticRegression, load_data


# start-snippet-1
class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP: units are fully connected and have
        a sigmoidal activation function. Weight matrix W is of shape
        (n_in, n_out) and the bias vector b is of shape (n_out,).

        NOTE: The nonlinearity used here is tanh.

        Hidden unit activation is given by: tanh(dot(input, W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: nonlinearity to be applied in the hidden
                           layer
        """
        self.input = input
        # end-snippet-1

        # `W` is initialized with `W_values`, which is uniformly sampled
        # from [-sqrt(6. / (n_in + n_hidden)), sqrt(6. / (n_in + n_hidden))]
        # for the tanh activation function.
        # The output of `uniform` is converted using `asarray` to dtype
        # theano.config.floatX so that the code is runnable on GPU.
        # Note: optimal initialization of weights depends on the
        # activation function used (among other things).
        # For example, results presented in [Xavier10] suggest that you
        # should use 4 times larger initial weights for sigmoid
        # compared to tanh.
        # We have no such information for other functions, so we use the
        # same scheme as for tanh.
        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
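            # (with the MNIST defaults used below, n_in=784 and n_out=500,
            # so the sampling bound is sqrt(6. / 1284) ~= 0.068)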
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]


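# A minimal usage sketch for the class above (illustrative only; the names
# are placeholders, not part of the tutorial):
#
#     rng = numpy.random.RandomState(0)
#     x = T.matrix('x')
#     layer = HiddenLayer(rng=rng, input=x, n_in=784, n_out=500)
#     f = theano.function([x], layer.output)
#     h = f(numpy.zeros((2, 784), dtype=theano.config.floatX))  # shape (2, 500)
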
# start-snippet-2
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one or more layers of hidden units with nonlinear activations.
    Intermediate layers usually use tanh or the sigmoid function as their
    activation function (defined here by a ``HiddenLayer`` class) while the
    top layer is a softmax layer (defined here by a ``LogisticRegression``
    class).
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """

        # Since we are dealing with a one-hidden-layer MLP, this will
        # translate into a HiddenLayer with a tanh activation function
        # connected to the LogisticRegression layer; the activation function
        # can be replaced by sigmoid or any other nonlinear function
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )
        # end-snippet-2 start-snippet-3
        # L1 norm; one regularization option is to enforce the L1 norm to
        # be small
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # square of L2 norm; one regularization option is to enforce the
        # square of the L2 norm to be small
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layers
        # it is made out of
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
        # end-snippet-3

        # keep track of model input
        self.input = input
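

# A minimal sketch of using MLP outside of test_mlp() (illustrative only;
# this assumes the ``y_pred`` attribute defined by the tutorial's
# LogisticRegression class in logistic_sgd.py):
#
#     rng = numpy.random.RandomState(1234)
#     x = T.matrix('x')
#     clf = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=500, n_out=10)
#     predict = theano.function([x], clf.logRegressionLayer.y_pred)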


def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4
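    # written out, the regularized objective being minimized is
    #     cost = NLL + L1_reg * (|W_hidden| + |W_logreg|)
    #                + L2_reg * (||W_hidden||^2 + ||W_logreg||^2)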

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]
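    # for this model, gparams holds four symbolic expressions:
    #     [d(cost)/dW_hidden, d(cost)/db_hidden,
    #      d(cost)/dW_logreg, d(cost)/db_logreg]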

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]
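    # each pair implements one step of plain stochastic gradient descent:
    #     param <- param - learning_rate * d(cost)/d(param)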

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
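    # (with the defaults here: MNIST has 50000 training examples and
    # batch_size is 20, so n_train_batches = 2500 and validation_frequency
    # = min(2500, 5000) = 2500, i.e. we validate once per epoch)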

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


# Rectified Linear Unit
# Source: http://stackoverflow.com/questions/26497564/theano-hiddenlayer-activation-function
def relu(x):
    return T.maximum(0., x)
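# (illustrative, not exercised by the tutorial itself: relu can be passed to
# the hidden layer in place of tanh, e.g.
#     HiddenLayer(rng=rng, input=x, n_in=784, n_out=500, activation=relu)
# note that the 4x weight-scaling special case above applies only to sigmoid)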


if __name__ == '__main__':
    test_mlp()