annotate Code/genre_classification/learning/logistic_sgd.py @ 47:b0186d4a4496 tip

Move 7Digital dataset to Downloads
author Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk>
date Sat, 09 Jul 2022 00:50:43 -0500
parents 68a62ca32441
children
rev   line source
p@24 1 """
p@24 2 This tutorial introduces logistic regression using Theano and stochastic
p@24 3 gradient descent.
p@24 4
p@24 5 Logistic regression is a probabilistic, linear classifier. It is parametrized
p@24 6 by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
p@24 7 done by projecting data points onto a set of hyperplanes, the distance to
p@24 8 which is used to determine a class membership probability.
p@24 9
p@24 10 Mathematically, this can be written as:
p@24 11
p@24 12 .. math::
p@24 13 P(Y=i|x, W,b) &= softmax_i(W x + b) \\
p@24 14 &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
p@24 15
p@24 16
p@24 17 The model's prediction is then obtained by taking the argmax of
p@24 18 the vector whose i'th element is P(Y=i|x).
p@24 19
p@24 20 .. math::
p@24 21
p@24 22 y_{pred} = argmax_i P(Y=i|x,W,b)
p@24 23
p@24 24
p@24 25 This tutorial presents a stochastic gradient descent optimization method
p@24 26 suitable for large datasets.
p@24 27
p@24 28
p@24 29 References:
p@24 30
p@24 31 - textbooks: "Pattern Recognition and Machine Learning" -
p@24 32 Christopher M. Bishop, section 4.3.2
p@24 33
p@24 34 """
p@24 35 __docformat__ = 'restructuredtext en'
p@24 36
p@24 37 import cPickle
p@24 38 import gzip
p@24 39 import os
p@24 40 import sys
p@24 41 import timeit
p@24 42
p@24 43 import numpy
p@24 44
p@24 45 import theano
p@24 46 import theano.tensor as T
p@24 47
p@24 48
p@24 49 class LogisticRegression(object):
p@24 50 """Multi-class Logistic Regression Class
p@24 51
p@24 52 The logistic regression is fully described by a weight matrix :math:`W`
p@24 53 and bias vector :math:`b`. Classification is done by projecting data
p@24 54 points onto a set of hyperplanes, the distance to which is used to
p@24 55 determine a class membership probability.
p@24 56 """
p@24 57
p@24 58 def __init__(self, input, n_in, n_out):
p@24 59 """ Initialize the parameters of the logistic regression
p@24 60
p@24 61 :type input: theano.tensor.TensorType
p@24 62 :param input: symbolic variable that describes the input of the
p@24 63 architecture (one minibatch)
p@24 64
p@24 65 :type n_in: int
p@24 66 :param n_in: number of input units, the dimension of the space in
p@24 67 which the datapoints lie
p@24 68
p@24 69 :type n_out: int
p@24 70 :param n_out: number of output units, the dimension of the space in
p@24 71 which the labels lie
p@24 72
p@24 73 """
p@24 74 # start-snippet-1
p@24 75 # initialize the weights W with zeros, as a matrix of shape (n_in, n_out)
p@24 76 self.W = theano.shared(
p@24 77 value=numpy.zeros(
p@24 78 (n_in, n_out),
p@24 79 dtype=theano.config.floatX
p@24 80 ),
p@24 81 name='W',
p@24 82 borrow=True
p@24 83 )
p@24 84 # initialize the biases b as a vector of n_out 0s
p@24 85 self.b = theano.shared(
p@24 86 value=numpy.zeros(
p@24 87 (n_out,),
p@24 88 dtype=theano.config.floatX
p@24 89 ),
p@24 90 name='b',
p@24 91 borrow=True
p@24 92 )
p@24 93
p@24 94 # symbolic expression for computing the matrix of class-membership
p@24 95 # probabilities
p@24 96 # Where:
p@24 97 # W is a matrix where column-k represents the separation hyperplane for
p@24 98 # class-k
p@24 99 # x is a matrix where row-j represents input training sample-j
p@24 100 # b is a vector where element-k represents the free parameter of
p@24 101 # hyperplane-k
p@24 102 self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
p@24 103
p@24 104 # symbolic description of how to compute prediction as class whose
p@24 105 # probability is maximal
p@24 106 self.y_pred = T.argmax(self.p_y_given_x, axis=1)
p@24 107 # end-snippet-1
p@24 108
p@24 109 # parameters of the model
p@24 110 self.params = [self.W, self.b]
p@24 111
p@24 112 # keep track of model input
p@24 113 self.input = input
p@24 114
p@24 115 def negative_log_likelihood(self, y):
p@24 116 """Return the mean of the negative log-likelihood of the prediction
p@24 117 of this model under a given target distribution.
p@24 118
p@24 119 .. math::
p@24 120
p@24 121 \ell (\theta=\{W,b\}, \mathcal{D}) =
p@24 122 - \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
p@24 123 - \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
p@24 124 \log(P(Y=y^{(i)}|x^{(i)}, W, b))
p@24 125
p@24 126 :type y: theano.tensor.TensorType
p@24 127 :param y: corresponds to a vector that gives for each example the
p@24 128 correct label
p@24 129
p@24 130 Note: we use the mean instead of the sum so that
p@24 131 the learning rate is less dependent on the batch size
p@24 132 """
p@24 133 # start-snippet-2
p@24 134 # y.shape[0] is (symbolically) the number of rows in y, i.e.,
p@24 135 # number of examples (call it n) in the minibatch
p@24 136 # T.arange(y.shape[0]) is a symbolic vector which will contain
p@24 137 # [0,1,2,... n-1]. T.log(self.p_y_given_x) is a matrix of
p@24 138 # Log-Probabilities (call it LP) with one row per example and
p@24 139 # one column per class. LP[T.arange(y.shape[0]),y] is a vector
p@24 140 # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
p@24 141 # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
p@24 142 # the mean (across minibatch examples) of the elements in v,
p@24 143 # i.e., the mean log-likelihood across the minibatch.
p@24 144 return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
p@24 145 # end-snippet-2
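# A quick numeric check of the indexing above (illustrative, not part of the
# original comments): with n = 2 examples and labels y = [1, 0],
#   >>> LP = numpy.log([[0.2, 0.8], [0.6, 0.4]])
#   >>> -numpy.mean(LP[numpy.arange(2), [1, 0]])
# selects LP[0, 1] and LP[1, 0], giving -(log 0.8 + log 0.6) / 2, roughly 0.367.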
p@24 146
p@24 147 def errors(self, y):
p@24 148 """Return a float representing the number of errors in the minibatch
p@24 149 over the total number of examples of the minibatch ; zero one
p@24 150 loss over the size of the minibatch
p@24 151
p@24 152 :type y: theano.tensor.TensorType
p@24 153 :param y: corresponds to a vector that gives for each example the
p@24 154 correct label
p@24 155 """
p@24 156
p@24 157 # check if y has the same dimension as y_pred
p@24 158 if y.ndim != self.y_pred.ndim:
p@24 159 raise TypeError(
p@24 160 'y should have the same shape as self.y_pred',
p@24 161 ('y', y.type, 'y_pred', self.y_pred.type)
p@24 162 )
p@24 163 # check if y is of the correct datatype
p@24 164 if y.dtype.startswith('int'):
p@24 165 # the T.neq operator returns a vector of 0s and 1s, where 1
p@24 166 # represents a mistake in prediction
p@24 167 return T.mean(T.neq(self.y_pred, y))
p@24 168 else:
p@24 169 raise NotImplementedError()
p@24 170
p@24 171
p@24 172 def load_data(dataset):
p@24 173 ''' Loads the dataset
p@24 174
p@24 175 :type dataset: string
p@24 176 :param dataset: the path to the dataset (here MNIST)
p@24 177 '''
p@24 178 #############
p@24 179 # LOAD DATA #
p@24 180 #############
p@24 181 '''
p@24 182 # Download the MNIST dataset if it is not present
p@24 183 data_dir, data_file = os.path.split(dataset)
p@24 184 if data_dir == "" and not os.path.isfile(dataset):
p@24 185 # Check if dataset is in the data directory.
p@24 186 new_path = os.path.join(
p@24 187 os.path.split(__file__)[0],
p@24 188 "..",
p@24 189 "data",
p@24 190 dataset
p@24 191 )
p@24 192 if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
p@24 193 dataset = new_path
p@24 194
p@24 195 if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
p@24 196 import urllib
p@24 197 origin = (
p@24 198 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
p@24 199 )
p@24 200 print 'Downloading data from %s' % origin
p@24 201 urllib.urlretrieve(origin, dataset)
p@24 202
p@24 203 print '... loading data'
p@24 204
p@24 205 # Load the dataset
p@24 206 f = gzip.open(dataset, 'rb')
p@24 207 '''
p@24 208 f = open(dataset, 'rb')
p@24 209 train_set, valid_set, test_set = cPickle.load(f)
p@24 210 f.close()
p@24 211 # train_set, valid_set, test_set format: tuple(input, target)
p@24 212 # input is a numpy.ndarray of 2 dimensions (a matrix) in which
p@24 213 # each row corresponds to an example. target is a
p@24 214 # numpy.ndarray of 1 dimension (a vector) that has the same length as
p@24 215 # the number of rows in the input. It gives the target
p@24 216 # to the example with the same index in the input.
p@24 217
p@24 218 def shared_dataset(data_xy, borrow=True):
p@24 219 """ Function that loads the dataset into shared variables
p@24 220
p@24 221 The reason we store our dataset in shared variables is to allow
p@24 222 Theano to copy it into the GPU memory (when code is run on GPU).
p@24 223 Since copying data into the GPU is slow, copying a minibatch every time
p@24 224 it is needed (the default behaviour if the data is not in a shared
p@24 225 variable) would lead to a large decrease in performance.
p@24 226 """
p@24 227 data_x, data_y = data_xy
p@24 228 shared_x = theano.shared(numpy.asarray(data_x,
p@24 229 dtype=theano.config.floatX),
p@24 230 borrow=borrow)
p@24 231 shared_y = theano.shared(numpy.asarray(data_y,
p@24 232 dtype=theano.config.floatX),
p@24 233 borrow=borrow)
p@24 234 # When storing data on the GPU it has to be stored as floats
p@24 235 # therefore we will store the labels as ``floatX`` as well
p@24 236 # (``shared_y`` does exactly that). But during our computations
p@24 237 # we need them as ints (we use labels as index, and if they are
p@24 238 # floats it doesn't make sense) therefore instead of returning
p@24 239 # ``shared_y`` we will have to cast it to int. This little hack
p@24 240 # lets us get around this issue
p@24 241 return shared_x, T.cast(shared_y, 'int32')
p@24 242
p@24 243 test_set_x, test_set_y = shared_dataset(test_set)
p@24 244 valid_set_x, valid_set_y = shared_dataset(valid_set)
p@24 245 train_set_x, train_set_y = shared_dataset(train_set)
p@24 246
p@24 247 rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
p@24 248 (test_set_x, test_set_y)]
p@24 249 return rval
p@24 250
p@24 251
p@24 252 def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
p@24 253 dataset='mnist.pkl.gz',
p@24 254 batch_size=600):
p@24 255 """
p@24 256 Demonstrate stochastic gradient descent optimization of a log-linear
p@24 257 model
p@24 258
p@24 259 This is demonstrated on MNIST.
p@24 260
p@24 261 :type learning_rate: float
p@24 262 :param learning_rate: learning rate used (factor for the stochastic
p@24 263 gradient)
p@24 264
p@24 265 :type n_epochs: int
p@24 266 :param n_epochs: maximal number of epochs to run the optimizer
p@24 267
p@24 268 :type dataset: string
p@24 269 :param dataset: the path of the MNIST dataset file from
p@24 270 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
p@24 271
p@24 272 """
p@24 273 datasets = load_data(dataset)
p@24 274
p@24 275 train_set_x, train_set_y = datasets[0]
p@24 276 valid_set_x, valid_set_y = datasets[1]
p@24 277 test_set_x, test_set_y = datasets[2]
p@24 278
p@24 279 # compute number of minibatches for training, validation and testing
p@24 280 n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
p@24 281 n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
p@24 282 n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
p@24 283
p@24 284 ######################
p@24 285 # BUILD ACTUAL MODEL #
p@24 286 ######################
p@24 287 print '... building the model'
p@24 288
p@24 289 # allocate symbolic variables for the data
p@24 290 index = T.lscalar() # index to a [mini]batch
p@24 291
p@24 292 # generate symbolic variables for input (x and y represent a
p@24 293 # minibatch)
p@24 294 x = T.matrix('x') # data, presented as rasterized images
p@24 295 y = T.ivector('y') # labels, presented as 1D vector of [int] labels
p@24 296
p@24 297 # construct the logistic regression class
p@24 298 # Each MNIST image has size 28*28
p@24 299 classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
p@24 300
p@24 301 # the cost we minimize during training is the negative log likelihood of
p@24 302 # the model in symbolic format
p@24 303 cost = classifier.negative_log_likelihood(y)
p@24 304
p@24 305 # compiling a Theano function that computes the mistakes that are made by
p@24 306 # the model on a minibatch
p@24 307 test_model = theano.function(
p@24 308 inputs=[index],
p@24 309 outputs=classifier.errors(y),
p@24 310 givens={
p@24 311 x: test_set_x[index * batch_size: (index + 1) * batch_size],
p@24 312 y: test_set_y[index * batch_size: (index + 1) * batch_size]
p@24 313 }
p@24 314 )
p@24 315
p@24 316 validate_model = theano.function(
p@24 317 inputs=[index],
p@24 318 outputs=classifier.errors(y),
p@24 319 givens={
p@24 320 x: valid_set_x[index * batch_size: (index + 1) * batch_size],
p@24 321 y: valid_set_y[index * batch_size: (index + 1) * batch_size]
p@24 322 }
p@24 323 )
p@24 324
p@24 325 # compute the gradient of cost with respect to theta = (W,b)
p@24 326 g_W = T.grad(cost=cost, wrt=classifier.W)
p@24 327 g_b = T.grad(cost=cost, wrt=classifier.b)
p@24 328
p@24 329 # start-snippet-3
p@24 330 # specify how to update the parameters of the model as a list of
p@24 331 # (variable, update expression) pairs.
p@24 332 updates = [(classifier.W, classifier.W - learning_rate * g_W),
p@24 333 (classifier.b, classifier.b - learning_rate * g_b)]
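# (Illustrative note, not part of the original comments: each pair applies the
# plain SGD rule  param := param - learning_rate * d(cost)/d(param)  every time
# `train_model` below is called.)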
p@24 334
p@24 335 # compiling a Theano function `train_model` that returns the cost and, at
p@24 336 # the same time, updates the parameters of the model based on the rules
p@24 337 # defined in `updates`
p@24 338 train_model = theano.function(
p@24 339 inputs=[index],
p@24 340 outputs=cost,
p@24 341 updates=updates,
p@24 342 givens={
p@24 343 x: train_set_x[index * batch_size: (index + 1) * batch_size],
p@24 344 y: train_set_y[index * batch_size: (index + 1) * batch_size]
p@24 345 }
p@24 346 )
p@24 347 # end-snippet-3
p@24 348
p@24 349 ###############
p@24 350 # TRAIN MODEL #
p@24 351 ###############
p@24 352 print '... training the model'
p@24 353 # early-stopping parameters
p@24 354 patience = 5000 # look at this many examples regardless
p@24 355 patience_increase = 2 # wait this much longer when a new best is
p@24 356 # found
p@24 357 improvement_threshold = 0.995 # a relative improvement of this much is
p@24 358 # considered significant
p@24 359 validation_frequency = min(n_train_batches, patience / 2)
p@24 360 # go through this many
p@24 361 # minibatches before checking the network
p@24 362 # on the validation set; in this case we
p@24 363 # check every epoch
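# For example (illustrative numbers, not in the original comments): with the
# 50,000-example MNIST training split and batch_size=600,
# n_train_batches = 50000 / 600 = 83, so validation_frequency =
# min(83, 5000 / 2) = 83 and validation runs once per epoch.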
p@24 364
p@24 365 best_validation_loss = numpy.inf
p@24 366 test_score = 0.
p@24 367 start_time = timeit.default_timer()
p@24 368
p@24 369 done_looping = False
p@24 370 epoch = 0
p@24 371 while (epoch < n_epochs) and (not done_looping):
p@24 372 epoch = epoch + 1
p@24 373 for minibatch_index in xrange(n_train_batches):
p@24 374
p@24 375 minibatch_avg_cost = train_model(minibatch_index)
p@24 376 # iteration number
p@24 377 iter = (epoch - 1) * n_train_batches + minibatch_index
p@24 378
p@24 379 if (iter + 1) % validation_frequency == 0:
p@24 380 # compute zero-one loss on validation set
p@24 381 validation_losses = [validate_model(i)
p@24 382 for i in xrange(n_valid_batches)]
p@24 383 this_validation_loss = numpy.mean(validation_losses)
p@24 384
p@24 385 print(
p@24 386 'epoch %i, minibatch %i/%i, validation error %f %%' %
p@24 387 (
p@24 388 epoch,
p@24 389 minibatch_index + 1,
p@24 390 n_train_batches,
p@24 391 this_validation_loss * 100.
p@24 392 )
p@24 393 )
p@24 394
p@24 395 # if we got the best validation score until now
p@24 396 if this_validation_loss < best_validation_loss:
p@24 397 # improve patience if loss improvement is good enough
p@24 398 if this_validation_loss < best_validation_loss * \
p@24 399 improvement_threshold:
p@24 400 patience = max(patience, iter * patience_increase)
p@24 401
p@24 402 best_validation_loss = this_validation_loss
p@24 403 # test it on the test set
p@24 404
p@24 405 test_losses = [test_model(i)
p@24 406 for i in xrange(n_test_batches)]
p@24 407 test_score = numpy.mean(test_losses)
p@24 408
p@24 409 print(
p@24 410 (
p@24 411 ' epoch %i, minibatch %i/%i, test error of'
p@24 412 ' best model %f %%'
p@24 413 ) %
p@24 414 (
p@24 415 epoch,
p@24 416 minibatch_index + 1,
p@24 417 n_train_batches,
p@24 418 test_score * 100.
p@24 419 )
p@24 420 )
p@24 421
p@24 422 # save the best model
p@24 423 with open('best_model.pkl', 'wb') as f:
p@24 424 cPickle.dump(classifier, f)
p@24 425
p@24 426 if patience <= iter:
p@24 427 done_looping = True
p@24 428 break
p@24 429
p@24 430 end_time = timeit.default_timer()
p@24 431 print(
p@24 432 (
p@24 433 'Optimization complete with best validation score of %f %%,'
p@24 434 ' with test performance %f %%'
p@24 435 )
p@24 436 % (best_validation_loss * 100., test_score * 100.)
p@24 437 )
p@24 438 print 'The code ran for %d epochs, with %f epochs/sec' % (
p@24 439 epoch, 1. * epoch / (end_time - start_time))
p@24 440 print >> sys.stderr, ('The code for file ' +
p@24 441 os.path.split(__file__)[1] +
p@24 442 ' ran for %.1fs' % ((end_time - start_time)))
p@24 443
p@24 444
p@24 445 def predict():
p@24 446 """
p@24 447 An example of how to load a trained model and use it
p@24 448 to predict labels.
p@24 449 """
p@24 450
p@24 451 # load the saved model
p@24 452 classifier = cPickle.load(open('best_model.pkl', 'rb'))
p@24 453
p@24 454 # compile a predictor function
p@24 455 predict_model = theano.function(
p@24 456 inputs=[classifier.input],
p@24 457 outputs=classifier.y_pred)
p@24 458
p@24 459 # We can test it on some examples from the test set
p@24 460 dataset = 'mnist.pkl.gz'
p@24 461 datasets = load_data(dataset)
p@24 462 test_set_x, test_set_y = datasets[2]
p@24 463 test_set_x = test_set_x.get_value()
p@24 464
p@24 465 predicted_values = predict_model(test_set_x[:10])
p@24 466 print ("Predicted values for the first 10 examples in test set:")
p@24 467 print predicted_values
p@24 468
p@24 469
p@24 470 if __name__ == '__main__':
p@24 471 sgd_optimization_mnist()
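# Once training has written best_model.pkl, the saved classifier can be tried
# out by also calling predict() here; this call is an optional, illustrative
# addition and is not part of the original script.
# predict()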
p@24 472