"""
This tutorial introduces logistic regression using Theano and stochastic
gradient descent.

Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.

Mathematically, this can be written as:

.. math::
  P(Y=i|x, W,b) &= softmax_i(W x + b) \\
                &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}


The model's prediction is then obtained by taking the argmax of the vector
whose i'th element is P(Y=i|x):

.. math::

  y_{pred} = argmax_i P(Y=i|x,W,b)


This tutorial presents a stochastic gradient descent optimization method
suitable for large datasets.


References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 4.3.2

"""
__docformat__ = 'restructuredtext en'

import cPickle
import gzip
import os
import sys
import timeit

import numpy

import theano
import theano.tensor as T


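# --- Hedged illustration -----------------------------------------------------
# A small numpy-only sketch of the softmax / argmax computation described in
# the module docstring above. It is not used by the Theano model below, and
# the helper name `numpy_softmax_predict` is ours, chosen for illustration.
def numpy_softmax_predict(x, W, b):
    """Return (class probabilities, predicted class) for a single example x."""
    scores = numpy.dot(x, W) + b
    scores = scores - scores.max()  # shift for stability; softmax is unchanged
    e = numpy.exp(scores)
    p_y_given_x = e / e.sum()       # P(Y=i|x, W, b) = softmax_i(W x + b)
    return p_y_given_x, p_y_given_x.argmax()

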
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # start-snippet-1
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # symbolic expression for computing the matrix of class-membership
        # probabilities, where:
        # W is a matrix whose column k represents the separating hyperplane
        # for class k
        # x is a matrix whose row j represents input training sample j
        # b is a vector whose element k represents the free parameter of
        # hyperplane k
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # symbolic description of how to compute the prediction as the class
        # whose probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1
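        # Hedged shape note (using the MNIST sizes passed in below, n_in=784,
        # n_out=10): `input` is (batch_size, 784), W is (784, 10), b is (10,),
        # so p_y_given_x is (batch_size, 10) and y_pred is a vector of
        # batch_size predicted class indices.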

        # parameters of the model
        self.params = [self.W, self.b]

        # keep track of model input
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
                \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                    \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D}) = - \mathcal{L} (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # the number of examples (call it n) in the minibatch.
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0, 1, 2, ..., n-1].  T.log(self.p_y_given_x) is a matrix of
        # log-probabilities (call it LP) with one row per example and
        # one column per class.  LP[T.arange(y.shape[0]), y] is a vector
        # v containing [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]],
        # and T.mean(LP[T.arange(y.shape[0]), y]) is the mean (across
        # minibatch examples) of the elements in v, i.e., the mean
        # log-likelihood across the minibatch.  (A numpy version of this
        # indexing trick is sketched just after this class.)
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # end-snippet-2

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples in the minibatch, i.e. the zero-one
        loss over the size of the minibatch.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has the same dimension as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


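# --- Hedged illustration -----------------------------------------------------
# Numpy-only sketches (our names; not used elsewhere in this file) of the two
# quantities computed symbolically by the class above: the mean negative
# log-likelihood, using the same fancy-indexing trick as
# `negative_log_likelihood`, and the zero-one error rate of `errors`.
def numpy_negative_log_likelihood(p_y_given_x, y):
    """p_y_given_x: (n, n_classes) array of probabilities; y: (n,) int labels."""
    log_p = numpy.log(p_y_given_x)
    # pick LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]], then average
    return -numpy.mean(log_p[numpy.arange(y.shape[0]), y])


def numpy_zero_one_error(y_pred, y):
    """Fraction of examples where the predicted label differs from the target."""
    return numpy.mean(y_pred != y)

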
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print 'Downloading data from %s' % origin
        urllib.urlretrieve(origin, dataset)

    print '... loading data'

    # Load the dataset
    f = gzip.open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) in which each row
    # corresponds to an example.  target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.
    # It gives the target to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that).  But during our computations
        # we need them as ints (we use labels as indices, and if they are
        # floats it doesn't make sense), therefore instead of returning
        # ``shared_y`` we will have to cast it to int.  This little hack
        # lets us get around the issue (a tiny sketch of the trick appears
        # just after ``load_data``).
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
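

# --- Hedged illustration -----------------------------------------------------
# A minimal sketch (our function name) of the label-storage trick used in
# `shared_dataset` above: labels are stored on the device as floatX, and
# `T.cast` gives back a symbolic int32 view suitable for indexing.  Wrapped in
# a function so that nothing runs at import time.
def _shared_labels_example():
    labels = numpy.asarray([3, 1, 4], dtype=theano.config.floatX)
    shared_y = theano.shared(labels, borrow=True)  # float storage, GPU-friendly
    y_int = T.cast(shared_y, 'int32')              # symbolic int32 view of the labels
    return shared_y, y_int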


def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute the number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for the input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as a 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of the cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3
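    # Hedged reading of the function just compiled: a call such as
    # `train_model(0)` evaluates the cost on rows [0, batch_size) of the
    # training set (through the `givens` substitutions above), applies the
    # two updates W <- W - learning_rate * g_W and b <- b - learning_rate * g_b,
    # and returns the minibatch cost.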

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
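    # Hedged worked example with the defaults above and the 50000-example
    # MNIST training set: n_train_batches = 50000 / 600 = 83 (integer
    # division), so validation_frequency = min(83, 5000 / 2) = 83, i.e. the
    # model is validated once per epoch, as the comment above says.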

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute the zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            ' with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))


def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = cPickle.load(open('best_model.pkl', 'rb'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in the test set:")
    print predicted_values


if __name__ == '__main__':
    sgd_optimization_mnist()
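    # Once training has written best_model.pkl, predictions on the first ten
    # test images can be printed by additionally calling predict(); it is left
    # commented out here so the default behaviour of the script is unchanged.
    # predict()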