function net = mlp(nin, nhidden, nout, outfunc, prior, beta)
%MLP	Create a 2-layer feedforward network.
%
%	Description
%	NET = MLP(NIN, NHIDDEN, NOUT, FUNC) takes the number of inputs,
%	hidden units and output units for a 2-layer feed-forward network,
%	together with a string FUNC which specifies the output unit
%	activation function, and returns a data structure NET. The weights
%	are drawn from a zero mean, unit variance isotropic Gaussian, with
%	variance scaled by the fan-in of the hidden or output units as
%	appropriate. This makes use of the Matlab function RANDN and so the
%	seed for the random weight initialization can be set using
%	RANDN('STATE', S) where S is the seed value. The hidden units use
%	the TANH activation function.
%
%	The fields in NET are
%	  type = 'mlp'
%	  nin = number of inputs
%	  nhidden = number of hidden units
%	  nout = number of outputs
%	  nwts = total number of weights and biases
%	  outfn = string describing the output unit activation function:
%	      'linear'
%	      'logistic'
%	      'softmax'
%	  w1 = first-layer weight matrix
%	  b1 = first-layer bias vector
%	  w2 = second-layer weight matrix
%	  b2 = second-layer bias vector
%	Here W1 has dimensions NIN times NHIDDEN, B1 has dimensions 1 times
%	NHIDDEN, W2 has dimensions NHIDDEN times NOUT, and B2 has dimensions
%	1 times NOUT.
%
%	NET = MLP(NIN, NHIDDEN, NOUT, FUNC, PRIOR), in which PRIOR is a
%	scalar, allows the field NET.ALPHA in the data structure NET to be
%	set, corresponding to a zero-mean isotropic Gaussian prior with
%	inverse variance with value PRIOR. Alternatively, PRIOR can consist
%	of a data structure with fields ALPHA and INDEX, allowing individual
%	Gaussian priors to be set over groups of weights in the network. Here
%	ALPHA is a column vector in which each element corresponds to a
%	separate group of weights, which need not be mutually exclusive.  The
%	membership of the groups is defined by the matrix INDEX in which the
%	columns correspond to the elements of ALPHA. Each column has one
%	element for each weight in the matrix, in the order defined by the
%	function MLPPAK, and each element is 1 or 0 according to whether the
%	weight is a member of the corresponding group or not. A utility
%	function MLPPRIOR is provided to help in setting up the PRIOR data
%	structure. Any other kind of PRIOR (empty, vector, matrix or N-D
%	array) raises the error 'prior must be a scalar or a structure'.
%
%	NET = MLP(NIN, NHIDDEN, NOUT, FUNC, PRIOR, BETA) also sets the
%	additional field NET.BETA in the data structure NET, where beta
%	corresponds to the inverse noise variance.
%
%	An unrecognised FUNC raises the error
%	'Undefined output function. Exiting.'
%
%	See also
%	MLPPRIOR, MLPPAK, MLPUNPAK, MLPFWD, MLPERR, MLPBKP, MLPGRAD
%

%	Copyright (c) Ian T Nabney (1996-2001)

net.type = 'mlp';
net.nin = nin;
net.nhidden = nhidden;
net.nout = nout;
% Each hidden unit has nin weights plus a bias; each output unit has
% nhidden weights plus a bias.
net.nwts = (nin + 1)*nhidden + (nhidden + 1)*nout;

outfns = {'linear', 'logistic', 'softmax'};

if ~any(strcmp(outfunc, outfns))
  error('Undefined output function. Exiting.');
else
  net.outfn = outfunc;
end

if nargin > 4
  if isstruct(prior)
    net.alpha = prior.alpha;
    net.index = prior.index;
  elseif isscalar(prior)
    % isscalar is used rather than size(prior) == [1 1]: the latter
    % fails with a dimension-mismatch error for arrays with more than
    % two dimensions instead of reaching the intended message below.
    net.alpha = prior;
  else
    error('prior must be a scalar or a structure');
  end
end

% Weights and biases are scaled by 1/sqrt(fan-in + 1). The order of the
% RANDN calls is significant: it determines which random draws each
% parameter receives for a given seed.
net.w1 = randn(nin, nhidden)/sqrt(nin + 1);
net.b1 = randn(1, nhidden)/sqrt(nin + 1);
net.w2 = randn(nhidden, nout)/sqrt(nhidden + 1);
net.b2 = randn(1, nout)/sqrt(nhidden + 1);

if nargin == 6
  net.beta = beta;
end