function [sTo] = som_autolabel(sTo, sFrom, mode, inds)

%SOM_AUTOLABEL Automatic labeling of a map or data struct.
%
% sTo = som_autolabel(sTo, sFrom, [mode], [inds])
%
%   sM = som_autolabel(sM,sD);
%   sD = som_autolabel(sD,sM);
%   sM = som_autolabel(sM,sD,'vote',[5]);
%
%  Input and output arguments ([]'s are optional):
%   sTo      (struct) data or map struct to which the labels are put,
%                     the modified struct is returned
%   sFrom    (struct) data or map struct from which the labels are taken
%   [mode]   (string) labeling algorithm: 'add' (the default), 'add1',
%                     'freq' or 'vote'
%   [inds]   (vector) the column-indexes of the labels that are to be
%                     used in the operation (e.g. [2] would mean to use
%                     only the second column of labels array in sFrom)
%
%  The modes:
%   'add':   all labels from sFrom are added to sTo (even multiple
%            copies of same)
%   'add1':  only one instance of each label is kept
%   'freq':  only one instance of each label is kept and '(#)', where
%            # is the frequency of the label, is added to the end of
%            the label. Labels are ordered according to frequency.
%   'vote':  only the label with most instances is kept
%
%  NOTE: The operations are only performed for the new labels.
%        The old labels in sTo are left as they are.
%  NOTE: all empty labels ('') are ignored.
%
% For more help, try 'type som_autolabel' or check out online documentation.
% See also SOM_LABEL, SOM_BMUS, SOM_SHOW_ADD, SOM_SHOW.

%%%%%%%%%%%%% DETAILED DESCRIPTION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% som_autolabel
%
% PURPOSE
%
% Automatically label a map/data struct based on a given data/map struct.
%
% SYNTAX
%
%  sTo = som_autolabel(sTo, sFrom)
%  sTo = som_autolabel(sTo, sFrom, 'add')
%  sTo = som_autolabel(sTo, sFrom, 'add1')
%  sTo = som_autolabel(sTo, sFrom, 'freq')
%  sTo = som_autolabel(sTo, sFrom, 'vote')
%  sTo = som_autolabel(..., inds)
%
% DESCRIPTION
%
% This function automatically labels the given map/data struct based on
% an already labelled data/map struct. The vectors of the two structs are
% paired through their best-matching units (BMUs, computed with
% SOM_BMUS), and the labels found in sFrom are added to the paired
% vector of sTo. Which of the collected labels are actually added is
% decided by the mode:
%
%  'add'  : all labels from sFrom are added to sTo - even if there would
%           be multiple instances of the same label
%  'add1' : only one instance of each label is kept
%  'freq' : only one instance of each label is kept and '(#)', where
%           # is the frequency of the label, is added to the end of
%           the label. Labels are ordered according to frequency.
%  'vote' : only the label with most instances is added
%
% Any other mode string leaves the collected labels untouched, i.e. it
% behaves like 'add'.
%
% Note that these operations do not affect the old labels of sTo: they
% are left as they were.
%
% NOTE: empty labels ('') are ignored.
%
% REQUIRED INPUT ARGUMENTS
%
%  sTo    (struct) data or map struct to which the labels are put
%  sFrom  (struct) data or map struct from which the labels are taken
%
% OPTIONAL INPUT ARGUMENTS
%
%  mode   (string) The mode of operation: 'add' (default),
%                  'add1', 'freq' or 'vote'. An empty value selects
%                  the default.
%  inds   (vector) The columns of the '.labels' field in sFrom to be
%                  used in operation. Indexes outside the range of
%                  existing columns are silently dropped.
%
% OUTPUT ARGUMENTS
%
%  sTo    (struct) the given data/map struct with modified labels
%
% EXAMPLES
%
% To label a trained map based on (labelled) training data, just do
%
%  sM = som_autolabel(sM,sD);
%
% This operation is sometimes called "calibration" in the literature.
% You can also do this the other way around: use a labelled map to
% label a data set:
%
%  sD = som_autolabel(sD,sM);
%
% If you only want a single instance of each label, use the 'freq' mode:
%
%  sM = som_autolabel(sM,sD,'freq');
%
% If you already have labels in the struct, and want to perform 'freq' on
% them, do the following:
%
%  sMtemp = som_label(sM,'clear','all'); % make a map struct with no labels
%  sM = som_autolabel(sMtemp,sM,'freq'); % add labels to it
%
% The mode 'vote' votes between the labels and only adds the one
% which is most frequent. If two labels are equally frequent, the one
% which appears first in the list is chosen.
%
%  sM = som_autolabel(sM,sD,'vote');
%
% The last argument is useful if you have specific labels in each column
% of the '.labels' field. For example, the first column might be an
% identifier, the next a typecode and the last a year. In this case, you
% might want to label the map based only on the typecode:
%
%  sM = som_autolabel(sM,sD,'vote',2);
%
% SEE ALSO
%
%  som_label     Give/remove labels from a map/data set.
%  som_bmus      Find BMUs from the map for the given set of data vectors.
%  som_show      Show map planes.
%  som_show_add  Add for example labels to the SOM_SHOW visualization.

% Copyright (c) 1997-2000 by the SOM toolbox programming team.
% http://www.cis.hut.fi/projects/somtoolbox/

% Version 1.0beta juuso 101297
% Version 2.0beta juuso 101199

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% check arguments

error(nargchk(2, 4, nargin));  % accept 2 to 4 input arguments

% sTo: the labeling direction depends on whether the target is a data struct
to_data = strcmp(sTo.type,'som_data');

% sFrom: number of label columns available in the source
[unused, n_cols] = size(sFrom.labels);

% mode: missing or empty means 'add'
if nargin < 3,
  mode = 'add';
elseif isempty(mode),
  mode = 'add';
end

% inds: default is every column; out-of-range indexes are dropped
if nargin < 4, inds = 1:n_cols; end
inds = inds(inds > 0 & inds <= n_cols);
cols = inds(:)';   % row vector, so that it can drive a for-loop directly

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% get a list of the labels to be added to each vector

% calculate BMUs: the map struct goes first, the data struct second
if to_data,
  bmus = som_bmus(sFrom,sTo,1);
else
  bmus = som_bmus(sTo,sFrom,1);
end

% for each vector in sTo, collect all new (non-empty) labels
Labels = cell(size(sTo.labels,1),1);
for k = 1:length(bmus),
  hit = bmus(k);
  if ~isnan(hit),   % vectors without a BMU contribute nothing
    % k indexes the data side and hit the map side; which of them is
    % the source and which the target depends on the direction
    if to_data,
      dst = k;   src = hit;
    else
      dst = hit; src = k;
    end
    for c = cols,
      lab = sFrom.labels{src,c};
      if ~isempty(lab),
        collected = Labels{dst};
        collected{length(collected)+1} = lab;
        Labels{dst} = collected;
      end
    end
  end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% insert the labels to sTo

% only these three modes post-process the collected lists;
% anything else passes them on unchanged
is_add1 = strcmp(mode,'add1');
is_freq = strcmp(mode,'freq');
is_vote = strcmp(mode,'vote');

if is_add1 | is_freq | is_vote,

  for i = 1:length(Labels),

    % distinct labels of this vector and how often each one occurs
    [uniq, counts] = autolabel_tally(Labels{i});

    % vectors that collected no labels are left as they are
    if ~isempty(uniq),
      if strcmp(mode,'add1'),
        % one instance of each label, in order of first appearance
        Labels{i} = uniq;
      else
        % most frequent first; the sort is stable, so equally frequent
        % labels stay in order of first appearance
        [unused, order] = sort(-counts);
        uniq   = uniq(order);
        counts = counts(order);

        switch mode,
         case 'freq',
          % replace each label with 'label(#)' where # is the frequency
          for j = 1:length(uniq),
            uniq{j} = sprintf('%s(%d)',uniq{j},counts(j));
          end
          Labels{i} = uniq;
         case 'vote',
          % keep only the label with most votes
          Labels{i} = uniq(1);
        end
      end
    end

  end

end

targets = (1:length(Labels))';
sTo = som_label(sTo,'add',targets,Labels);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% subfunctions

function [uniq, counts] = autolabel_tally(list)

% AUTOLABEL_TALLY Distinct non-empty labels of a list and their frequencies.
%
%  list    (cell array of strings, or empty) labels collected for one vector
%  uniq    (cell array of strings) the distinct non-empty labels, in
%          order of first appearance
%  counts  (vector) counts(k) is the number of times uniq{k} occurs in list

uniq   = {};
counts = [];
for j = 1:length(list),
  lab = list{j};
  if ~isempty(lab),
    if ~any(strcmp(lab,uniq)),   % first time this label is seen
      k = length(uniq) + 1;
      uniq{k}   = lab;
      counts(k) = sum(strcmp(lab,list));
    end
  end
end