% The AnnotDB class is the basic component for all genre and tag
% information; it manages the whole vocabulary.

classdef AnnotDB < handle
    % AnnotDB  Annotation database relating owner (clip) ids to annotations.
    %
    %   The database stores a lexicon (cell array of annotation strings) and
    %   a sparse numowners x numannots matrix that holds one score per
    %   (owner, annotation) pair. Entries > 0 count as "annotated".
    %
    %   NOTE: this class overloads size(); size(db) returns the number of
    %   lexicon entries.
    %   NOTE: string lookups use the helper strcellfind, which is not
    %   defined in this file. It is presumably returning an empty result
    %   when nothing matches (add_pair relies on that) - confirm against
    %   its implementation.

    % public properties
    properties (SetAccess = private)
        lexicon = {};   % annotation strings; lexicon{j} names column j of annotsdb
    end

    properties (Hidden, Access = private)

        annotsdb;       % a numowners x numannots sparse binary / prob matrix
        annots_oid;     % owner id stored for each row of annotsdb

        binary = 0;     % indicator whether the db contains binary or scored
                        % annots (never assigned by any method of this class)
    end

    methods

        % ---
        % simple constructor
        % ---
        function db = AnnotDB(lexicon, annots, ids)
            % db = AnnotDB()
            % db = AnnotDB(lexicon)
            % db = AnnotDB(lexicon, annots)
            % db = AnnotDB(lexicon, annots, ids)
            % db = AnnotDB('clips_by_annot', annots, ids)
            %
            % lexicon: either the cell array of all individual annotation
            %          elements, or one of the mode strings
            %          'clips_by_annot' / 'annots_by_clip'
            %
            % annots:  matrix mode: an owners x numel(lexicon) matrix that
            %          is stored as the sparse annotation matrix.
            %          'clips_by_annot' mode: cell array of annotation strings
            %
            % ids:     matrix mode: the owner id for each row of annots;
            %          defaults to 1:size(annots, 1) when omitted.
            %          'clips_by_annot' mode: either a cell array holding the
            %          owner ids for each annotation, or a plain vector with
            %          a single owner id per annotation

            if nargin < 1
                % empty database, all properties keep their defaults
                return;
            end

            % ---
            % NOTE: two ways of supplying the annots are allowed:
            %   1. clip ids for each lexical element
            %   2. binary matrix
            % ---
            if ischar(lexicon)

                if strcmp(lexicon, 'clips_by_annot')

                    % ---
                    % preset the lexicon and owner ids
                    % ---
                    db.lexicon = unique(lower(annots));

                    if iscell(ids)
                        db.annots_oid = unique([ids{:}]);
                    else
                        db.annots_oid = unique(ids);
                    end

                    db.annotsdb = sparse(numel(db.annots_oid), ...
                        numel(db.lexicon));

                    % for all annotations
                    for i = 1:numel(annots)

                        % is this a cell or just a single index?
                        if iscell(ids)
                            % for all ids in the set of this annotation
                            for j = 1:numel(ids{i})
                                db.add_pair(ids{i}(j), annots{i});
                            end
                        else
                            % single index case
                            db.add_pair(ids(i), annots{i});
                        end
                    end

                elseif strcmp(lexicon, 'annots_by_clip')
                    % TODO: this input mode is not implemented; the database
                    % is returned empty
                end

            % this is the binary (matrix) case
            else

                db.lexicon = lexicon;

                if nargin >= 2
                    db.annotsdb = sparse(annots);

                    if nargin >= 3
                        db.annots_oid = ids;
                    else
                        % no owner ids given: number the rows consecutively
                        db.annots_oid = 1:size(annots, 1);
                    end
                else
                    % lexicon only: no owners yet
                    db.annotsdb = sparse(0, numel(db.lexicon));
                end
            end
        end

        % ---
        % retrieve annot-substructure for given clip ids,
        % collecting std = [or = all], [and = common]
        % annots for these
        % ---
        function new_db = subset(db, ownerids, mode)
            % new_db = subset(db, ownerids, {'and', ['or']})
            %
            % Returns a new AnnotDB with the same lexicon that contains the
            % annotations of the given owner ids.
            % NOTE: only 'or' is implemented; 'and' returns a database
            % without any annotations.

            if nargin < 3
                mode = 'or';
            end

            % ---
            % create new DB
            % we make sure the tag id index keeps
            % the same for subsets by copying the whole
            % lexicon
            % ---
            new_db = AnnotDB(db.lexicon);

            switch lower(mode)
                case 'and'

                    % ---
                    % TODO: implement this and
                    % improve speed below
                    % ---
                case 'or'

                    % successively fill with given annots
                    for i = 1:numel(ownerids)

                        % ---
                        % we retrieve annots for each clip
                        % and add them to the new database
                        % ---
                        [annot, score] = annots(db, ownerids(i));
                        for j = 1:numel(annot)
                            new_db.add_pair(ownerids(i), annot{j}, score(j));
                        end
                    end
                otherwise
                    error('illegal owner id combination mode. possibly forgot brackets');
            end
        end

        % retrieve annot-substructure for complement
        % of given clip ids
        function [new_db] = exclude(db, ownerids)

            % get complement of clip ids
            ownerids = setdiff(db.annots_oid, ownerids);

            new_db = subset(db, ownerids);
        end

        % ---
        % retrieve clip by annot.
        % if multiple annots are given, the clips
        % containing all of them (logical and) are
        % returned
        % ---
        function oids = owner(db, annotstr, mode)
            % oids = owner(db, annotstr, {['and'], 'or'})
            %
            % annotstr: a single annotation string or a cell array of them.
            % In 'and' mode an annotation that is not part of the lexicon
            % cannot be owned by anybody, so the result is empty.

            if nargin < 3
                mode = 'and';
            end

            if ~iscell(annotstr)
                annotstr = {annotstr};
            end

            annotid = [];
            unknown = false;
            for i = 1:numel(annotstr)

                found = strcellfind(db.lexicon, annotstr{i});
                if isempty(found)
                    unknown = true;
                end
                annotid = [annotid found]; %#ok<AGROW>
            end

            % an unknown annotation makes the conjunction unsatisfiable
            if unknown && ischar(mode) && strcmpi(mode, 'and')
                oids = [];
                return;
            end

            oids = owner_for_annotid(db, annotid, mode);
        end

        % retrieve owner ids by annotation id
        function ownerids = owner_for_annotid(db, annotid, mode)
            % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
            %
            % An owner "has" an annotation when its entry is > 0, so scored
            % (non-binary) entries are handled the same as binary ones.

            if isempty(annotid)
                ownerids = [];
                return
            end
            if nargin < 3
                mode = 'and';
            end

            % presence indicator for the requested annotation columns
            present = db.annotsdb(:, annotid) > 0;

            switch lower(mode)
                case 'or'
                    % search for all appearing owners
                    candidates = any(present, 2);

                case 'and'
                    % search for the common owners
                    candidates = all(present, 2);

                otherwise
                    error('illegal tag combination mode');
            end

            % get positions in database
            pos = find(candidates);

            % return owner ids
            ownerids = db.annots_oid(pos);
        end

        % retrieve annotid by clip
        function [aid, score] = annotids_for_owner(db, ownerid, mode)
            % [aid, score] = annotids_for_owner(db, ownerid, mode)
            %
            % Single owner id: returns the ids of its annotations (entries
            % > 0) and their scores, sorted by descending score unless the
            % binary flag is set.
            % Several owner ids: returns count statistics over the subset of
            % these owners (see subset and stats_count); mode is passed on
            % to subset and defaults to 'or'.

            % single query case
            if numel(ownerid) == 1

                pos = owner_pos(db, ownerid);

                % get positions in database
                aid = find(db.annotsdb(pos, :) > 0);

                score = full(db.annotsdb(pos, aid));

                % sort ids for output
                if ~db.binary

                    [score, idx] = sort(score, 'descend');
                    aid = aid(idx);
                end
            else
                if nargin < 3
                    mode = 'or';
                end

                % ---
                % the query contained multiple ids
                %
                % we dont return the single results but
                % the statistics for this subset of clips
                % ---
                new_db = db.subset(ownerid, mode);
                [unused, score, aid] = new_db.stats_count(); %#ok<ASGLU>

                % cut off at score > 0 to abandon unused tags
                u = find(score > 0, 1, 'last');
                score = score(1:u);
                aid = aid(1:u);
            end
        end

        % retrieve annotation by clip
        function [out, score, aid] = annots(db, ownerid)
            % [out, score, aid] = annots(db, ownerid)
            % returns annotation names, scores and annotation ids

            [aid, score] = db.annotids_for_owner(ownerid);

            out = db.get_annot_name(aid);
        end

        % retrieve annot name given a annot id
        function out = get_annot_name(db, annotid)
            % out = get_annot_name(db, annotid)
            % returns a cell array with one lexicon entry per given id

            out = {};
            for i = 1:numel(annotid)

                out{i} = db.lexicon{annotid(i)}; %#ok<AGROW>
            end
        end

        % return annotation id for annotation string
        function aid = get_annot_id(db, annotstr)
            % aid = get_annot_id(db, annotstr)
            %
            % annotstr: a single string, or a cell array of strings.
            % For a cell array, an error is raised if one of the strings is
            % not part of the lexicon, because the result is positional.

            if ~iscell(annotstr)

                % expensive search within annot list
                aid = strcellfind(db.lexicon, annotstr);
                return;
            end

            % search separately for each annot
            aid = zeros(1, numel(annotstr));
            for i = 1:numel(annotstr)

                found = strcellfind(db.lexicon, annotstr{i});
                if isempty(found)
                    error('AnnotDB:unknownAnnotation', ...
                        'annotation ''%s'' is not part of the lexicon', ...
                        annotstr{i});
                end
                aid(i) = found;
            end
        end

        % ---
        % return statistics on saved annotations.
        % = returns the sum of the scores and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats(db)
            % [labels, score, annotids] = stats(db)
            % score: per-annotation sum of scores, sorted descending

            score = full(sum(db.annotsdb, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % return statistics on saved annotations.
        % = returns the number of annotations and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats_count(db)
            % [labels, score, annotids] = stats_count(db)
            % score: per-annotation number of entries > 0, sorted descending

            score = full(sum(db.annotsdb > 0, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % this is a stub for a tag cloud-like output
        function [out] = annots_cloud(db, ownerid)
            % out = annots_cloud(db)
            % out = annots_cloud(db, ownerid)
            %
            % Returns a string in which every annotation is repeated
            % according to its relative score, each followed by '; '.
            % Blanks inside an annotation are replaced by '-'.

            % ---
            % TODO: actually output tag-cloud
            % this output is aimed at input into a web interface
            % we successfully used http://www.wordle.net/
            % ---

            if nargin > 1
                db2 = db.subset(ownerid);
            else
                db2 = db;
            end

            [labels, score] = stats(db2);

            out = '';

            % nothing to show: avoids dividing by zero below
            top = max(score);
            if isempty(top) || top == 0
                return;
            end

            % ---
            % Note: for performance issues we compress this data
            % to a maximum value of 100
            % ---
            score = ceil((score ./ top) * 100);

            % repeat the tag according to score
            parts = repmat({''}, 1, numel(labels));
            for i = 1:numel(labels)

                reps = score(i);
                if ~(reps >= 1)
                    continue;
                end
                annot = strrep(labels{i}, ' ', '-');
                parts{i} = repmat([annot '; '], 1, reps);
            end

            % lowest scored annotation first, highest scored last
            out = ['', parts{end:-1:1}];
        end


        function out = size(db)
            % returns the size of this db (number of lexicon entries)

            out = numel(db.lexicon);
        end

        function add_pair(db, ownerid, annot, score)
            % add_pair(db, owner, annot) adds an annot and owner and can
            % increase the lexicon size
            %
            % add_pair(db, owner, annot, score) stores the given score
            % instead of the default 1

            if nargin < 4
                score = 1;
            end

            aid = strcellfind(db.lexicon, annot);

            % create new position for annotation if necessary
            if isempty(aid)

                aid = numel(db.lexicon) + 1;

                % add to lexicon
                db.lexicon = {db.lexicon{:}, annot};

                % enhance annotation matrix by one empty column
                db.annotsdb = [db.annotsdb, ...
                    sparse(size(db.annotsdb, 1), 1)];
            end

            % create new position for clip if necessary
            pos = owner_pos(db, ownerid);
            if isempty(pos)

                pos = numel(db.annots_oid) + 1;

                % add to oid; indexed assignment works for row and
                % column id vectors alike
                db.annots_oid(pos) = ownerid;

                % enhance annotation matrix by one empty row
                db.annotsdb = [db.annotsdb; ...
                    sparse(1, size(db.annotsdb, 2))];
            end

            % save data to database
            db.annotsdb(pos, aid) = score;
        end

    end


    methods(Hidden)

        function pos = owner_pos(db, ownerid)
            % returns database position (row index) for owner id;
            % empty if the owner id is not stored

            pos = find(db.annots_oid == ownerid);
        end
    end

end