Mercurial > hg > camir-aes2014
comparison core/magnatagatune/AnnotDB.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 % The AnnotDB class is a basic component for handling all genre and | |
2 % tag information; it manages the whole vocabulary (lexicon). | |
3 | |
classdef AnnotDB < handle
    % ANNOTDB  Annotation database mapping owners (e.g. clips) to annotations.
    %
    % The database keeps a lexicon (cell array of annotation strings) and a
    % sparse numowners x numannots matrix holding either binary flags or
    % scores. An annotation counts as "present" for an owner when its
    % matrix entry is > 0.
    %
    % NOTE: string lookups in the lexicon are delegated to the external
    % helper strcellfind(cellarray, string), which is not defined in this
    % file; it is assumed to return the index of the matching entry or an
    % empty result if there is none - TODO confirm.

    % public (read-only) properties
    properties (SetAccess = private)
        lexicon = {};   % cell array of annotation strings; index = annot id
    end

    properties (Hidden, Access = private)

        annotsdb;       % a numowners x numannots sparse binary / prob matrix
        annots_oid;     % ownerid to pos in annots conversion

        % indicator whether the db contains binary or scored annots.
        % NOTE(review): nothing in this class ever sets this flag, so it
        % is always 0 and results are always sorted by score.
        binary = 0;
    end

    methods

        % ---
        % simple constructor
        % ---
        function db = AnnotDB(lexicon, annots, ids)
            % db = AnnotDB(lexicon, annots, annotation_ids)
            %
            % Two ways of supplying the annotations are allowed:
            %
            % 1. db = AnnotDB('clips_by_annot', annots, ids)
            %    annots: cell array of annotation strings
            %    ids:    either a cell array with ids{i} holding all owner
            %            ids annotated with annots{i}, or a plain array
            %            with ids(i) being the single owner of annots{i}
            %
            % 2. db = AnnotDB(lexicon, annots, ids)      (matrix case)
            %    lexicon: cell array of all individual annotation elements
            %    annots:  owners x numel(lexicon) binary / score matrix
            %    ids:     owner id for each row of annots; defaults to
            %             1:size(annots, 1) when omitted
            %
            % db = AnnotDB(lexicon) creates an empty database that already
            % knows the given lexicon. The mode string 'annots_by_clip' is
            % accepted but not implemented and yields an empty database.

            if nargin < 1
                return
            end

            if ischar(lexicon)

                if strcmp(lexicon, 'clips_by_annot')

                    % ---
                    % preset the lexicon and hash ids
                    % ---
                    db.lexicon = unique(lower(annots));

                    if iscell(ids)
                        % force row vectors so that the concatenation also
                        % works when the id lists are column vectors
                        rows = cellfun(@(x) reshape(x, 1, []), ids, ...
                            'UniformOutput', false);
                        db.annots_oid = unique([rows{:}]);
                    else
                        db.annots_oid = unique(ids);
                    end

                    db.annotsdb = sparse(numel(db.annots_oid), ...
                        numel(db.lexicon));

                    % for all annotations
                    for i = 1:numel(annots)

                        % the lexicon has been lower-cased above, so the
                        % pairs have to be stored lower-cased as well
                        annot = lower(annots{i});

                        if iscell(ids)
                            % several owner ids for this annotation
                            for j = 1:numel(ids{i})
                                db.add_pair(ids{i}(j), annot);
                            end
                        else
                            % single index case
                            db.add_pair(ids(i), annot);
                        end
                    end

                elseif strcmp(lexicon, 'annots_by_clip')
                    % not implemented: the database stays empty
                end

            else
                % this is the matrix (binary / scored) case
                db.lexicon = lexicon;

                if nargin >= 2
                    db.annotsdb = sparse(annots);

                    if nargin >= 3
                        db.annots_oid = ids;
                    else
                        % no ids given: number the owners by matrix row
                        db.annots_oid = 1:size(annots, 1);
                    end
                else
                    db.annotsdb = sparse(0, numel(db.lexicon));
                end
            end
        end

        % ---
        % retrieve annot-substructure for given clip ids,
        % collecting std = [or = all] ,[and = common]
        % annots for these
        % ---
        function new_db = subset(db, ownerids, mode)
            % new_db = subset(db, ownerids, {'and', ['or']})
            %
            % Returns a new AnnotDB restricted to the given owner ids.
            % The whole lexicon is copied, so annotation ids stay the same
            % in the subset. Owners without any annotation (score > 0) do
            % not appear in the result.
            %
            % NOTE: mode 'and' is not implemented yet and returns a
            % database that contains the lexicon but no annotations.

            if nargin < 3
                mode = 'or';
            end

            new_db = AnnotDB(db.lexicon);

            switch lower(mode)
                case 'and'

                    % ---
                    % TODO: implement this and
                    % improve speed below
                    % ---
                case 'or'

                    % successively fill with the annots of each owner
                    for i = 1:numel(ownerids)

                        [annot, score] = annots(db, ownerids(i));
                        for j = 1:numel(annot)
                            new_db.add_pair(ownerids(i), annot{j}, score(j));
                        end
                    end
                otherwise
                    error('AnnotDB:illegalMode', ...
                        'illegal owner id combination mode. possibly forgot brackets');
            end
        end

        % retrieve annot-substructure for complement
        % of given clip ids
        function [new_db] = exclude(db, ownerids)
            % new_db = exclude(db, ownerids) returns the subset of db
            % containing all owners except the given ones

            % get complement of clip ids
            ownerids = setdiff(db.annots_oid, ownerids);

            new_db = subset(db, ownerids);
        end

        % ---
        % retrieve clip by annot.
        % if multiple annots are given, the clips
        % containing all of them (logical and) are
        % returned
        % ---
        function oids = owner(db, annotstr, mode)
            % oids = owner(db, annotstr, {['and'], 'or'})
            %
            % annotstr: annotation string or cell array of such strings
            % mode:     'and' - owners having all annotations (default)
            %           'or'  - owners having at least one of them

            if nargin < 3
                mode = 'and';
            end

            if ~iscell(annotstr)
                annotstr = {annotstr};
            end

            annotid = [];
            for i = 1:numel(annotstr)

                found = strcellfind(db.lexicon, annotstr{i});

                % an annotation unknown to the lexicon is owned by nobody,
                % so no owner can have "all" of the requested annotations
                if isempty(found) && strcmpi(mode, 'and')
                    oids = [];
                    return
                end

                annotid = [annotid, reshape(found, 1, [])]; %#ok<AGROW>
            end

            oids = owner_for_annotid(db, annotid, mode);
        end

        % retrieve owner ids by annotation id
        function ownerids = owner_for_annotid(db, annotid, mode)
            % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
            %
            % Returns the ids of the owners annotated with all ('and') or
            % any ('or') of the given annotation ids. An annotation counts
            % as present if its entry is > 0, for binary as well as for
            % scored databases. Returns [] for an empty annotid.

            if isempty(annotid)
                ownerids = [];
                return
            end
            if nargin < 3
                mode = 'and';
            end

            % presence flags instead of raw scores: summing the scores
            % would give wrong results for non-binary databases
            present = db.annotsdb(:, annotid) > 0;

            switch lower(mode)
                case 'or'
                    % search for all appearing owners
                    candidates = any(present, 2);

                case 'and'
                    % search for the common owners
                    candidates = all(present, 2);

                otherwise
                    error('AnnotDB:illegalMode', ...
                        'illegal tag combination mode');
            end

            % get positions in database
            pos = find(candidates);

            % return owner ids
            ownerids = db.annots_oid(pos);
        end

        % retrieve annotid by clip
        function [aid, score] = annotids_for_owner(db, ownerid, mode)
            % [aid, score] = annotids_for_owner(db, ownerid, {'and', ['or']})
            %
            % Single owner id: returns the ids and scores of its
            % annotations (entries > 0), sorted by descending score.
            %
            % Multiple owner ids: returns statistics over the subset built
            % with the given mode: the number of owners per annotation
            % (descending) and the annotation ids, cut off after the last
            % annotation that is used at all.

            if numel(ownerid) == 1

                pos = owner_pos(db, ownerid);

                % annotation ids and their scores for this owner
                aid = find(db.annotsdb(pos, :) > 0);
                score = full(db.annotsdb(pos, aid));

                % sort ids for output
                if ~db.binary
                    [score, idx] = sort(score, 'descend');
                    aid = aid(idx);
                end
            else
                if nargin < 3
                    mode = 'or';
                end

                % ---
                % the query contained multiple ids
                %
                % we dont return the single results but
                % the statistics for this subset of clips
                % ---
                new_db = db.subset(ownerid, mode);
                [unused_labels, score, aid] = new_db.stats_count(); %#ok<ASGLU>

                % cut off at score > 0 to abandon unused tags
                u = find(score > 0, 1, 'last');
                score = score(1:u);
                aid = aid(1:u);
            end
        end

        % retrieve annotation by clip
        function [out, score, aid] = annots(db, ownerid)
            % [out, score, aid] = annots(db, ownerid) returns annotation
            % names, scores and annotation ids for the given owner id(s);
            % see annotids_for_owner for the single / multiple id cases

            [aid, score] = db.annotids_for_owner(ownerid);

            out = db.get_annot_name(aid);
        end

        % retrieve annot name given a annot id
        function out = get_annot_name(db, annotid)
            % out = get_annot_name(db, annotid) returns a cell array with
            % the lexicon entries for the given annotation ids

            out = {};
            for i = 1:numel(annotid)
                out{i} = db.lexicon{annotid(i)}; %#ok<AGROW>
            end
        end

        % return annotation id for annotation string
        function aid = get_annot_id(db, annotstr)
            % aid = get_annot_id(db, annotstr)
            %
            % annotstr: a single string, for which the raw strcellfind
            %           result is returned (empty if it is not found), or
            %           a cell array of strings, for which one id per
            %           string is returned. In the cell case every string
            %           has to match exactly one lexicon entry, otherwise
            %           an error is raised.

            if ~iscell(annotstr)

                % expensive search within annot list
                aid = strcellfind(db.lexicon, annotstr);
            else

                % preallocation also defines the output for an empty cell
                aid = zeros(1, numel(annotstr));

                % search separately for each annot
                for i = 1:numel(annotstr)

                    found = strcellfind(db.lexicon, annotstr{i});
                    if numel(found) ~= 1
                        error('AnnotDB:annotNotUnique', ...
                            'annotation number %d has no unique lexicon entry', i);
                    end
                    aid(i) = found;
                end
            end
        end

        % ---
        % return statistics on saved annotations.
        % = returns the sum of the scores and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats(db)
            % [labels, score, annotids] = stats(db)
            % score:    per-annotation sum of scores, sorted descending
            % annotids: annotation ids in that order
            % labels:   lexicon entries in that order

            score = full(sum(db.annotsdb, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % return statistics on saved annotations.
        % = returns the number of annotations and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats_count(db)
            % [labels, score, annotids] = stats_count(db)
            % score:    per-annotation number of owners (entries > 0),
            %           sorted descending
            % annotids: annotation ids in that order
            % labels:   lexicon entries in that order

            score = full(sum(db.annotsdb > 0, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % this is a stub for a tag cloud-like output
        function [out] = annots_cloud(db, ownerid)
            % out = annots_cloud(db, ownerid)
            %
            % Returns a string in which each annotation (blanks replaced
            % by '-') is repeated according to its relative score, each
            % repetition followed by '; '. The annotation with the lowest
            % score comes first. If ownerid is given, only the subset for
            % these owners is used.

            % ---
            % TODO: actually output tag-cloud
            % this output is aimed at input into a web interface
            % we successfully used http://www.wordle.net/
            % ---

            if nargin > 1
                db2 = db.subset(ownerid);
            else
                db2 = db;
            end

            [labels, score, annotids] = stats(db2);

            out = '';
            if isempty(score)
                return
            end

            % ---
            % Note: for performance issues we compress this data
            % to a maximum value of 100
            % ---
            reps = ceil((score ./ max(score)) * 100);

            % NaN (all scores zero) and non-positive counts mean "no output"
            reps(~(reps > 0)) = 0;

            % build each annotation's run once instead of prepending to a
            % growing string for every single repetition
            pieces = cell(1, numel(annotids));
            for i = 1:numel(annotids)

                annot = strrep(labels{i}, ' ', '-');
                pieces{i} = repmat([annot '; '], 1, reps(i));
            end

            % later annotations are placed in front of earlier ones
            out = [pieces{end:-1:1}];
        end

        function out = size(db)
            % returns the size of this db, i.e. the number of lexicon
            % entries. NOTE: this overloads the builtin size for AnnotDB
            % objects.

            out = numel(db.lexicon);
        end

        function add_pair(db, ownerid, annot, score)
            % add_pair(db, ownerid, annot, score) stores score (default 1)
            % for the given owner / annotation pair. Unknown annotations
            % are appended to the lexicon and unknown owners to the owner
            % list; the annotation matrix grows accordingly.

            if nargin < 4
                score = 1;
            end

            aid = strcellfind(db.lexicon, annot);

            % create new position for annotation if necessary
            if isempty(aid)

                aid = numel(db.lexicon) + 1;

                % add to lexicon (always yields a row cell array)
                db.lexicon = [reshape(db.lexicon, 1, []), {annot}];

                % enhance annotation matrix by one empty column
                db.annotsdb = [db.annotsdb, ...
                    sparse(size(db.annotsdb, 1), 1)];
            end

            % create new position for clip if necessary
            pos = owner_pos(db, ownerid);
            if isempty(pos)

                pos = numel(db.annots_oid) + 1;

                % add to oid; indexed growth works for row as well as
                % for column shaped id lists
                db.annots_oid(end + 1) = ownerid;

                % enhance annotation matrix by one empty row
                db.annotsdb = [db.annotsdb; ...
                    sparse(1, size(db.annotsdb, 2))];
            end

            % save data to database
            db.annotsdb(pos, aid) = score;
        end

    end


    methods (Hidden)

        function pos = owner_pos(db, ownerid)
            % returns database position (matrix row) for owner id,
            % empty if the owner is unknown

            pos = find(db.annots_oid == ownerid);
        end
    end

end