Mercurial > hg > camir-aes2014
view core/magnatagatune/AnnotDB.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
% ANNOTDB  Annotation database mapping owners (e.g. clips) to annotations.
%
% The class stores a lexicon (cell array of annotation strings) together
% with a sparse owners x annotations matrix. An entry > 0 means that the
% owner carries the annotation; the value itself is the score (1 for plain
% binary annotations).
%
% NOTE(review): several methods rely on the helper strcellfind, which is
% not defined in this file. It is assumed to return the position(s) of a
% string within a cell array and an empty result if there is no match.
classdef AnnotDB < handle

    % public properties
    properties (SetAccess = private)

        lexicon = {};   % cell array of annotation strings; column k of
                        % annotsdb belongs to lexicon{k}
    end

    properties (Hidden, Access = private)

        annotsdb;       % numowners x numannots sparse binary / score matrix
        annots_oid;     % owner id stored for each row of annotsdb
        binary = 0;     % indicator whether the db contains binary or
                        % scored annots (never modified within this class)
    end

    methods

        % ---
        % constructor
        % ---
        function db = AnnotDB(lexicon, annots, ids)
        % db = AnnotDB(lexicon, annots, ids)
        %
        % Two ways of supplying the annotations are supported:
        %
        % 1. db = AnnotDB('clips_by_annot', annots, ids)
        %    annots: cell array of annotation strings
        %    ids:    either a cell array holding a vector of owner ids
        %            for each annotation, or a plain vector with a single
        %            owner id per annotation
        %
        % 2. db = AnnotDB(lexicon, annots, ids)
        %    lexicon: cell array of all individual annotation strings
        %    annots:  owners x numel(lexicon) binary / score matrix
        %    ids:     owner id for each row of annots; defaults to
        %             1:size(annots, 1) if omitted
        %
        % AnnotDB(lexicon) creates a database without owners, and
        % AnnotDB() leaves all properties at their defaults.
        %
        % The keyword 'annots_by_clip' is accepted but not implemented;
        % it yields an empty database.

            if nargin < 1
                return;
            end

            if ischar(lexicon)

                if strcmp(lexicon, 'clips_by_annot')

                    % ---
                    % preset the lexicon and owner ids. The lexicon is
                    % stored in lower case, so the same lower-cased strings
                    % are used when inserting the pairs below.
                    % ---
                    annots_lc = lower(annots);
                    db.lexicon = unique(annots_lc);

                    ids_are_sets = iscell(ids);
                    if ids_are_sets
                        db.annots_oid = unique([ids{:}]);
                    else
                        db.annots_oid = unique(ids);
                    end

                    db.annotsdb = sparse(numel(db.annots_oid), ...
                        numel(db.lexicon));

                    % for all annotations
                    for i = 1:numel(annots_lc)

                        if ids_are_sets
                            % a set of owner ids for this annotation
                            for j = 1:numel(ids{i})
                                db.add_pair(ids{i}(j), annots_lc{i});
                            end
                        else
                            % single owner id for this annotation
                            db.add_pair(ids(i), annots_lc{i});
                        end
                    end

                elseif strcmp(lexicon, 'annots_by_clip')
                    % not implemented: the database stays empty
                end

            else
                % ---
                % this is the binary / score matrix case
                % ---
                db.lexicon = lexicon;

                if nargin >= 2
                    db.annotsdb = sparse(annots);

                    if nargin >= 3
                        db.annots_oid = ids;
                    else
                        % no owner ids given: number the rows
                        db.annots_oid = 1:size(annots, 1);
                    end
                else
                    db.annotsdb = sparse(0, numel(db.lexicon));
                end
            end
        end

        % ---
        % retrieve annot-substructure for given owner ids
        % ---
        function new_db = subset(db, ownerids, mode)
        % new_db = subset(db, ownerids, {'and', ['or']})
        %
        % Returns a new AnnotDB sharing the complete lexicon of db, so
        % that annotation ids stay valid across subsets.
        %
        % mode 'or'  (default): copies all annotations and scores of the
        %            given owners.
        % mode 'and': not implemented yet; the returned database contains
        %            the lexicon but no owners.

            if nargin < 3
                mode = 'or';
            end

            % the whole lexicon is copied to keep the annotation ids
            new_db = AnnotDB(db.lexicon);

            switch lower(mode)

                case 'and'
                    % ---
                    % TODO: implement this and
                    % improve speed below
                    % ---

                case 'or'
                    % successively fill with the annots of each owner
                    for i = 1:numel(ownerids)

                        [annot, score] = db.annots(ownerids(i));
                        for j = 1:numel(annot)
                            new_db.add_pair(ownerids(i), annot{j}, score(j));
                        end
                    end

                otherwise
                    error('illegal owner id combination mode. possibly forgot brackets');
            end
        end

        % ---
        % retrieve annot-substructure for the complement
        % of the given owner ids
        % ---
        function [new_db] = exclude(db, ownerids)
        % new_db = exclude(db, ownerids)

            % get complement of owner ids
            remaining = setdiff(db.annots_oid, ownerids);

            new_db = subset(db, remaining);
        end

        % ---
        % retrieve owners by annotation string(s)
        % ---
        function oids = owner(db, annotstr, mode)
        % oids = owner(db, annotstr, {['and'], 'or'})
        %
        % annotstr: annotation string or cell array of strings
        % mode 'and' (default): owners carrying all given annotations;
        %            empty if one of the annotations is not in the lexicon
        % mode 'or': owners carrying at least one of the annotations

            if nargin < 3
                mode = 'and';
            end

            if ~iscell(annotstr)
                annotstr = {annotstr};
            end

            annotid = [];
            for i = 1:numel(annotstr)

                found = strcellfind(db.lexicon, annotstr{i});

                % no owner can carry an annotation that is unknown
                if isempty(found) && strcmp(lower(mode), 'and')
                    oids = [];
                    return;
                end

                annotid = [annotid found];
            end

            oids = owner_for_annotid(db, annotid, mode);
        end

        % ---
        % retrieve owner ids by annotation id(s)
        % ---
        function ownerids = owner_for_annotid(db, annotid, mode)
        % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
        %
        % An annotation counts as present for an owner if its entry in
        % the annotation matrix is > 0, independent of the actual score.

            if isempty(annotid)
                ownerids = [];
                return
            end

            if nargin < 3
                mode = 'and';
            end

            % presence indicator for the requested annotations
            present = db.annotsdb(:, annotid) > 0;

            switch lower(mode)

                case 'or'
                    % search for all appearing owners
                    candidates = any(present, 2);

                case 'and'
                    % search for the common owners: count present
                    % annotations instead of summing up their scores
                    candidates = sum(present, 2) == numel(annotid);

                otherwise
                    error('illegal tag combination mode');
            end

            % get positions in database and return the owner ids
            pos = find(candidates);
            ownerids = db.annots_oid(pos);
        end

        % ---
        % retrieve annotation ids by owner
        % ---
        function [aid, score] = annotids_for_owner(db, ownerid, mode)
        % [aid, score] = annotids_for_owner(db, ownerid, {'and', ['or']})
        %
        % Single owner id: returns the ids and scores of its annotations,
        % sorted by descending score unless the db is flagged binary.
        %
        % Multiple owner ids: returns the annotation counts over the
        % subset of these owners (see subset and stats_count), cut off
        % after the last annotation with a count > 0.

            if numel(ownerid) == 1

                % single query case
                pos = owner_pos(db, ownerid);

                aid = find(db.annotsdb(pos, :) > 0);
                score = db.annotsdb(pos, aid);

                % sort ids for output
                if ~db.binary
                    [score, idx] = sort(score, 'descend');
                    aid = aid(idx);
                end
            else
                if nargin < 3
                    mode = 'or';
                end

                % ---
                % the query contained multiple ids: we dont return the
                % single results but the statistics for this subset
                % ---
                new_db = db.subset(ownerid, mode);
                [unused, score, aid] = new_db.stats_count();

                % cut off at score > 0 to abandon unused annotations
                last_used = find(score > 0, 1, 'last');
                score = score(1:last_used);
                aid = aid(1:last_used);
            end
        end

        % ---
        % retrieve annotations by owner
        % ---
        function [out, score, aid] = annots(db, ownerid)
        % [out, score, aid] = annots(db, ownerid)
        %
        % out: cell array of annotation strings, see annotids_for_owner
        %      for the meaning of score and aid

            [aid, score] = db.annotids_for_owner(ownerid);

            out = db.get_annot_name(aid);
        end

        % ---
        % retrieve annotation names given annotation ids
        % ---
        function out = get_annot_name(db, annotid)
        % out = get_annot_name(db, annotid) returns a cell array with the
        % lexicon entry for each id in annotid

            out = {};
            for i = 1:numel(annotid)
                out{i} = db.lexicon{annotid(i)};
            end
        end

        % ---
        % return annotation id for annotation string(s)
        % ---
        function aid = get_annot_id(db, annotstr)
        % aid = get_annot_id(db, annotstr)
        %
        % annotstr: annotation string or cell array of strings. For a
        % cell array every string has to be part of the lexicon.

            if ~iscell(annotstr)

                % expensive search within annot list
                aid = strcellfind(db.lexicon, annotstr);
            else
                % initialise, so an empty cell array returns an empty result
                aid = [];

                % search separately for each annot
                for i = 1:numel(annotstr)
                    aid(i) = strcellfind(db.lexicon, annotstr{i});
                end
            end
        end

        % ---
        % return statistics on saved annotations:
        % the sum of the scores for each annotation and
        % the accordingly sorted lexicon
        % ---
        function [labels, score, annotids] = stats(db)
        % [labels, score, annotids] = stats(db)

            score = full(sum(db.annotsdb, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % return statistics on saved annotations:
        % the number of owners for each annotation and
        % the accordingly sorted lexicon
        % ---
        function [labels, score, annotids] = stats_count(db)
        % [labels, score, annotids] = stats_count(db)

            score = full(sum(db.annotsdb > 0, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % this is a stub for a tag cloud-like output
        % ---
        function [out] = annots_cloud(db, ownerid)
        % out = annots_cloud(db, ownerid)
        %
        % Returns a string in which each annotation is repeated
        % according to its relative score, separated by '; '. Blanks
        % within annotations are replaced by '-'. If ownerid is given,
        % only the subset for these owners is used.
        %
        % TODO: actually output tag-cloud
        % this output is aimed at input into a web interface
        % we successfully used http://www.wordle.net/

            if nargin > 1
                db2 = db.subset(ownerid);
            else
                db2 = db;
            end

            [labels, score, annotids] = stats(db2);

            out = '';

            % without any positive score there is nothing to scale
            top_score = max(score);
            if isempty(top_score) || top_score == 0
                return;
            end

            % ---
            % Note: for performance issues we compress this data
            % to a maximum value of 100
            % ---
            score = ceil((score ./ top_score) * 100);

            for i = 1:numel(annotids)

                % repeat the tag according to score
                annot = strrep(labels{i}, ' ', '-');
                for j = 1:score(i)
                    out = sprintf('%s; %s', annot, out);
                end
            end
        end

        function out = size(db)
        % returns the size of this db, i.e. the number of lexicon entries

            out = numel(db.lexicon);
        end

        function add_pair(db, ownerid, annot, score)
        % add_pair(db, ownerid, annot, score) saves the score (default 1)
        % for the given owner and annotation. Unknown annotations are
        % appended to the lexicon, unknown owners to the owner list.

            if nargin < 4
                score = 1;
            end

            aid = strcellfind(db.lexicon, annot);

            % create new position for annotation if necessary
            if isempty(aid)

                aid = numel(db.lexicon) + 1;

                % add to lexicon
                db.lexicon = {db.lexicon{:}, annot};

                % enhance annotation matrix by an empty column
                db.annotsdb = [db.annotsdb, ...
                    sparse(size(db.annotsdb, 1), 1)];
            end

            % create new position for owner if necessary
            pos = owner_pos(db, ownerid);
            if isempty(pos)

                pos = numel(db.annots_oid) + 1;

                % add to owner ids; indexed assignment works for row
                % as well as for column vectors of stored ids
                db.annots_oid(end + 1) = ownerid;

                % enhance annotation matrix by an empty row
                db.annotsdb = [db.annotsdb; ...
                    sparse(1, size(db.annotsdb, 2))];
            end

            % save data to database
            db.annotsdb(pos, aid) = score;
        end
    end

    methods (Hidden)

        function pos = owner_pos(db, ownerid)
        % returns database position (row of the annotation matrix)
        % for the given owner id; empty if the owner is unknown

            pos = find(db.annots_oid == ownerid);
        end
    end
end