% AnnotDB  Thesaurus class managing the vocabulary (lexicon) of annotations
% such as genre and tag information, together with the assignment of these
% annotations to owners (clips).
%
% The data is held in a sparse owners x annotations matrix. An entry > 0
% means "owner has this annotation"; the value itself is the score.

classdef AnnotDB < handle

    % public (read-only) properties
    properties (SetAccess = private)

        lexicon = {}; % cell array of annotation strings; index = annotation id
    end

    properties (Hidden, Access = private)

        annotsdb;   % a numowners x numannots sparse binary / prob matrix
        annots_oid; % ownerid to pos in annots conversion

        % indicator whether the db contains binary or scored annots
        % NOTE(review): nothing in this file ever sets this flag, so it
        % keeps its default of 0 (= scored) - confirm this is intended
        binary = 0;
    end

    methods

        % ---
        % simple constructor
        % ---
        function db = AnnotDB(lexicon, annots, ids)
            % db = AnnotDB(lexicon, annots, annotation_ids)
            %
            % lexicon: either the list (cell array) of all individual
            %          annotation elements, or one of the mode strings
            %          'clips_by_annot' / 'annots_by_clip'
            %
            % annots:  list-of-elements case: a clips x numel(lexicon)
            %              binary or scored matrix
            %          'clips_by_annot' case: cell array of annotation
            %              strings
            %
            % ids:     list-of-elements case: clip (owner) ids, one per
            %              row of annots
            %          'clips_by_annot' case: for each entry of annots,
            %              either a single clip id (numeric array) or a
            %              set of clip ids (cell array of id vectors)
            %
            % Calling AnnotDB() without arguments yields an empty object.

            if nargin >= 1

                % ---
                % NOTE: two ways of supplying the annots are allowed:
                %  1. clip ids for each lexical element
                %  2. binary matrix
                % ---
                if ischar(lexicon)

                    if strcmp(lexicon, 'clips_by_annot')

                        % ---
                        % preset the lexicon and hash ids
                        % NOTE(review): the lexicon is lower-cased here but
                        % add_pair below receives the original spelling;
                        % this relies on strcellfind matching regardless
                        % of case - confirm against strcellfind
                        % ---
                        db.lexicon = unique(lower(annots));

                        if iscell(ids)
                            db.annots_oid = unique([ids{:}]);
                        else
                            db.annots_oid = unique(ids);
                        end

                        db.annotsdb = sparse(numel(db.annots_oid), ...
                            numel(db.lexicon));

                        % for all annotations
                        for i = 1:numel(annots)

                            % is this a set of ids or just a single index?
                            if iscell(ids)

                                % for all ids in the set
                                for j = 1:numel(ids{i})

                                    db.add_pair(ids{i}(j), annots{i});
                                end
                            else
                                % single index case
                                db.add_pair(ids(i), annots{i});
                            end
                        end

                    elseif strcmp(lexicon, 'annots_by_clip')

                        % NOTE(review): this mode is not implemented; the
                        % object is left in its empty default state

                    end

                % this is the binary / matrix case
                else

                    db.lexicon = lexicon;
                    db.annotsdb = sparse(0, 0);
                    if nargin >= 2

                        db.annotsdb = sparse(annots);
                        db.annots_oid = ids;
                    else
                        % lexicon only: no owners yet
                        db.annotsdb = sparse(0, numel(db.lexicon));
                    end
                end
            end
        end

        % ---
        % retrieve annot-substructure for given clip ids,
        % collecting std = [or = all], [and = common]
        % annots for these
        % ---
        function new_db = subset(db, ownerids, mode)
            % new_db = subset(db, ownerids, {'and', ['or']})
            %
            % Returns a new AnnotDB containing the annotations of the
            % given owners. Errors on an unknown mode string.

            if nargin < 3
                mode = 'or';
            end

            % ---
            % create new DB
            % we make sure the tag id index keeps
            % the same for subsets by copying the whole
            % lexicon
            % ---
            new_db = AnnotDB(db.lexicon);

            switch lower(mode)
                case 'and'

                    % ---
                    % TODO: implement this and
                    % improve speed below
                    % NOTE: until then, 'and' returns a database that
                    % holds the lexicon but no owners
                    % ---
                case 'or'

                    % successively fill with given annots
                    for i = 1:numel(ownerids)

                        % ---
                        % we retrieve annots for each clip
                        % and add them to the new database
                        % ---
                        [annot, score] = annots(db, ownerids(i));
                        for j = 1:numel(annot)

                            new_db.add_pair(ownerids(i), annot{j}, score(j));
                        end
                    end
                otherwise
                    error 'illegal owner id combination mode. possibly forgot brackets';
            end
        end

        % retrieve annot-substructure for complement
        % of given clip ids
        function [new_db] = exclude(db, ownerids)
            % new_db = exclude(db, ownerids)
            %
            % Returns the subset of all owners NOT listed in ownerids.

            % get complement of clip ids
            ownerids = setdiff(db.annots_oid, ownerids);

            new_db = subset(db, ownerids);
        end

        % ---
        % retrieve clip by annot.
        % if multiple annots are given, the clips
        % containing all of them (logical and) are
        % returned
        % ---
        function oids = owner(db, annotstr, mode)
            % oids = owner(db, annotstr, {['and'], 'or'})
            %
            % annotstr: annotation string or cell array of strings
            % mode:     'and' = owners carrying all annotations
            %           'or'  = owners carrying at least one of them

            if nargin < 3
                mode = 'and';
            end

            if ~iscell(annotstr)
                annotstr = {annotstr};
            end

            annotid = [];
            for i = 1:numel(annotstr)

                hit = strcellfind(db.lexicon, annotstr{i});

                % an annotation missing from the lexicon is owned by
                % nobody, so no owner can carry "all" requested annots;
                % dropping it silently would return owners of the rest
                if isempty(hit) && strcmpi(mode, 'and')
                    oids = [];
                    return
                end

                annotid = [annotid hit];
            end

            oids = owner_for_annotid(db, annotid, mode);
        end

        % retrieve owner ids by annotation id
        function ownerids = owner_for_annotid(db, annotid, mode)
            % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
            %
            % annotid: vector of annotation ids (lexicon positions)
            % Returns [] for an empty annotid; errors on an unknown mode.

            if isempty(annotid)
                ownerids = [];
                return
            end
            if nargin < 3
                mode = 'and';
            end

            switch lower(mode)
                case 'or'
                    % search for all appearing owners
                    candidates = sum(db.annotsdb(:, annotid), 2) > 0;

                case 'and'
                    % search for the common owners.
                    % count the annotations present (> 0) instead of
                    % summing their scores: for scored data the sum of
                    % scores says nothing about how many are present
                    candidates = sum(db.annotsdb(:, annotid) > 0, 2) == ...
                        numel(annotid);
                otherwise
                    error 'illegal tag combination mode';
            end

            % get positions in database
            pos = find(candidates);

            % return owner ids
            ownerids = db.annots_oid(pos);
        end

        % retrieve annotid by clip
        function [aid, score] = annotids_for_owner(db, ownerid, mode)
            % [aid, score] = annotids_for_owner(db, ownerid, {'and', ['or']})
            %
            % Single owner id: returns the ids and scores of its
            %   annotations, sorted by descending score unless the db
            %   is flagged binary.
            % Otherwise: returns count statistics over the subset of the
            %   given owners (see subset and stats_count), limited to
            %   annotations that occur at least once.

            % single query case
            if numel(ownerid) == 1

                pos = owner_pos(db, ownerid);

                % get positions in database
                aid = find(db.annotsdb(pos, :) > 0);

                score = db.annotsdb(pos, aid);

                % sort ids for output
                if ~db.binary

                    [score, idx] = sort(score, 'descend');
                    aid = aid(idx);
                end
            else
                if nargin < 3
                    mode = 'or';
                end

                % ---
                % the query contained multiple ids
                %
                % we dont return the single results but
                % the statistics for this subset of clips
                % ---
                new_db = db.subset(ownerid, mode);
                [unused, score, aid] = new_db.stats_count();

                % cut off at score > 0 to abandon unused tags
                % (scores are sorted descending, so this is a prefix)
                u = find(score > 0, 1, 'last');
                score = score(1:u);
                aid = aid(1:u);
            end
        end

        % retrieve annotation by clip
        function [out, score, aid] = annots(db, ownerid)
            % [out, score, aid] = annots(db, ownerid)
            %
            % Same as annotids_for_owner, additionally returning the
            % annotation strings in out.

            [aid, score] = db.annotids_for_owner(ownerid);

            out = db.get_annot_name(aid);
        end

        % retrieve annot name given an annot id
        function out = get_annot_name(db, annotid)
            % out = get_annot_name(db, annotid)
            %
            % Returns a cell array with the lexicon entry for each id.

            out = {};
            for i = 1:numel(annotid)

                out{i} = db.lexicon{annotid(i)};
            end
        end

        % return annotation id for annotation string
        function aid = get_annot_id(db, annotstr)
            % aid = get_annot_id(db, annotstr)
            %
            % annotstr: a single string - returns whatever strcellfind
            %              yields (empty if not found, see add_pair)
            %           a cell array of strings - returns one id per
            %              entry; errors with id 'AnnotDB:unknownAnnot'
            %              if an entry is not in the lexicon

            if ~iscell(annotstr)

                % expensive search within annot list
                aid = strcellfind(db.lexicon, annotstr);
            else

                % preallocate so that an empty query returns an
                % (empty) result instead of an unassigned output
                aid = zeros(1, numel(annotstr));

                % search separately for each annot
                for i = 1:numel(annotstr)

                    hit = strcellfind(db.lexicon, annotstr{i});

                    % an empty lookup result cannot be stored in a
                    % numeric slot; fail with a clear message instead
                    if isempty(hit)
                        error('AnnotDB:unknownAnnot', ...
                            'annotation "%s" is not in the lexicon', ...
                            annotstr{i});
                    end
                    aid(i) = hit;
                end
            end
        end

        % ---
        % return statistics on saved annotations.
        % = returns the sum of the scores and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats(db)
            % [labels, score, annotids] = stats(db)
            %
            % score:    per-annotation sum of scores, sorted descending
            % annotids: annotation ids in that order
            % labels:   lexicon entries in that order

            score = full(sum(db.annotsdb, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % return statistics on saved annotations.
        % = returns the number of annotations and
        %   sorted lexicon
        % ---
        function [labels, score, annotids] = stats_count(db)
            % [labels, score, annotids] = stats_count(db)
            %
            % score:    per-annotation number of owners with an entry > 0,
            %           sorted descending
            % annotids: annotation ids in that order
            % labels:   lexicon entries in that order

            score = full(sum(db.annotsdb > 0, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % this is a stub for a tag cloud-like output
        function [out] = annots_cloud(db, ownerid)
            % out = annots_cloud(db, [ownerid])
            %
            % Returns a string in which each annotation (blanks replaced
            % by '-') is repeated according to its relative score, each
            % repetition followed by '; '. If ownerid is given, only the
            % subset of these owners is considered.

            % ---
            % TODO: actually output tag-cloud
            % this output is aimed at input into a web interface
            % we successfully used http://www.wordle.net/
            % ---

            if nargin > 1
                db2 = db.subset(ownerid);
            else
                db2 = db;
            end

            [labels, score, annotids] = stats(db2);

            out = '';

            % nothing to scale when there are no annotations or every
            % score is zero; dividing by a zero maximum would turn the
            % repeat counts into NaN
            peak = max(score);
            if isempty(peak) || peak == 0
                return
            end

            % ---
            % Note: for performance issues we compress this data
            % to a maximum value of 100
            % ---
            score = ceil((score ./ peak) * 100);

            for i = 1:numel(annotids)

                % repeat the tag according to score
                annot = strrep(labels{i}, ' ', '-');
                for j = 1:score(i)
                    out = sprintf('%s; %s', annot, out);
                end
            end
        end

        function out = size(db)
            % returns the size of this db, i.e. the number of lexicon
            % entries (not the number of owners)

            out = numel(db.lexicon);
        end

        function add_pair(db, ownerid, annot, score)
            % add_pair(db, owner, annot, [score]) adds an annot and owner
            % and can increase the lexicon size
            %
            % ownerid: scalar owner (clip) id
            % annot:   annotation string
            % score:   value stored for the pair, default 1

            if nargin < 4
                score = 1;
            end

            aid = strcellfind(db.lexicon, annot);

            % create new position for annotation if necessary
            if isempty(aid)

                aid = numel(db.lexicon) + 1;

                % add to lexicon
                db.lexicon = {db.lexicon{:}, annot};

                % enhance annotation matrix by one column
                db.annotsdb = [db.annotsdb, ...
                    sparse(size(db.annotsdb, 1), 1)];
            end

            % create new position for clip if necessary
            pos = owner_pos(db, ownerid);
            if isempty(pos)

                pos = numel(db.annots_oid) + 1;

                % add to oid, keeping the orientation of the id list:
                % the constructor stores ids as supplied, so the list
                % may be a column, where horizontal concatenation fails
                if size(db.annots_oid, 1) > 1
                    db.annots_oid = [db.annots_oid; ownerid];
                else
                    db.annots_oid = [db.annots_oid, ownerid];
                end

                % enhance annotation matrix by one row
                db.annotsdb = [db.annotsdb; ...
                    sparse(1, size(db.annotsdb, 2))];
            end

            % save data to database
            db.annotsdb(pos, aid) = score;
        end

    end

    methods (Hidden)

        function pos = owner_pos(db, ownerid)
            % returns database position (row of the annotation matrix)
            % for owner id; empty if the owner is unknown

            pos = find(db.annots_oid == ownerid);
        end
    end

end