Mercurial > hg > camir-aes2014
diff core/magnatagatune/AnnotDB.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line diff
% AnnotDB stores annotations (e.g. genre names or tags) for a set of owners
% (e.g. clips). The data is held as a sparse owners x annotations score
% matrix; the lexicon holds the annotation strings labelling its columns.
%
% NOTE(review): lexicon lookups use strcellfind, which is not defined in
% this file. It is assumed to return the index of a string inside a cell
% array of strings and [] when the string is absent (add_pair relies on
% the [] result) -- confirm against the helper's implementation.

classdef AnnotDB < handle

    % public (read-only) properties
    properties (SetAccess = private)

        % cell array of annotation strings; entry k labels column k of
        % the annotation matrix
        lexicon = {};
    end

    properties (Hidden, Access = private)

        annotsdb;    % a numowners x numannots sparse binary / prob matrix
        annots_oid;  % owner id for each row of annotsdb

        % indicator whether the db contains binary or scored annots.
        % NOTE(review): never assigned in this file, so it stays 0 and
        % annotids_for_owner always sorts by score.
        binary = 0;
    end

    methods

        % ---
        % simple constructor
        % ---
        function db = AnnotDB(lexicon, annots, ids)
            % db = AnnotDB(lexicon, annots, ids)
            %
            % Two ways of supplying the annotations are supported:
            %
            % 1. db = AnnotDB('clips_by_annot', annots, ids)
            %    annots: cell array of annotation strings
            %    ids:    either a cell array where ids{i} lists the owner
            %            ids carrying annots{i}, or a numeric array where
            %            ids(i) is the single owner of annots{i}
            %    Annotations are lower-cased before being stored.
            %
            % 2. db = AnnotDB(lexicon, annots, ids)
            %    lexicon: cell array of annotation strings
            %    annots:  owners x numel(lexicon) binary / score matrix
            %             (optional; the db starts without owners if
            %             omitted)
            %    ids:     owner id for each row of annots (optional;
            %             defaults to 1:size(annots,1))
            %
            % The mode string 'annots_by_clip' is accepted but not
            % implemented; it yields an empty database.

            if nargin >= 1

                if ischar(lexicon)

                    if strcmp(lexicon, 'clips_by_annot')

                        % ---
                        % preset the lexicon and owner ids.
                        % The lower-cased strings are used for the
                        % lexicon AND for the pairs added below, so that
                        % mixed-case input cannot create extra entries.
                        % ---
                        annots = lower(annots);
                        db.lexicon = unique(annots);

                        if iscell(ids)
                            db.annots_oid = unique([ids{:}]);
                        else
                            db.annots_oid = unique(ids);
                        end

                        db.annotsdb = sparse(numel(db.annots_oid), ...
                            numel(db.lexicon));

                        % for all annotations
                        for i = 1:numel(annots)

                            % is this a cell or just a single index?
                            if iscell(ids)

                                % for all ids in the set
                                for j = 1:numel(ids{i})
                                    db.add_pair(ids{i}(j), annots{i});
                                end
                            else
                                % single index case
                                db.add_pair(ids(i), annots{i});
                            end
                        end

                    elseif strcmp(lexicon, 'annots_by_clip')

                        % TODO: not implemented, the db stays empty
                    end

                % this is the binary / score matrix case
                else

                    db.lexicon = lexicon;

                    if nargin >= 2

                        db.annotsdb = sparse(annots);

                        if nargin >= 3
                            db.annots_oid = ids;
                        else
                            % no owner ids given: use the row indices
                            db.annots_oid = 1:size(annots, 1);
                        end
                    else
                        db.annotsdb = sparse(0, numel(db.lexicon));
                    end
                end
            end
        end

        % ---
        % retrieve annot-substructure for given clip ids,
        % collecting std = [or = all] ,[and = common]
        % annots for these
        % ---
        function new_db = subset(db, ownerids, mode)
            % new_db = subset(db, ownerids, {'and', ['or']})
            %
            % ownerids: array of owner ids
            % mode:     'or'  (default) keeps every annotation of every
            %                 given owner
            %           'and' keeps only the annotations that all given
            %                 owners have (score > 0); if any owner is
            %                 unknown, nothing is common and the result
            %                 is empty
            %
            % The returned db is a new AnnotDB object. Owners without any
            % retained annotation do not appear in it.

            if nargin < 3
                mode = 'or';
            end

            % ---
            % create new DB
            % we make sure the annotation ids stay the same for
            % subsets by copying the whole lexicon
            % ---
            new_db = AnnotDB(db.lexicon);

            switch lower(mode)
                case 'and'

                    % find the matrix row of every requested owner
                    pos = zeros(1, numel(ownerids));
                    all_known = true;
                    for i = 1:numel(ownerids)

                        p = owner_pos(db, ownerids(i));
                        if isempty(p)
                            all_known = false;
                            break;
                        end
                        pos(i) = p(1);
                    end

                    if all_known && ~isempty(pos)

                        % columns in which every selected row is set
                        common = find(all(db.annotsdb(pos, :) > 0, 1));

                        for i = 1:numel(pos)
                            for j = 1:numel(common)

                                new_db.add_pair(ownerids(i), ...
                                    db.lexicon{common(j)}, ...
                                    db.annotsdb(pos(i), common(j)));
                            end
                        end
                    end

                case 'or'

                    % successively fill with given annots
                    for i = 1:numel(ownerids)

                        % ---
                        % we retrieve annots for each clip
                        % and add them to the new database
                        % ---
                        [annot, score] = annots(db, ownerids(i));
                        for j = 1:numel(annot)

                            new_db.add_pair(ownerids(i), annot{j}, score(j));
                        end
                    end

                otherwise
                    error 'illegal owner id combination mode. possibly forgot brackets';
            end
        end

        % retrieve annot-substructure for complement
        % of given clip ids
        function [new_db] = exclude(db, ownerids)
            % new_db = exclude(db, ownerids)
            % returns the 'or' subset for all stored owners that are not
            % listed in ownerids

            % get complement of clip ids
            ownerids = setdiff(db.annots_oid, ownerids);

            new_db = subset(db, ownerids);
        end

        % ---
        % retrieve clip by annot.
        % if multiple annots are given, the clips
        % containing all of them (logical and) are
        % returned
        % ---
        function oids = owner(db, annotstr, mode)
            % oids = owner(db, annotstr, {['and'], 'or'})
            %
            % annotstr: annotation string or cell array of strings
            % mode:     'and' (default) owners carrying all annotations
            %           'or'  owners carrying at least one of them
            %
            % Annotations missing from the lexicon are ignored in 'or'
            % mode. In 'and' mode no owner can carry a missing
            % annotation, so the result is empty.

            if nargin < 3
                mode = 'and';
            end

            if ~iscell(annotstr)
                annotstr = {annotstr};
            end

            annotid = [];
            for i = 1:numel(annotstr)

                found = strcellfind(db.lexicon, annotstr{i});

                if isempty(found) && strcmpi(mode, 'and')
                    oids = [];
                    return
                end

                annotid = [annotid found];
            end

            oids = owner_for_annotid(db, annotid, mode);
        end

        % retrieve owner ids by annotation id
        function ownerids = owner_for_annotid(db, annotid, mode)
            % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
            %
            % annotid: array of column indices into the lexicon
            % returns [] if annotid is empty

            if isempty(annotid)
                ownerids = [];
                return
            end
            if nargin < 3
                mode = 'and';
            end

            % an annotation counts as present when its score is > 0;
            % this also holds for scored (non-binary) matrices
            present = db.annotsdb(:, annotid) > 0;

            switch lower(mode)
                case 'or'
                    % search for all appearing owners
                    candidates = any(present, 2);

                case 'and'
                    % search for the common owners
                    candidates = sum(present, 2) == numel(annotid);

                otherwise
                    error 'illegal tag combination mode';
            end

            % get positions in database
            pos = find(candidates);

            % return owner ids
            ownerids = db.annots_oid(pos);
        end

        % retrieve annotid by clip
        function [aid, score] = annotids_for_owner(db, ownerid, mode)
            % [aid, score] = annotids_for_owner(db, ownerid, mode)
            %
            % single owner id: returns the annotation ids with score > 0
            %   and their scores, sorted by descending score unless the
            %   db is flagged binary
            % several owner ids: returns, for the subset(ownerid, mode)
            %   database (mode defaults to 'or'), the annotation ids and
            %   the number of owners carrying each one, sorted
            %   descending and cut off after the last non-zero count

            % single query case
            if numel(ownerid) == 1

                pos = owner_pos(db, ownerid);

                % get positions in database
                aid = find(db.annotsdb(pos, :) > 0);

                score = db.annotsdb(pos, aid);

                % sort ids for output
                if ~db.binary

                    [score, idx] = sort(score, 'descend');
                    aid = aid(idx);
                end
            else
                if nargin < 3
                    mode = 'or';
                end

                % ---
                % the query contained multiple ids
                %
                % we dont return the single results but
                % the statistics for this subset of clips
                % ---
                new_db = db.subset(ownerid, mode);
                [unused, score, aid] = new_db.stats_count();

                % cut off at score > 0 to abandon unused tags
                u = find(score > 0, 1, 'last');
                score = score(1:u);
                aid = aid(1:u);
            end
        end

        % retrieve annotation by clip
        function [out, score, aid] = annots(db, ownerid)
            % [out, score, aid] = annots(db, ownerid)
            % returns the annotation strings, scores and annotation ids
            % delivered by annotids_for_owner for the given owner id(s)

            [aid, score] = db.annotids_for_owner(ownerid);

            out = db.get_annot_name(aid);
        end

        % retrieve annot name given a annot id
        function out = get_annot_name(db, annotid)
            % out = get_annot_name(db, annotid)
            % returns a cell array with the lexicon entry for every id in
            % annotid ({} for empty input)

            out = {};
            for i = 1:numel(annotid)

                out{i} = db.lexicon{annotid(i)};
            end
        end

        % return annotation id for annotation string
        function aid = get_annot_id(db, annotstr)
            % aid = get_annot_id(db, annotstr)
            %
            % annotstr: a single string, for which the strcellfind
            %   result is returned directly, or a cell array of strings,
            %   for which a row vector with one id per string is
            %   returned. In the cell case an error is raised for a
            %   string that is not in the lexicon.

            if ~iscell(annotstr)

                % expensive search within annot list
                aid = strcellfind(db.lexicon, annotstr);
            else

                % search separately for each annot
                aid = zeros(1, numel(annotstr));
                for i = 1:numel(annotstr)

                    found = strcellfind(db.lexicon, annotstr{i});
                    if isempty(found)
                        error('AnnotDB:unknownAnnot', ...
                            'annotation ''%s'' not found in lexicon', ...
                            annotstr{i});
                    end
                    aid(i) = found;
                end
            end
        end

        % ---
        % return statistics on saved annotations.
        % = returns the sum of the scores and
        % sorted lexicon
        % ---
        function [labels, score, annotids] = stats(db)
            % [labels, score, annotids] = stats(db)
            % score:    per-annotation sum of scores, sorted descending
            % annotids: the matching annotation ids
            % labels:   the matching lexicon entries

            score = full(sum(db.annotsdb, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % ---
        % return statistics on saved annotations.
        % = returns the number of annotations and
        % sorted lexicon
        % ---
        function [labels, score, annotids] = stats_count(db)
            % [labels, score, annotids] = stats_count(db)
            % like stats, but score is the number of owners with a
            % score > 0 for each annotation

            score = full(sum(db.annotsdb > 0, 1));
            [score, annotids] = sort(score, 'descend');

            % prepare labels
            labels = db.lexicon(annotids);
        end

        % this is a stub for a tag cloud-like output
        function [out] = annots_cloud(db, ownerid)
            % out = annots_cloud(db, ownerid)
            %
            % ownerid: optional owner ids; if given, only their 'or'
            %          subset is used
            % out:     string in which every annotation (blanks replaced
            %          by '-') is repeated according to its score scaled
            %          to 1..100, each repetition followed by '; '.
            %          Entries are prepended, so the highest scored
            %          annotation comes last. Empty if there is no
            %          positive score.

            % ---
            % TODO: actually output tag-cloud
            % this output is aimed at input into a web interface
            % we successfully used http://www.wordle.net/
            % ---

            if nargin > 1
                db2 = db.subset(ownerid);
            else
                db2 = db;
            end

            [labels, score, annotids] = stats(db2);

            out = '';

            % nothing to scale: avoid dividing by an empty or zero maximum
            if isempty(score) || max(score) <= 0
                return
            end

            % ---
            % Note: for performance issues we compress this data
            % to a maximum value of 100
            % ---
            score = ceil((score ./ max(score)) * 100);

            for i = 1:numel(annotids)

                % repeat the tag according to score
                annot = strrep(labels{i}, ' ', '-');
                for j = 1:score(i)
                    out = sprintf('%s; %s', annot, out);
                end
            end
        end

        function out = size(db)
            % returns the size of this db, i.e. the number of lexicon
            % entries. Note that this overloads size() for AnnotDB
            % objects.

            out = numel(db.lexicon);
        end

        function add_pair(db, ownerid, annot, score)
            % add_pair(db, ownerid, annot, score) stores score (default 1)
            % for the given owner / annotation pair. Unknown annotations
            % are appended to the lexicon and unknown owners to the owner
            % list; the annotation matrix grows accordingly.

            if nargin < 4
                score = 1;
            end

            aid = strcellfind(db.lexicon, annot);

            % create new position for annotation if necessary
            if isempty(aid)

                aid = numel(db.lexicon) + 1;

                % add to lexicon
                db.lexicon = {db.lexicon{:}, annot};

                % enhance annotation matrix by an empty column
                db.annotsdb = [db.annotsdb, ...
                    sparse(size(db.annotsdb, 1), 1)];
            end

            % create new position for clip if necessary
            pos = owner_pos(db, ownerid);
            if isempty(pos)

                pos = numel(db.annots_oid) + 1;

                % add to oid; indexed growth works for row and column
                % vectors alike
                db.annots_oid(end + 1) = ownerid;

                % enhance annotation matrix by an empty row
                db.annotsdb = [db.annotsdb; ...
                    sparse(1, size(db.annotsdb, 2))];
            end

            % save data to database
            db.annotsdb(pos, aid) = score;
        end

    end

    methods (Hidden)

        function pos = owner_pos(db, ownerid)
            % returns database position (row index) for owner id,
            % empty if the owner is unknown

            pos = find(db.annots_oid == ownerid);
        end
    end

end