diff core/magnatagatune/AnnotDB.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/AnnotDB.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,412 @@
+% The AnnotDB (thesaurus) class is a basic component for all
+% genre and tag information, managing the whole vocabulary.
+
+classdef AnnotDB < handle
+    % AnnotDB  Database of annotations (tags) attached to owner (clip) ids.
+    %
+    % The annotations are kept in a sparse numowners x numannots matrix
+    % whose columns follow the order of the lexicon; an entry > 0 marks
+    % an annotation and may carry a score.
+    %
+    % NOTE(review): the external helper strcellfind is assumed to return
+    % the position of a string within a cell array, or [] if it is
+    % missing (add_pair relies on this) - confirm against its source.
+
+    % public properties
+    properties (SetAccess = private)
+        lexicon = {}; % annotation strings, one per column of annotsdb
+    end
+
+    properties(Hidden, Access = private)
+        annotsdb; % a numowners x numannots sparse binary / prob matrix
+        annots_oid; % ownerid to pos in annots conversion
+        % indicator whether the db contains binary or scored annots;
+        % never set within this class, so scores are always sorted
+        binary = 0;
+    end
+
+    methods
+        % ---
+        % simple constructor
+        % ---
+        function db = AnnotDB(lexicon, annots, ids)
+            % db = AnnotDB(lexicon, annots, annotation_ids)
+            %
+            % Two ways of supplying the annots are allowed:
+            % 1. AnnotDB('clips_by_annot', annots, ids)
+            %      annots: cell array of annotation strings
+            %      ids:    cell array with the clip ids of each annot,
+            %              or a vector with a single clip id per annot
+            % 2. AnnotDB(lexicon, annots, ids)
+            %      lexicon: cell array of all annotation strings
+            %      annots:  clips x numel(lexicon) binary / score matrix
+            %      ids:     clip id for each row of annots
+            %
+            % AnnotDB(lexicon) creates a db without any annots and
+            % AnnotDB() an entirely empty db.
+
+            if nargin < 1
+                return
+            end
+
+            if ischar(lexicon)
+                if strcmp(lexicon, 'clips_by_annot')
+                    if nargin < 3
+                        error('AnnotDB:missingInput', ...
+                            'clips_by_annot requires annots and ids');
+                    end
+                    % preset the lexicon and hash ids. The annots are
+                    % lowercased once, so the pairs added below refer
+                    % to the lowercase lexicon entries instead of
+                    % extending the lexicon by mixed-case duplicates
+                    annots = lower(annots);
+                    db.lexicon = unique(annots);
+
+                    if iscell(ids)
+                        db.annots_oid = unique([ids{:}]);
+                    else
+                        db.annots_oid = unique(ids);
+                    end
+
+                    db.annotsdb = sparse(numel(db.annots_oid), ...
+                        numel(db.lexicon));
+
+                    % for all annotations
+                    for i = 1:numel(annots)
+                        if iscell(ids)
+                            % a set of clip ids for this annotation
+                            for j = 1:numel(ids{i})
+                                db.add_pair(ids{i}(j), annots{i});
+                            end
+                        else
+                            % single index case
+                            db.add_pair(ids(i), annots{i});
+                        end
+                    end
+                elseif strcmp(lexicon, 'annots_by_clip')
+                    % TODO: not implemented, the db is left empty
+                end
+            else
+                % this is the binary / matrix case
+                db.lexicon = lexicon;
+
+                if nargin >= 3
+                    db.annotsdb = sparse(annots);
+                    db.annots_oid = ids;
+                elseif nargin == 2
+                    % the rows of annots cannot be addressed without ids
+                    error('AnnotDB:missingInput', ...
+                        'annots given without the clip ids of its rows');
+                else
+                    db.annotsdb = sparse(0, numel(db.lexicon));
+                end
+            end
+        end
+
+        % ---
+        % retrieve annot-substructure for given clip ids,
+        % collecting std = [or = all] ,[and = common]
+        % annots for these
+        % ---
+        function new_db = subset(db, ownerids, mode)
+            % new_db = subset(db, ownerids, {'and', ['or']})
+            %
+            % 'or' copies all annots of the given owners into a new db.
+            % 'and' is not implemented yet and yields a db without
+            % annots. Any other mode raises an error.
+
+            if nargin < 3
+                mode = 'or';
+            end
+
+            % ---
+            % create new DB
+            % we make sure the tag id index keeps
+            % the same for subsets by copying the whole
+            % lexicon
+            % ---
+            new_db = AnnotDB(db.lexicon);
+
+            switch lower(mode)
+                case 'and'
+                    % TODO: implement this and
+                    % improve speed below
+                case 'or'
+                    % successively fill with given annots
+                    for i = 1:numel(ownerids)
+                        % we retrieve annots for each clip
+                        % and add them to the new database
+                        [annot, score] = annots(db, ownerids(i));
+                        for j = 1:numel(annot)
+                            new_db.add_pair(ownerids(i), annot{j}, score(j));
+                        end
+                    end
+                otherwise
+                    error 'illegal owner id combination mode. possibly forgot brackets';
+            end
+        end
+
+        % retrieve annot-substructure for complement
+        % of given clip ids
+        function [new_db] = exclude(db, ownerids)
+            % get complement of clip ids
+            ownerids = setdiff(db.annots_oid, ownerids);
+            new_db = subset(db, ownerids);
+        end
+
+        % ---
+        % retrieve clip by annot.
+        % if multiple annots are given, the clips
+        % containing all of them (logical and) are
+        % returned
+        % ---
+        function oids = owner(db, annotstr, mode)
+            % oids = owner(db, annotstr, {['and'], 'or'})
+
+            if nargin < 3
+                mode = 'and';
+            end
+
+            if ~iscell(annotstr)
+                annotstr = {annotstr};
+            end
+
+            annotid = [];
+            for i = 1:numel(annotstr)
+                hit = strcellfind(db.lexicon, annotstr{i});
+
+                % no clip can contain an annot missing from the lexicon,
+                % so the logical and has to come out empty
+                if isempty(hit) && strcmpi(mode, 'and')
+                    oids = [];
+                    return
+                end
+                annotid = [annotid hit]; %#ok<AGROW>
+            end
+
+            oids = owner_for_annotid(db, annotid, mode);
+        end
+
+        % retrieve owner ids by annot id
+        function ownerids = owner_for_annotid(db, annotid, mode)
+            % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})
+
+            if isempty(annotid)
+                ownerids = [];
+                return
+            end
+            if nargin < 3
+                mode = 'and';
+            end
+
+            % an annot is present if its entry is > 0. Counting
+            % presence instead of summing up the entries keeps the
+            % result correct for scored (non-binary) annots
+            present = db.annotsdb(:, annotid) > 0;
+
+            switch lower(mode)
+                case 'or'
+                    % search for all appearing owners
+                    candidates = any(present, 2);
+                case 'and'
+                    % search for the common owners
+                    candidates = all(present, 2);
+                otherwise
+                    error 'illegal tag combination mode';
+            end
+
+            % get positions in database
+            pos = find(candidates);
+
+            % return owner ids
+            ownerids = db.annots_oid(pos);
+        end
+
+        % retrieve annotid by clip
+        function [aid, score] = annotids_for_owner(db, ownerid, mode)
+            % [aid, score] = annotids_for_owner(db, ownerid, {'and', ['or']})
+            % mode is only used if several owner ids are given.
+
+            % single query case
+            if numel(ownerid) == 1
+                pos = owner_pos(db, ownerid);
+
+                % get positions in database
+                aid = find(db.annotsdb(pos, :) > 0);
+                score = db.annotsdb(pos, aid);
+
+                % sort ids for output
+                if ~db.binary
+                    [score, idx] = sort(score, 'descend');
+                    aid = aid(idx);
+                end
+            else
+                if nargin < 3
+                    mode = 'or';
+                end
+
+                % the query contained multiple ids:
+                % we dont return the single results but
+                % the statistics for this subset of clips
+                new_db = db.subset(ownerid, mode);
+                [unused, score, aid] = new_db.stats_count(); %#ok<ASGLU>
+
+                % cut off at score > 0 to abandon unused tags
+                u = find(score > 0, 1, 'last');
+                score = score(1:u);
+                aid = aid(1:u);
+            end
+        end
+
+        % retrieve annotation by clip
+        function [out, score, aid] = annots(db, ownerid)
+            [aid, score] = db.annotids_for_owner(ownerid);
+            out = db.get_annot_name(aid);
+        end
+
+        % retrieve annot name given a annot id
+        function out = get_annot_name(db, annotid)
+            out = {};
+            for i = 1:numel(annotid)
+                out{i} = db.lexicon{annotid(i)};
+            end
+        end
+
+        % return annotation id for annotation string
+        function aid = get_annot_id(db, annotstr)
+            % A single string returns the result of strcellfind. For a
+            % cell array one id per string is returned, with 0 marking
+            % strings that were not found in the lexicon.
+
+            if ~iscell(annotstr)
+                % expensive search within annot list
+                aid = strcellfind(db.lexicon, annotstr);
+            else
+                % search separately for each annot. Preallocating
+                % also covers an empty query and unknown annots,
+                % which used to raise an error
+                aid = zeros(1, numel(annotstr));
+                for i = 1:numel(annotstr)
+                    hit = strcellfind(db.lexicon, annotstr{i});
+                    if ~isempty(hit)
+                        aid(i) = hit(1);
+                    end
+                end
+            end
+        end
+
+        % ---
+        % return statistics on saved annotations.
+        % = returns the sum of the scores and
+        % sorted lexicon
+        % ---
+        function [labels, score, annotids] = stats(db)
+            score = full(sum(db.annotsdb, 1));
+            [score, annotids] = sort(score, 'descend');
+            % prepare labels
+            labels = db.lexicon(annotids);
+        end
+
+        % ---
+        % return statistics on saved annotations.
+        % = returns the number of annotations and
+        % sorted lexicon
+        % ---
+        function [labels, score, annotids] = stats_count(db)
+            score = full(sum(db.annotsdb > 0, 1));
+            [score, annotids] = sort(score, 'descend');
+            % prepare labels
+            labels = db.lexicon(annotids);
+        end
+
+        % this is a stub for a tag cloud-like output
+        function [out] = annots_cloud(db, ownerid)
+            % TODO: actually output tag-cloud
+            % this output is aimed at input into a web interface
+            % we successfully used http://www.wordle.net/
+
+            if nargin > 1
+                db2 = db.subset(ownerid);
+            else
+                db2 = db;
+            end
+
+            [labels, score, annotids] = stats(db2);
+
+            % without any positive score there is nothing to weight
+            out = '';
+            peak = max(score);
+            if isempty(peak) || ~(peak > 0)
+                return
+            end
+
+            % Note: for performance issues we compress this data
+            % to a maximum value of 100
+            score = ceil((score ./ peak) * 100);
+
+            % repeat each tag according to its score
+            parts = cell(1, numel(annotids));
+            for i = 1:numel(annotids)
+                annot = strrep(labels{i}, ' ', '-');
+                parts{i} = repmat([annot '; '], 1, max(score(i), 0));
+            end
+
+            % the top scoring tags come last in the output
+            out = [parts{end:-1:1}];
+        end
+
+        function out = size(db)
+            % returns the size of this db, i.e. the number of
+            % lexicon entries
+            out = numel(db.lexicon);
+        end
+
+        function add_pair(db, ownerid, annot, score)
+            % add_pair(db, owner, annot) adds an annot and owner and can
+            % increase the lexicon size. score defaults to 1.
+
+            if nargin < 4
+                score = 1;
+            end
+
+            aid = strcellfind(db.lexicon, annot);
+
+            % create new position for annotation if necessary
+            if isempty(aid)
+                aid = numel(db.lexicon) + 1;
+                % add to lexicon
+                db.lexicon = {db.lexicon{:}, annot};
+
+                % enhance annotation matrix
+                db.annotsdb = [db.annotsdb, ...
+                    sparse(size(db.annotsdb,1), 1)];
+            end
+
+            % create new position for clip if necessary
+            pos = owner_pos(db, ownerid);
+            if isempty(pos)
+                pos = numel(db.annots_oid) + 1;
+                % add to oid, keeping the orientation of the id vector
+                if size(db.annots_oid, 1) > 1
+                    db.annots_oid = [db.annots_oid; ownerid];
+                else
+                    db.annots_oid = [db.annots_oid, ownerid];
+                end
+
+                % enhance annotation matrix
+                db.annotsdb = [db.annotsdb; ...
+                    sparse(1, size(db.annotsdb, 2))];
+            end
+
+            % save data to database
+            db.annotsdb(pos, aid) = score;
+        end
+    end
+
+    methods(Hidden)
+        function pos = owner_pos(db, ownerid)
+            % returns database position for owner id
+            pos = find(db.annots_oid == ownerid);
+        end
+    end
+end
\ No newline at end of file