wolffd@0: % The AnnotDB class is a basic component for all 
wolffd@0: % genre and tag information, managing the whole vocabulary.
wolffd@0: 
wolffd@0: classdef AnnotDB < handle
wolffd@0:     
wolffd@0:    % public properties
wolffd@0:    properties (SetAccess = private)
wolffd@0:        % cell array of annotation strings; the position of a string in
wolffd@0:        % this list is used as its annotation id, i.e. as the column
wolffd@0:        % index into the private annotation matrix
wolffd@0:        lexicon = {};
wolffd@0:       
wolffd@0:    end
wolffd@0:    
wolffd@0:    properties(Hidden, Access = private)
wolffd@0: 
wolffd@0:        % one row per owner, one column per lexicon entry; entries > 0
wolffd@0:        % are treated as "annotation present" by the query methods
wolffd@0:        annotsdb; % a numowners x numannots sparse binary / prob matrix
wolffd@0:        % annots_oid(k) is the owner id stored in row k of annotsdb
wolffd@0:        annots_oid; % ownerid to pos in annots conversion
wolffd@0:        
wolffd@0:        % when 0, annotids_for_owner sorts its single-owner result by
wolffd@0:        % descending score; no method in this file changes the value
wolffd@0:        binary = 0; % indicator whether the db contains binary or scored annots
wolffd@0:    end
wolffd@0:    
wolffd@0:    methods
wolffd@0:        
wolffd@0:        % ---
wolffd@0:        % simple constructor
wolffd@0:        % ---
wolffd@0:        function db = AnnotDB(lexicon, annots, ids)
wolffd@0:            % db = AnnotDB()
wolffd@0:            % db = AnnotDB(lexicon)
wolffd@0:            % db = AnnotDB(lexicon, annots)
wolffd@0:            % db = AnnotDB(lexicon, annots, ids)
wolffd@0:            % db = AnnotDB('clips_by_annot', annots, ids)
wolffd@0:            %
wolffd@0:            % lexicon: cell array with all individual annotation strings,
wolffd@0:            %          or one of the mode strings
wolffd@0:            %          'clips_by_annot' (see below) or
wolffd@0:            %          'annots_by_clip' (accepted but not implemented:
wolffd@0:            %          the database stays empty)
wolffd@0:            %
wolffd@0:            % annots:  lexicon given: owners x numel(lexicon) binary or
wolffd@0:            %            score matrix
wolffd@0:            %          'clips_by_annot': cell array of annotation
wolffd@0:            %            strings; they are stored in lower case
wolffd@0:            %
wolffd@0:            % ids:     lexicon given: owner (clip) id for each row of
wolffd@0:            %            annots, defaults to 1:size(annots, 1)
wolffd@0:            %          'clips_by_annot': for each annots{i} either a
wolffd@0:            %            cell ids{i} with the owner ids carrying this
wolffd@0:            %            annotation, or a single owner id ids(i)
wolffd@0:            
wolffd@0:            % no arguments: keep the default (empty) properties
wolffd@0:            if nargin < 1
wolffd@0:                return;
wolffd@0:            end
wolffd@0: 
wolffd@0:            % ---
wolffd@0:            % NOTE: two ways of supplying the annots are allowed:
wolffd@0:            % 1. clip ids for each lexical element
wolffd@0:            % 2. binary matrix
wolffd@0:            % ---
wolffd@0:            if ischar(lexicon)
wolffd@0:                
wolffd@0:                if strcmp(lexicon, 'clips_by_annot')
wolffd@0:                    
wolffd@0:                    if nargin < 3
wolffd@0:                        error('AnnotDB:missingInput', ...
wolffd@0:                            '''clips_by_annot'' requires both annots and ids');
wolffd@0:                    end
wolffd@0:                    
wolffd@0:                    % ---
wolffd@0:                    % preset the lexicon and hash ids.
wolffd@0:                    % the lexicon is lower case, so the same lower case
wolffd@0:                    % strings have to be used when adding the pairs
wolffd@0:                    % below; otherwise add_pair may not find them and
wolffd@0:                    % append duplicate lexicon entries
wolffd@0:                    % ---
wolffd@0:                    annots = lower(annots);
wolffd@0:                    db.lexicon = unique(annots);
wolffd@0:                    
wolffd@0:                    % is this a cell or just a single index per annot?
wolffd@0:                    ids_in_cell = iscell(ids);
wolffd@0:                    if ids_in_cell
wolffd@0:                        db.annots_oid = unique([ids{:}]);
wolffd@0:                    else
wolffd@0:                        db.annots_oid = unique(ids);
wolffd@0:                    end
wolffd@0:                    
wolffd@0:                    db.annotsdb = sparse(numel(db.annots_oid),...
wolffd@0:                        numel(db.lexicon));
wolffd@0:                    
wolffd@0:                    % for all annotations
wolffd@0:                    for i = 1:numel(annots)
wolffd@0:                        
wolffd@0:                        if ids_in_cell
wolffd@0:                            % for all ids in set
wolffd@0:                            for j = 1:numel(ids{i})
wolffd@0: 
wolffd@0:                                db.add_pair(ids{i}(j), annots{i});
wolffd@0:                            end
wolffd@0:                        else
wolffd@0:                            % single index case
wolffd@0:                            db.add_pair(ids(i), annots{i});
wolffd@0:                        end
wolffd@0:                    end
wolffd@0: 
wolffd@0:                elseif strcmp(lexicon, 'annots_by_clip')
wolffd@0:                    % not implemented: nothing is stored for this mode
wolffd@0:                end
wolffd@0:                
wolffd@0:            else
wolffd@0:                % this is the binary case
wolffd@0:                db.lexicon = lexicon;
wolffd@0:                
wolffd@0:                if nargin >= 2
wolffd@0: 
wolffd@0:                    db.annotsdb = sparse(annots);
wolffd@0:                    
wolffd@0:                    if nargin >= 3
wolffd@0:                        db.annots_oid = ids;
wolffd@0:                    else
wolffd@0:                        % no owner ids given: number the rows
wolffd@0:                        db.annots_oid = 1:size(annots, 1);
wolffd@0:                    end
wolffd@0:                else
wolffd@0:                    db.annotsdb = sparse(0, numel(db.lexicon));
wolffd@0:                end
wolffd@0:            end
wolffd@0:        end
wolffd@0:        
wolffd@0:        % ---
wolffd@0:        % retrieve annot-substructure for given clip ids, 
wolffd@0:        % collecting std = [or = all] ,[and = common]
wolffd@0:        % annots for these
wolffd@0:        % ---
wolffd@0:        function new_db = subset(db, ownerids, mode)   
wolffd@0:        % new_db = subset(db, ownerids, {'and', ['or']}) 
wolffd@0:        %
wolffd@0:        % returns a new AnnotDB restricted to the given owner ids.
wolffd@0:        %
wolffd@0:        % 'or':  every annotation (score > 0) of each given owner is
wolffd@0:        %        copied
wolffd@0:        % 'and': only the annotations common to all given owners are
wolffd@0:        %        copied; the result is empty if there are none
wolffd@0:        %
wolffd@0:        % owners without any copied annotation do not appear in new_db
wolffd@0:        
wolffd@0:            if nargin < 3 
wolffd@0:                mode = 'or';
wolffd@0:            end
wolffd@0:        
wolffd@0:            % ---
wolffd@0:            % create new DB
wolffd@0:            % we make sure the tag id index keeps 
wolffd@0:            % the same for subsets by copying the whole 
wolffd@0:            % lexicon
wolffd@0:            % ---
wolffd@0:            new_db = AnnotDB(db.lexicon);
wolffd@0:            
wolffd@0:            switch lower(mode)
wolffd@0:                case 'and'
wolffd@0:                    
wolffd@0:                    if isempty(ownerids)
wolffd@0:                        return;
wolffd@0:                    end
wolffd@0:                    
wolffd@0:                    % annotation ids shared by every given owner
wolffd@0:                    common = db.annotids_for_owner(ownerids(1));
wolffd@0:                    for i = 2:numel(ownerids)
wolffd@0:                        
wolffd@0:                        common = intersect(common, ...
wolffd@0:                            db.annotids_for_owner(ownerids(i)));
wolffd@0:                    end
wolffd@0:                    
wolffd@0:                    % copy the shared annotations with their scores;
wolffd@0:                    % a non-empty common set implies that every owner
wolffd@0:                    % exists in db
wolffd@0:                    for i = 1:numel(ownerids)
wolffd@0:                        
wolffd@0:                        pos = owner_pos(db, ownerids(i));
wolffd@0:                        for j = 1:numel(common)
wolffd@0:                            
wolffd@0:                            new_db.add_pair(ownerids(i), ...
wolffd@0:                                db.lexicon{common(j)}, ...
wolffd@0:                                db.annotsdb(pos, common(j)));
wolffd@0:                        end
wolffd@0:                    end
wolffd@0: 
wolffd@0:                case 'or'
wolffd@0:                    
wolffd@0:                    % ---
wolffd@0:                    % TODO: improve speed
wolffd@0:                    % ---
wolffd@0:                    
wolffd@0:                    % successively fill with given annots
wolffd@0:                    for i = 1:numel(ownerids)
wolffd@0: 
wolffd@0:                        % ---
wolffd@0:                        % we retrieve annots for each clip
wolffd@0:                        % and add them to the new database
wolffd@0:                        % ---
wolffd@0:                        [annot, score] = annots(db, ownerids(i));
wolffd@0:                        for j = 1:numel(annot)
wolffd@0: 
wolffd@0:                            new_db.add_pair(ownerids(i), annot{j}, score(j));
wolffd@0:                        end   
wolffd@0:                    end
wolffd@0:                otherwise
wolffd@0:                    error('illegal owner id combination mode. possibly forgot brackets');
wolffd@0:            end       
wolffd@0:        end
wolffd@0:        
wolffd@0:         % retrieve annot-substructure for complement
wolffd@0:         % of given clip ids
wolffd@0:        function [new_db] = exclude(db, ownerids)
wolffd@0:        % new_db = exclude(db, ownerids)
wolffd@0:        %
wolffd@0:        % returns the sub-database of all owners stored in db which are
wolffd@0:        % NOT listed in ownerids
wolffd@0:            
wolffd@0:            % owners remaining after removing the given ones
wolffd@0:            remaining = setdiff(db.annots_oid, ownerids);
wolffd@0:            
wolffd@0:            new_db = db.subset(remaining);
wolffd@0:        end
wolffd@0:        
wolffd@0:        % ---
wolffd@0:        % retrieve clip by annot.
wolffd@0:        % if multiple annots are given, the clips 
wolffd@0:        % containing all of them (logical and) are 
wolffd@0:        % returned
wolffd@0:        % ---
wolffd@0:        function oids = owner(db, annotstr, mode)       
wolffd@0:        % oids = owner(db, annotstr, {['and'], 'or'})
wolffd@0:        %
wolffd@0:        % annotstr: annotation string or cell array of such strings
wolffd@0:        % mode:     'and' returns the owners carrying all of the given
wolffd@0:        %           annotations, 'or' the owners carrying any of them
wolffd@0:        %
wolffd@0:        % oids:     matching owner ids, [] if there are none
wolffd@0:            
wolffd@0:            if nargin < 3 
wolffd@0:                mode = 'and';
wolffd@0:            end
wolffd@0:            
wolffd@0:            if ~iscell(annotstr)
wolffd@0:                annotstr = {annotstr};
wolffd@0:            end
wolffd@0:            
wolffd@0:            annotid = [];
wolffd@0:            for i = 1:numel(annotstr)
wolffd@0:                
wolffd@0:                found = strcellfind(db.lexicon, annotstr{i});
wolffd@0:                
wolffd@0:                % ---
wolffd@0:                % an annotation missing from the lexicon has no owners.
wolffd@0:                % for 'and' this means no owner can carry all requested
wolffd@0:                % annotations; silently dropping it would return owners
wolffd@0:                % of the remaining annotations only
wolffd@0:                % ---
wolffd@0:                if isempty(found) && strcmpi(mode, 'and')
wolffd@0:                    oids = [];
wolffd@0:                    return;
wolffd@0:                end
wolffd@0:                
wolffd@0:                annotid = [annotid found];
wolffd@0:            end
wolffd@0:            
wolffd@0:            oids = owner_for_annotid(db, annotid, mode);
wolffd@0:        end
wolffd@0:        
wolffd@0:       % retrieve owner ids by annotation id
wolffd@0:       function ownerids = owner_for_annotid(db, annotid, mode)
wolffd@0:       % ownerids = owner_for_annotid(db, annotid, {['and'], 'or'})     
wolffd@0:       %
wolffd@0:       % annotid:  vector of annotation ids (columns of the annotation
wolffd@0:       %           matrix)
wolffd@0:       % mode:     'and' returns the owners carrying all of the given
wolffd@0:       %           annotations, 'or' the owners carrying any of them
wolffd@0:       %
wolffd@0:       % ownerids: matching owner ids; [] for an empty annotid
wolffd@0:           
wolffd@0:            if isempty(annotid)
wolffd@0:                ownerids = [];
wolffd@0:                return
wolffd@0:            end
wolffd@0:            if nargin < 3 
wolffd@0:                mode = 'and';
wolffd@0:            end
wolffd@0:  
wolffd@0:            % ---
wolffd@0:            % an annotation counts as present if its score is > 0.
wolffd@0:            % comparing presence instead of summing the scores keeps
wolffd@0:            % the result correct for non-binary (scored) entries
wolffd@0:            % ---
wolffd@0:            present = db.annotsdb(:, annotid) > 0;
wolffd@0:            
wolffd@0:            switch lower(mode)
wolffd@0:                case 'or'
wolffd@0:                    % search for all appearing owners
wolffd@0:                    candidates = any(present, 2);
wolffd@0:                    
wolffd@0:                case 'and'
wolffd@0:                    % search for the common owners
wolffd@0:                    candidates = all(present, 2);
wolffd@0:                    
wolffd@0:                otherwise
wolffd@0:                    error('illegal tag combination mode');
wolffd@0:            end
wolffd@0: 
wolffd@0:            % get positions in database
wolffd@0:            pos = find(candidates);
wolffd@0:            
wolffd@0:            % return owner ids
wolffd@0:            ownerids = db.annots_oid(pos);
wolffd@0:        end
wolffd@0:        
wolffd@0:        % retrieve annotid by clip
wolffd@0:        function [aid, score] = annotids_for_owner(db, ownerid, mode)
wolffd@0:        % [aid, score] = annotids_for_owner(db, ownerid, {'and', ['or']})
wolffd@0:        %
wolffd@0:        % exactly one owner id: aid lists the annotation ids with a
wolffd@0:        %   score > 0 for this owner and score holds the stored values;
wolffd@0:        %   unless db.binary is set, both are ordered by descending score
wolffd@0:        %
wolffd@0:        % any other number of owner ids: the owners are collected with
wolffd@0:        %   subset(db, ownerid, mode) and the annotation counts of this
wolffd@0:        %   subset are returned, cut off after the last count > 0
wolffd@0:            
wolffd@0:            if numel(ownerid) ~= 1
wolffd@0:                
wolffd@0:                if nargin < 3 
wolffd@0:                    mode = 'or';
wolffd@0:                end
wolffd@0:                
wolffd@0:                % ---
wolffd@0:                % multiple ids were queried: instead of the individual
wolffd@0:                % results we report the statistics of this set of clips
wolffd@0:                % ---
wolffd@0:                sub_db = db.subset(ownerid, mode);
wolffd@0:                [unused_labels, score, aid] = sub_db.stats_count();
wolffd@0:                
wolffd@0:                % drop the unused tags at the end of the sorted list
wolffd@0:                last_used = find(score > 0, 1, 'last');
wolffd@0:                score = score(1:last_used);
wolffd@0:                aid = aid(1:last_used);
wolffd@0:                return;
wolffd@0:            end
wolffd@0:            
wolffd@0:            % single query case: look up the row of this owner
wolffd@0:            row = owner_pos(db, ownerid);
wolffd@0: 
wolffd@0:            % columns holding a positive entry, and their values
wolffd@0:            aid = find(db.annotsdb(row, :) > 0);
wolffd@0:            score = db.annotsdb(row, aid);
wolffd@0: 
wolffd@0:            % scored annotations are reported best first
wolffd@0:            if ~db.binary
wolffd@0: 
wolffd@0:                [score, order] = sort(score, 'descend');
wolffd@0:                aid = aid(order);
wolffd@0:            end
wolffd@0:        end
wolffd@0:         
wolffd@0:       % retrieve annotation by clip
wolffd@0:        function [out, score, aid] = annots(db, ownerid)
wolffd@0:        % [out, score, aid] = annots(db, ownerid)
wolffd@0:        %
wolffd@0:        % out:   cell array with the annotation strings for ownerid
wolffd@0:        % score: the corresponding scores as returned by
wolffd@0:        %        annotids_for_owner
wolffd@0:        % aid:   the corresponding annotation ids
wolffd@0:            
wolffd@0:            % ids and scores first, then translate the ids into names
wolffd@0:            [aid, score] = annotids_for_owner(db, ownerid);
wolffd@0:            out = get_annot_name(db, aid);
wolffd@0:        end
wolffd@0:        
wolffd@0:       
wolffd@0:        % retrieve annot name given a annot id
wolffd@0:        function out = get_annot_name(db, annotid)
wolffd@0:        % out = get_annot_name(db, annotid)
wolffd@0:        %
wolffd@0:        % returns the lexicon strings for the given annotation ids as a
wolffd@0:        % 1 x numel(annotid) cell array; {} if no id is given
wolffd@0:            
wolffd@0:            out = {};
wolffd@0:            count = numel(annotid);
wolffd@0:            
wolffd@0:            if count > 0
wolffd@0:                % pick all entries at once and arrange them in a row
wolffd@0:                out = reshape(db.lexicon(annotid), 1, count);
wolffd@0:            end
wolffd@0:        end
wolffd@0:        
wolffd@0:        % return annotation id for annotation string
wolffd@0:        function aid = get_annot_id(db, annotstr)
wolffd@0:        % aid = get_annot_id(db, annotstr)
wolffd@0:        %
wolffd@0:        % annotstr: annotation string or cell array of such strings
wolffd@0:        %
wolffd@0:        % aid: string input: the result of the lexicon search as
wolffd@0:        %        returned by strcellfind
wolffd@0:        %      cell input: 1 x numel(annotstr) vector of annotation ids;
wolffd@0:        %        an error is raised if a string has no unique id in the
wolffd@0:        %        lexicon, as the positions could not be kept otherwise
wolffd@0:            
wolffd@0:            if ~iscell(annotstr)
wolffd@0:                
wolffd@0:                % expensive search within annot list
wolffd@0:                aid = strcellfind(db.lexicon, annotstr);
wolffd@0:                return;
wolffd@0:            end
wolffd@0:            
wolffd@0:            % preallocate, so an empty query returns an empty result
wolffd@0:            aid = zeros(1, numel(annotstr));
wolffd@0:            
wolffd@0:            % search separately for each annot 
wolffd@0:            for i = 1:numel(annotstr)
wolffd@0:                
wolffd@0:                found = strcellfind(db.lexicon, annotstr{i});
wolffd@0:                if numel(found) ~= 1
wolffd@0:                    error('AnnotDB:annotLookup', ...
wolffd@0:                        'annotation ''%s'' has no unique id in the lexicon', ...
wolffd@0:                        annotstr{i});
wolffd@0:                end
wolffd@0:                aid(i) = found;
wolffd@0:            end
wolffd@0:        end 
wolffd@0:        
wolffd@0:        % ---
wolffd@0:        % return statistics on saved annotations.
wolffd@0:        % = returns the sum of the scores and 
wolffd@0:        % sorted lexicon
wolffd@0:        % ---
wolffd@0:        function [labels, score, annotids] = stats(db)
wolffd@0:        % [labels, score, annotids] = stats(db)
wolffd@0:        %
wolffd@0:        % score:    sum of the stored scores per annotation, sorted in
wolffd@0:        %           descending order
wolffd@0:        % annotids: annotation ids in the same order
wolffd@0:        % labels:   lexicon entries in the same order
wolffd@0:            
wolffd@0:            % accumulate over all owners (rows)
wolffd@0:            column_sums = sum(db.annotsdb, 1);
wolffd@0:            
wolffd@0:            % highest total first
wolffd@0:            [score, annotids] = sort(full(column_sums), 'descend');
wolffd@0:            
wolffd@0:            labels = db.lexicon(annotids);
wolffd@0:        end
wolffd@0:        
wolffd@0:        % ---
wolffd@0:        % return statistics on saved annotations.
wolffd@0:        % = returns the number of annotations and 
wolffd@0:        % sorted lexicon
wolffd@0:        % ---
wolffd@0:        function [labels, score, annotids] = stats_count(db)
wolffd@0:        % [labels, score, annotids] = stats_count(db)
wolffd@0:        %
wolffd@0:        % score:    number of owners with an entry > 0 per annotation,
wolffd@0:        %           sorted in descending order
wolffd@0:        % annotids: annotation ids in the same order
wolffd@0:        % labels:   lexicon entries in the same order
wolffd@0:            
wolffd@0:            % count the positive entries of each column
wolffd@0:            is_set = db.annotsdb > 0;
wolffd@0:            column_counts = sum(is_set, 1);
wolffd@0:            
wolffd@0:            % most frequent annotation first
wolffd@0:            [score, annotids] = sort(full(column_counts), 'descend');
wolffd@0:            
wolffd@0:            labels = db.lexicon(annotids);
wolffd@0:        end
wolffd@0:        
wolffd@0:         % this is a stub for a tag cloud-like output
wolffd@0:        function [out] = annots_cloud(db, ownerid)
wolffd@0:        % out = annots_cloud(db)
wolffd@0:        % out = annots_cloud(db, ownerid)
wolffd@0:        %
wolffd@0:        % returns a string in which each annotation is repeated
wolffd@0:        % according to its summed score, each repetition being followed
wolffd@0:        % by '; '. blanks within an annotation are replaced by '-'.
wolffd@0:        % if ownerid is given, only the annotations of these owners are
wolffd@0:        % counted. out is '' if there is no positive score.
wolffd@0:        %
wolffd@0:        % ---
wolffd@0:        % TODO: actually output tag-cloud
wolffd@0:        % this output is aimed at input into a web interface
wolffd@0:        % we successfully used http://www.wordle.net/
wolffd@0:        % ---
wolffd@0:        
wolffd@0:            if nargin > 1
wolffd@0:                db2 = db.subset(ownerid);
wolffd@0:            else
wolffd@0:                db2 = db;
wolffd@0:            end
wolffd@0: 
wolffd@0:            [labels, score] = stats(db2);
wolffd@0:            
wolffd@0:            out = '';
wolffd@0:            
wolffd@0:            % without any positive score there is nothing to scale by
wolffd@0:            top_score = max(score);
wolffd@0:            if isempty(top_score) || top_score <= 0
wolffd@0:                return;
wolffd@0:            end
wolffd@0:            
wolffd@0:            % ---
wolffd@0:            % Note: for performance issues we compress this data
wolffd@0:            % to a maximum value of 100
wolffd@0:            % ---
wolffd@0:            score = ceil((score ./ top_score) * 100);
wolffd@0:            
wolffd@0:            parts = cell(1, numel(labels));
wolffd@0:            for i = 1:numel(labels)
wolffd@0:                
wolffd@0:                % repeat the tag according to score
wolffd@0:                annot = strrep(labels{i}, ' ', '-');
wolffd@0:                parts{i} = repmat(sprintf('%s; ', annot), 1, max(score(i), 0));
wolffd@0:            end
wolffd@0:            
wolffd@0:            % the tags are emitted in reverse order of their rank, so the
wolffd@0:            % highest scored tag ends the string
wolffd@0:            out = [parts{end:-1:1}];
wolffd@0:        end
wolffd@0:        
wolffd@0:        
wolffd@0:        function out = size(db)
wolffd@0:            % out = size(db)
wolffd@0:            %
wolffd@0:            % the size of this db is the number of entries in its
wolffd@0:            % lexicon
wolffd@0:            
wolffd@0:            vocabulary = db.lexicon;
wolffd@0:            out = numel(vocabulary);
wolffd@0:        end
wolffd@0: 
wolffd@0:        function add_pair(db, ownerid, annot, score)
wolffd@0: %            add_pair(db, ownerid, annot)
wolffd@0: %            add_pair(db, ownerid, annot, score)
wolffd@0: %
wolffd@0: %            stores score (default 1) for the given owner and
wolffd@0: %            annotation string. an unknown annotation is appended to
wolffd@0: %            the lexicon and an unknown owner to the list of owner ids,
wolffd@0: %            so this call can increase the size of the database
wolffd@0: 
wolffd@0:             if nargin < 4 
wolffd@0:                 score = 1;
wolffd@0:             end
wolffd@0:             
wolffd@0:             aid = strcellfind(db.lexicon, annot);     
wolffd@0:             
wolffd@0:             % create new position for annotation if necessary
wolffd@0:             if isempty(aid) 
wolffd@0:                 
wolffd@0:                 aid = numel(db.lexicon) + 1;
wolffd@0:                 
wolffd@0:                 % add to lexicon
wolffd@0:                 db.lexicon = {db.lexicon{:}, annot};
wolffd@0:                 
wolffd@0:                 % enhance annotation matrix by an empty column
wolffd@0:                 db.annotsdb = [db.annotsdb, ...
wolffd@0:                     sparse(size(db.annotsdb,1), 1)];
wolffd@0:             end
wolffd@0:             
wolffd@0:             
wolffd@0:             % create new position for clip if necessary
wolffd@0:             pos = owner_pos(db, ownerid);
wolffd@0:             if isempty(pos) 
wolffd@0:                 
wolffd@0:                 pos = numel(db.annots_oid) +1;
wolffd@0:                 
wolffd@0:                 % ---
wolffd@0:                 % add to oid. indexed growth works for row and column
wolffd@0:                 % vectors alike, whereas a horizontal concatenation
wolffd@0:                 % fails if the owner ids were supplied as a column
wolffd@0:                 % ---
wolffd@0:                 db.annots_oid(end + 1) = ownerid;
wolffd@0:                 
wolffd@0:                 % enhance annotation matrix by an empty row
wolffd@0:                 db.annotsdb = [db.annotsdb; ...
wolffd@0:                     sparse(1, size(db.annotsdb, 2))];
wolffd@0:             end
wolffd@0:             
wolffd@0:             % save data to database
wolffd@0:             db.annotsdb(pos, aid) = score;
wolffd@0:        end 
wolffd@0:        
wolffd@0:    end
wolffd@0:    
wolffd@0:    
wolffd@0:    methods(Hidden)
wolffd@0:        
wolffd@0:        function pos = owner_pos(db, ownerid)
wolffd@0:        % pos = owner_pos(db, ownerid)
wolffd@0:        %
wolffd@0:        % returns the position of ownerid within the stored owner ids,
wolffd@0:        % which is empty if the owner id is not stored
wolffd@0: 
wolffd@0:             is_owner = (db.annots_oid == ownerid);
wolffd@0:             pos = find(is_owner);
wolffd@0:        end
wolffd@0:    end
wolffd@0:    
wolffd@0: end