annotate toolboxes/MIRtoolbox1.3.2/MIRToolbox/@mirclassify/mirclassify.m @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
function c = mirclassify(a,da,t,dt,varargin)
%   c = mirclassify(test,features_test,train,features_train) classifies the
%       audio sequence(s) contained in the audio object test, along the
%       analytic feature(s) features_test, following the supervised
%       learning of a training set defined by the audio object train and
%       the corresponding analytic feature(s) features_train.
%           * The analytic feature(s) features_test should *not* be frame
%               decomposed. Frame-decomposed data should first be
%               summarized, using for instance mirmean or mirstd.
%           * Multiple analytic features have to be grouped into one array
%               of cells.
%       You can also integrate your own arrays of numbers computed outside
%           MIRtoolbox as part of the features. These arrays should be
%           given as matrices where each successive column is the analysis
%           of each successive file.
%   Example:
%       mirclassify(test, mfcc(test), train, mfcc(train))
%       mirclassify(test, {mfcc(test), centroid(test)}, ...
%                   train, {mfcc(train), centroid(train)})
%   Optional argument:
%       mirclassify(...,'Nearest') uses the minimum distance strategy.
%           (by default)
%       mirclassify(...,'Nearest',k) uses the k-nearest-neighbour strategy.
%           Default value: k = 1, corresponding to the minimum distance
%               strategy.
%           If k exceeds the number of training items, all the training
%               items are used as neighbours.
%       mirclassify(...,'GMM',ng) uses a gaussian mixture model. Each class is
%           modeled by at most ng gaussians.
%           Default value: ng = 1.
%           Additionally, the type of mixture model can be specified,
%           using the set of value proposed in the gmm function: i.e.,
%           'spherical','diag','full' (default value) and 'ppca'.
%               (cf. help gmm)
%           Requires the Netlab toolbox.
%
%   The returned object stores the training and test labels, the training
%   and test feature matrices (one column per file), the number of
%   training observations (nbobs), the number of model parameters
%   (nbparam), the predicted class of each test file (classes) and, when
%   the test set is labelled, the proportion of correct predictions
%   (correct, NaN otherwise).

lab = get(t,'Label');
c.labtraining = lab;
rlab = get(a,'Label');
c.labtest = rlab;
% d and mahl are returned by scanargin for interface reasons only: mahl is
% replaced below by the covariance of the data, and d is merely forwarded
% to the distance function.
[k,ncentres,covartype,kmiter,emiter,d,norml,mahl] = scanargin(varargin);
disp('Classifying...')

% Training features, one column per training file.
lvt = length(get(t,'Data'));
vt = gatherfeatures(dt,lvt,norml);
c.training = vt;
dim = size(vt,1);

% Test features, one column per test file.
lva = length(get(a,'Data'));
va = gatherfeatures(da,lva,norml);
c.test = va;
c.nbobs = lvt;

% Covariance of the pooled (training + test) features, used by the
% distance function.
totva = [vt va];
mahl = cov(totva');

if k        % k-Nearest Neighbour
    c.nbparam = lvt;
    kk = min(k,lvt);    % There cannot be more neighbours than training items.
    for l = 1:lva
        [sv,idx] = sort(distance(va(:,l),vt,d,mahl));
        labs = cell(0); % Class labels
        founds = [];    % Number of found elements in each class
        for i = idx(1:kk)
            labi = lab{i};
            found = 0;
            for j = 1:length(labs)
                if isequal(labi,labs{j})
                    found = j;
                end
            end
            if found
                founds(found) = founds(found)+1;
            else
                labs{end+1} = labi;
                founds(end+1) = 1;
            end
        end
        % In case of a tie, max returns the first label encountered, i.e.
        % the one whose first neighbour is the closest.
        [b ib] = max(founds);
        c.classes{l} = labs{ib};
    end
elseif ncentres     % Gaussian Mixture Model
    labs = cell(0);     % Class labels
    founds = cell(0);   % Elements associated to each label.
    for i = 1:lvt
        labi = lab{i};
        found = 0;
        for j = 1:length(labs)
            if isequal(labi,labs{j})
                founds{j}(end+1) = i;
                found = 1;
            end
        end
        if not(found)
            labs{end+1} = labi;
            founds{end+1} = i;
        end
    end
    options      = zeros(1, 18);
    options(2:3) = 1e-4;
    options(4)   = 1e-6;
    options(16)  = 1e-8;
    options(17)  = 0.1;
    options(1)   = 0; %Prints out error values, -1 else
    c.nbparam = 0;
    OK = 0;
    % The whole set of class models is re-estimated until every call to
    % gmmem succeeds.
    while not(OK)
        OK = 1;
        % Restart the parameter count, so that the parameters of a
        % previous failed pass are not counted again.
        c.nbparam = 0;
        for i = 1:length(labs)
            options(14) = kmiter;
            try
                mix{i} = gmm(dim,ncentres,covartype);
            catch
                error('ERROR IN CLASSIFY: Netlab toolbox not installed.');
            end
            mix{i} = netlabgmminit(mix{i},vt(:,founds{i})',options);
            options(5) = 1;
            options(14) = emiter;
            try
                mix{i} = gmmem(mix{i},vt(:,founds{i})',options);
                c.nbparam = c.nbparam + ...
                    length(mix{i}.centres(:)) + length(mix{i}.covars(:));
            catch
                err = lasterr;
                warning('WARNING IN CLASSIFY: Problem when calling GMMEM:');
                disp(err);
                disp('Let us try again...');
                OK = 0;
            end
        end
    end
    pr = zeros(lva,length(labs));   % One row per test file, one column per class
    for i = 1:length(labs)
        prior = length(founds{i})/lvt;
        pr(:,i) = prior * gmmprob(mix{i},va');
        %c.post{i} = gmmpost(mix{i},va');
    end
    % The maximum is explicitly taken along the class dimension: max(pr')
    % would reduce a single-class (row vector) case to one scalar.
    [mm ib] = max(pr,[],2);
    for i = 1:lva
        c.classes{i} = labs{ib(i)};
    end
end
if isempty(rlab)
    c.correct = NaN;
else
    correct = 0;
    for i = 1:lva
        if isequal(c.classes{i},rlab{i})
            correct = correct + 1;
        end
    end
    c.correct = correct / lva;
end
c = class(c,'mirclassify');


function v = gatherfeatures(feats,nfiles,norml)
% Stacks the feature(s) feats into one matrix with one column per file.
%   feats is either one feature or a cell array of features; each feature
%   is either a numeric matrix (one column per file) or an object whose
%   'Data' field is read. nfiles is the number of files and norml toggles
%   the normalization performed by integrate.
if not(iscell(feats))
    feats = {feats};
end
v = [];
for i = 1:length(feats)
    if isnumeric(feats{i})
        % Numeric matrix: one cell per column (i.e., per file).
        data = cell(1,size(feats{i},2));
        for j = 1:size(feats{i},2)
            data{j} = feats{i}(:,j);
        end
    else
        data = get(feats{i},'Data');
    end
    v = integrate(v,data,nfiles,norml);
    if isa(feats{i},'scalar')
        m = mode(feats{i});
        if not(isempty(m))
            v = integrate(v,m,nfiles,norml);
        end
    end
end
Daniel@0 188
Daniel@0 189
function vt = integrate(vt,v,lvt,norml)
% vt = integrate(vt,v,lvt,norml) appends to the feature matrix vt the rows
%   built from the cell array v, whose lvt first cells each hold the
%   (column) data of one file, possibly wrapped in up to two levels of
%   cells. If norml is set, each new row is centred and divided by its
%   standard deviation across files before being appended.
block = [];
for col = 1:lvt
    item = v{col};
    % Unwrap at most two levels of cell nesting.
    for depth = 1:2
        if iscell(item)
            item = item{1};
        end
    end
    if size(item,2) > 1
        mirerror('MIRCLASSIFY','The analytic features guiding the classification should not be frame-decomposed.');
    end
    block(:,col) = item;
end
if norml
    ncols = size(block,2);
    spread = repmat(std(block,0,2),[1 ncols]);
    spread = spread + (spread == 0);  % Constant rows are divided by 1 instead of 0
    centre = repmat(mean(block,2),[1 ncols]);
    block = (block - centre) ./ spread;
end
vt(end+1:end+size(block,1),:) = block;
Daniel@0 211
Daniel@0 212
function [k,ncentres,covartype,kmiter,emiter,d,norml,mahl] = scanargin(v)
% Parses the optional arguments of mirclassify, given as the cell array v.
%   'Nearest' (optionally followed by a number k) selects the
%       k-nearest-neighbour strategy; a number on its own also sets k.
%   'GMM' (optionally followed by a number of gaussians and/or a
%       covariance type, in either order) selects the gaussian mixture
%       model and sets k to 0.
%   The remaining outputs always keep their default values.

% Default values
k = 1;
ncentres = 0;
covartype = 'full';
kmiter = 10;
emiter = 100;
d = 0;
norml = 1;
mahl = 1;

nargs = length(v);
pos = 1;
while pos <= nargs
    arg = v{pos};
    isnearest = ischar(arg) && strcmpi(arg,'Nearest');
    isgmm = ischar(arg) && strcmpi(arg,'GMM');
    if isnearest
        k = 1;
        if pos < nargs && isnumeric(v{pos+1})
            pos = pos+1;
            k = v{pos};
        end
    elseif isgmm
        k = 0;
        ncentres = 1;
        if pos < nargs
            follower = v{pos+1};
            if isnumeric(follower)
                % Number of gaussians first, then an optional covariance type
                pos = pos+1;
                ncentres = follower;
                if pos < nargs && ischar(v{pos+1})
                    pos = pos+1;
                    covartype = v{pos};
                end
            elseif ischar(follower)
                % Covariance type first, then an optional number of gaussians
                pos = pos+1;
                covartype = follower;
                if pos < nargs && isnumeric(v{pos+1})
                    pos = pos+1;
                    ncentres = v{pos};
                end
            end
        end
    elseif isnumeric(arg)
        k = arg;
    else
        error('ERROR IN MIRCLASSIFY: Syntax error. See help mirclassify.');
    end
    pos = pos+1;
end
Daniel@0 258
Daniel@0 259
function y = distance(a,t,d,mahl)
% y = distance(a,t,d,mahl) returns, as a row vector, the distance between
%   the column vector a and each successive column of t, computed as
%       sqrt((a - t(:,i))' * M * (a - t(:,i)))
%   where M is the inverse of the matrix mahl, or its pseudo-inverse when
%   the determinant of mahl is not strictly positive.
%   d is accepted for interface compatibility but is not used.
%   If t has no column, an empty (1x0) vector is returned.

% The (pseudo-)inverse does not depend on the column of t: it is computed
% only once.
if det(mahl) > 0 % more generally, uses cond
    lham = inv(mahl);
else
    lham = pinv(mahl);
end
n = size(t,2);
y = zeros(1,n);
for i = 1:n
    delta = a - t(:,i);
    y(i) = sqrt(delta'*lham*delta);
end