annotate toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirmap.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta)
wolffd@0 2 % mirmap(predictors_file, ratings_file) performs a statistical mapping
wolffd@0 3 % between ratings associated to a set of audio recordings, and a set of
wolffd@0 4 % variables computed for the same set of audio recordings. It might be
wolffd@0 5 % useful in particular when the set of predictors is large and diverse.
wolffd@0 6
wolffd@0 7 if nargin<3
wolffd@0 8 dist = 'lse';
wolffd@0 9 end
wolffd@0 10 if nargin<4
wolffd@0 11 alpha = 1;
wolffd@0 12 end
wolffd@0 13 if nargin<5
wolffd@0 14 delta = .00001;
wolffd@0 15 end
wolffd@0 16
wolffd@0 17 crosscor_thres = .6;
wolffd@0 18
wolffd@0 19 dist = str2func(dist);
wolffd@0 20
wolffd@0 21 predics = import(predics_ref);
wolffd@0 22 ratings = import(ratings_ref);
wolffd@0 23
wolffd@0 24 predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref);
wolffd@0 25 if isempty(predics.data)
wolffd@0 26 res = [];
wolffd@0 27 return
wolffd@0 28 end
wolffd@0 29
wolffd@0 30 ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref);
wolffd@0 31
wolffd@0 32 res = map(predics,ratings,crosscor_thres);
wolffd@0 33
wolffd@0 34
wolffd@0 35 %%
wolffd@0 36 function output = import(ref)
wolffd@0 37 output.data = [];
wolffd@0 38 output.fields = {};
wolffd@0 39 output.normal = [];
wolffd@0 40 if not(iscell(ref))
wolffd@0 41 ref = {ref};
wolffd@0 42 end
wolffd@0 43 for i = 1:length(ref)
wolffd@0 44 if not(iscell(ref{i}))
wolffd@0 45 ref{i} = {ref{i}};
wolffd@0 46 end
wolffd@0 47 if ischar(ref{i}{1})
wolffd@0 48 for j = 1:length(ref{i})
wolffd@0 49 importj = importdata(ref{i}{j});
wolffd@0 50 if not(isstruct(importj))
wolffd@0 51 imp = importj;
wolffd@0 52 importj = struct;
wolffd@0 53 importj.data = imp;
wolffd@0 54 for k = 1:size(imp,2)
wolffd@0 55 importj.textdata{k} = [ref{i}{j},'_',num2str(k)];
wolffd@0 56 end
wolffd@0 57 end
wolffd@0 58 if j == 1
wolffd@0 59 import = importj;
wolffd@0 60 else
wolffd@0 61 [c ia ib] = intersect(import.textdata(1,2:end),...
wolffd@0 62 importj.textdata(1,2:end));
wolffd@0 63 import.textdata = [import.textdata(:,[1 ia+1]);...
wolffd@0 64 importj.textdata(2:end,[1 ib+1])];
wolffd@0 65 import.data = [import.data(:,ia);importj.data(:,[1 ib+1])];
wolffd@0 66 end
wolffd@0 67 end
wolffd@0 68 output.data = [output.data import.data];
wolffd@0 69 nbfields = size(import.data,2);
wolffd@0 70 field = import.textdata(1,:);
wolffd@0 71 field = field(end-nbfields+1 : end);
wolffd@0 72 output.fields = [output.fields field];
wolffd@0 73 else
wolffd@0 74 output.data = [output.data ref{i}{1}];
wolffd@0 75 for j = 1:size(ref{i},2)
wolffd@0 76 output.fields = [output.fields num2str(j)];
wolffd@0 77 end
wolffd@0 78 end
wolffd@0 79 end
wolffd@0 80 ndata = size(output.data,2);
wolffd@0 81 output.normal = NaN(1,ndata);
wolffd@0 82 warning('off','stats:lillietest:OutOfRangeP');
wolffd@0 83 for i = 1:ndata
wolffd@0 84 data = output.data(:,i);
wolffd@0 85 data = data(~isnan(data));
wolffd@0 86 if length(data) < 4
wolffd@0 87 output.normal(i) = 0;
wolffd@0 88 else
wolffd@0 89 output.normal(i) = ~lillietest(data);
wolffd@0 90 end
wolffd@0 91 end
wolffd@0 92
wolffd@0 93
wolffd@0 94 %%
wolffd@0 95 function res = map(predics,ratings,crosscor_thres)
wolffd@0 96 nbrates = size(ratings.data,2);
wolffd@0 97 nbpreds = size(predics.data,2);
wolffd@0 98 res.normal.cor = NaN(nbpreds,nbrates);
wolffd@0 99 res.normal.pval = NaN(nbpreds,nbrates);
wolffd@0 100
wolffd@0 101 res.significant = cell(nbrates,1);
wolffd@0 102 for j = 1:nbrates
wolffd@0 103 figure
wolffd@0 104 set(gca,'YDir','reverse')
wolffd@0 105 hold on
wolffd@0 106 select = []; % list of selected features
wolffd@0 107 for i = 1:nbpreds
wolffd@0 108 if ~predics.normal(i)
wolffd@0 109 res.normal.cor(i,j) = NaN;
wolffd@0 110 continue
wolffd@0 111 end
wolffd@0 112 [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),...
wolffd@0 113 'rows','pairwise');
wolffd@0 114 res.normal.pval(i,j) = P(2);
wolffd@0 115 if P(2)<.05 % statistically significant
wolffd@0 116 select(end+1) = i; % included into the list of selected features
wolffd@0 117 res.normal.cor(i,j) = R(2);
wolffd@0 118 else
wolffd@0 119 res.normal.cor(i,j) = NaN;
wolffd@0 120 end
wolffd@0 121 end
wolffd@0 122 %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise');
wolffd@0 123
wolffd@0 124 [unused index] = sort(abs(res.normal.cor(select,j)),'descend');
wolffd@0 125 index = select(index);
wolffd@0 126 res.significant{j}.cor = res.normal.cor(index,j);
wolffd@0 127 res.significant{j}.pval = res.normal.pval(index,j);
wolffd@0 128 res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise');
wolffd@0 129 %res.inter(index,index,j);
wolffd@0 130 res.significant{j}.fields = {predics.fields{index}};
wolffd@0 131 res.significant{j}.alpha = predics.alpha(index);
wolffd@0 132 res.significant{j}.lambda = predics.lambda(index);
wolffd@0 133 k = 0;
wolffd@0 134 corfields = {};
wolffd@0 135 corbest = [];
wolffd@0 136 for i = 1:length(select)
wolffd@0 137 inter = max(abs(res.significant{j}.inter(1:i-1,i)));
wolffd@0 138 if i == 1 || inter < crosscor_thres
wolffd@0 139 % Features sufficiently independent to the better scoring ones
wolffd@0 140 % are selected
wolffd@0 141 k = k+1;
wolffd@0 142 if i == 1
wolffd@0 143 own = 1;
wolffd@0 144 else
wolffd@0 145 own = 1-inter;
wolffd@0 146 end
wolffd@0 147 col = res.significant{j}.pval(i);
wolffd@0 148 x1 = min(0,res.significant{j}.cor(i));
wolffd@0 149 y1 = k-own/2;
wolffd@0 150 x2 = x1+abs(res.significant{j}.cor(i));
wolffd@0 151 y2 = k+own/2;
wolffd@0 152 pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col])
wolffd@0 153 corbest = [corbest i];
wolffd@0 154 corfields = [corfields res.significant{j}.fields{i}];
wolffd@0 155 i
wolffd@0 156 res.significant{j}.fields{i}
wolffd@0 157 res.significant{j}.cor(i)
wolffd@0 158 inter
wolffd@0 159 end
wolffd@0 160 end
wolffd@0 161
wolffd@0 162 set(gca,'YTick',1:k,'YTickLabel',corfields)%,...
wolffd@0 163 % 'Position',[.4 .11 .5 .815])
wolffd@0 164 colorbar
wolffd@0 165 grid on
wolffd@0 166 title(['Correlation with rating ',ratings.fields{j}])
wolffd@0 167
wolffd@0 168 predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop
wolffd@0 169 res.best{j}.index = corbest;
wolffd@0 170 res.best{j}.fields = {res.significant{j}.fields{corbest}};
wolffd@0 171 res.best{j}.alpha = res.significant{j}.alpha(corbest);
wolffd@0 172 res.best{j}.lambda = res.significant{j}.lambda(corbest);
wolffd@0 173 if isempty(select)
wolffd@0 174 close
wolffd@0 175 elseif length(corbest)>1
wolffd@0 176 display('****************************************')
wolffd@0 177 display(['Regression for rating ',ratings.fields{j}])
wolffd@0 178 ratej = standardize(ratings.data(:,j));
wolffd@0 179 stepwisefit(predata,ratej,'maxiter',10);
wolffd@0 180 [b,se,pval,inmodel,stats,nextstep,history] ...
wolffd@0 181 = stepwisefit(predata,ratej,'maxiter',10,'scale','on');
wolffd@0 182 res.best{j}.fields{find(inmodel)}
wolffd@0 183 %stepwise(predata,ratej,[]);%,.001,.001);
wolffd@0 184 pause
wolffd@0 185 end
wolffd@0 186 end
wolffd@0 187
wolffd@0 188 function x = standardize(x)
wolffd@0 189 %mx = mean(x);
wolffd@0 190 %x = x-repmat(mx,[size(x,1) 1]);
wolffd@0 191 %sx = std(x);
wolffd@0 192 %sx(sx==0) = 1;
wolffd@0 193 %x = x./repmat(sx,[size(x,1) 1]);
wolffd@0 194
wolffd@0 195
wolffd@0 196 %%
wolffd@0 197 function output = normalize(input,dist,alpha,delta,txt,filename)
wolffd@0 198 fields = input.fields;
wolffd@0 199 nbfields = length(fields);
wolffd@0 200 output = input;
wolffd@0 201 h = waitbar(0,['Normalizing ',txt]);
wolffd@0 202 for i = 1:nbfields
wolffd@0 203 waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]);
wolffd@0 204 d = input.data(:,i);
wolffd@0 205 if length(d(~isnan(d)))>3 && ~input.normal(i)
wolffd@0 206 %figure(1)
wolffd@0 207 %hist(d,round(7.5*log(size(d,1))));
wolffd@0 208 %figure(2)
wolffd@0 209 %normplot(d)
wolffd@0 210
wolffd@0 211 [output.data(:,i) output.alpha(i) output.lambda(i)] = ...
wolffd@0 212 boxcox_search(d,dist,alpha,delta);
wolffd@0 213 %figure(3)
wolffd@0 214 %hist(output.data(:,i),round(7.5*log(size(d,1))));
wolffd@0 215 output.normal(i) = ~lillietest(output.data(:,i),.01);
wolffd@0 216 %figure(4)
wolffd@0 217 %normplot(data(:,i))
wolffd@0 218 end
wolffd@0 219 end
wolffd@0 220 display('Excluded:')
wolffd@0 221 output.fields{~output.normal}
wolffd@0 222 display('..')
wolffd@0 223 output.data(:,~output.normal) = [];
wolffd@0 224 output.fields(~output.normal) = [];
wolffd@0 225 output.normal(~output.normal) = [];
wolffd@0 226 if max(~output.normal)
wolffd@0 227 output.alpha(~output.normal) = [];
wolffd@0 228 output.lambda(~output.normal) = [];
wolffd@0 229 end
wolffd@0 230 %if strcmpi(filename(end-3:end),'.txt')
wolffd@0 231 % filename = filename(1:end-4);
wolffd@0 232 %end
wolffd@0 233 %filename = [filename '.mir.txt'];
wolffd@0 234 %mirexport(filename,output);
wolffd@0 235 close(h);
wolffd@0 236
wolffd@0 237
wolffd@0 238 function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta)
wolffd@0 239 if nargin<2
wolffd@0 240 dist = @lse;
wolffd@0 241 end
wolffd@0 242 if nargin<3
wolffd@0 243 alpha = 0;
wolffd@0 244 end
wolffd@0 245 lambda = -10:.01:10;
wolffd@0 246 if min(x)<0
wolffd@0 247 x = x-min(x);
wolffd@0 248 resalpha = -min(x);
wolffd@0 249 else
wolffd@0 250 resalpha = 0;
wolffd@0 251 end
wolffd@0 252 y = boxcox_transf(x,lambda);
wolffd@0 253 [y lambda d] = optimize(y,lambda,dist,x);
wolffd@0 254 %y = y(~isnan(y));
wolffd@0 255 while lillietest(y,.01) && alpha > delta
wolffd@0 256 yp = boxcox_transf(x+alpha,lambda);
wolffd@0 257 %plot(alpha,lse(yp,lambda,x))
wolffd@0 258 if dist(yp,lambda,x)<d
wolffd@0 259 x = x+alpha;
wolffd@0 260 resalpha = resalpha + alpha;
wolffd@0 261 y = yp;
wolffd@0 262 alpha = alpha*.999;
wolffd@0 263 d = dist(y,lambda,x);
wolffd@0 264 else
wolffd@0 265 minalpha = min(alpha,min(x)); % to avoid negative values
wolffd@0 266 yn = boxcox_transf(x - minalpha,lambda);
wolffd@0 267 %plot(alpha,lse(yn,lambda,x))
wolffd@0 268 if dist(yn,lambda,x)<d
wolffd@0 269 x = minalpha;
wolffd@0 270 resalpha = resalpha - minalpha;
wolffd@0 271 y = yn;
wolffd@0 272 alpha = alpha*.999;
wolffd@0 273 d = dist(y,lambda,x);
wolffd@0 274 else
wolffd@0 275 alpha = alpha/2;
wolffd@0 276 end
wolffd@0 277 end
wolffd@0 278 end
wolffd@0 279 %plot(lambda,dist(y,lambda,x),'.r')
wolffd@0 280
wolffd@0 281
wolffd@0 282 function y = boxcox_transf(x,lambda)
wolffd@0 283 lambda0 = find(~lambda);
wolffd@0 284 if min(x)<0
wolffd@0 285 x = x-min(x);
wolffd@0 286 end
wolffd@0 287 x = repmat(x,[1 length(lambda)]);
wolffd@0 288 lambda = repmat(lambda,[size(x,1) 1]);
wolffd@0 289 y = (x.^lambda-1)./lambda;
wolffd@0 290 y(:,lambda0) = log(x(:,lambda0));
wolffd@0 291
wolffd@0 292
wolffd@0 293 function [z,lambda,d] = optimize(y,lambda,dist,x)
wolffd@0 294 [d index] = min(dist(y,lambda,x));
wolffd@0 295 lambda = lambda(index);
wolffd@0 296 z = y(:,index);
wolffd@0 297
wolffd@0 298
wolffd@0 299 function d = lse(y,lambda,x)
wolffd@0 300 d = skewness(y).^2 + (kurtosis(y)-3).^2/4; % equation used in Jarque-Bera test
wolffd@0 301 %plot(lambda,d,'.')
wolffd@0 302
wolffd@0 303
wolffd@0 304 function d = mle(y,lambda,x)
wolffd@0 305 d = -size(y,1)/2*log(var(y))+(lambda-1)*sum(log(x));