comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirmap.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9a9cd732c1e
1 function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta)
2 % mirmap(predictors_file, ratings_file) performs a statistical mapping
3 % between ratings associated to a set of audio recordings, and a set of
4 % variables computed for the same set of audio recordings. It might be
5 % useful in particular when the set of predictors is large and diverse.
6
7 if nargin<3
8 dist = 'lse';
9 end
10 if nargin<4
11 alpha = 1;
12 end
13 if nargin<5
14 delta = .00001;
15 end
16
17 crosscor_thres = .6;
18
19 dist = str2func(dist);
20
21 predics = import(predics_ref);
22 ratings = import(ratings_ref);
23
24 predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref);
25 if isempty(predics.data)
26 res = [];
27 return
28 end
29
30 ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref);
31
32 res = map(predics,ratings,crosscor_thres);
33
34
35 %%
36 function output = import(ref)
37 output.data = [];
38 output.fields = {};
39 output.normal = [];
40 if not(iscell(ref))
41 ref = {ref};
42 end
43 for i = 1:length(ref)
44 if not(iscell(ref{i}))
45 ref{i} = {ref{i}};
46 end
47 if ischar(ref{i}{1})
48 for j = 1:length(ref{i})
49 importj = importdata(ref{i}{j});
50 if not(isstruct(importj))
51 imp = importj;
52 importj = struct;
53 importj.data = imp;
54 for k = 1:size(imp,2)
55 importj.textdata{k} = [ref{i}{j},'_',num2str(k)];
56 end
57 end
58 if j == 1
59 import = importj;
60 else
61 [c ia ib] = intersect(import.textdata(1,2:end),...
62 importj.textdata(1,2:end));
63 import.textdata = [import.textdata(:,[1 ia+1]);...
64 importj.textdata(2:end,[1 ib+1])];
65 import.data = [import.data(:,ia);importj.data(:,[1 ib+1])];
66 end
67 end
68 output.data = [output.data import.data];
69 nbfields = size(import.data,2);
70 field = import.textdata(1,:);
71 field = field(end-nbfields+1 : end);
72 output.fields = [output.fields field];
73 else
74 output.data = [output.data ref{i}{1}];
75 for j = 1:size(ref{i},2)
76 output.fields = [output.fields num2str(j)];
77 end
78 end
79 end
80 ndata = size(output.data,2);
81 output.normal = NaN(1,ndata);
82 warning('off','stats:lillietest:OutOfRangeP');
83 for i = 1:ndata
84 data = output.data(:,i);
85 data = data(~isnan(data));
86 if length(data) < 4
87 output.normal(i) = 0;
88 else
89 output.normal(i) = ~lillietest(data);
90 end
91 end
92
93
94 %%
95 function res = map(predics,ratings,crosscor_thres)
96 nbrates = size(ratings.data,2);
97 nbpreds = size(predics.data,2);
98 res.normal.cor = NaN(nbpreds,nbrates);
99 res.normal.pval = NaN(nbpreds,nbrates);
100
101 res.significant = cell(nbrates,1);
102 for j = 1:nbrates
103 figure
104 set(gca,'YDir','reverse')
105 hold on
106 select = []; % list of selected features
107 for i = 1:nbpreds
108 if ~predics.normal(i)
109 res.normal.cor(i,j) = NaN;
110 continue
111 end
112 [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),...
113 'rows','pairwise');
114 res.normal.pval(i,j) = P(2);
115 if P(2)<.05 % statistically significant
116 select(end+1) = i; % included into the list of selected features
117 res.normal.cor(i,j) = R(2);
118 else
119 res.normal.cor(i,j) = NaN;
120 end
121 end
122 %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise');
123
124 [unused index] = sort(abs(res.normal.cor(select,j)),'descend');
125 index = select(index);
126 res.significant{j}.cor = res.normal.cor(index,j);
127 res.significant{j}.pval = res.normal.pval(index,j);
128 res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise');
129 %res.inter(index,index,j);
130 res.significant{j}.fields = {predics.fields{index}};
131 res.significant{j}.alpha = predics.alpha(index);
132 res.significant{j}.lambda = predics.lambda(index);
133 k = 0;
134 corfields = {};
135 corbest = [];
136 for i = 1:length(select)
137 inter = max(abs(res.significant{j}.inter(1:i-1,i)));
138 if i == 1 || inter < crosscor_thres
139 % Features sufficiently independent to the better scoring ones
140 % are selected
141 k = k+1;
142 if i == 1
143 own = 1;
144 else
145 own = 1-inter;
146 end
147 col = res.significant{j}.pval(i);
148 x1 = min(0,res.significant{j}.cor(i));
149 y1 = k-own/2;
150 x2 = x1+abs(res.significant{j}.cor(i));
151 y2 = k+own/2;
152 pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col])
153 corbest = [corbest i];
154 corfields = [corfields res.significant{j}.fields{i}];
155 i
156 res.significant{j}.fields{i}
157 res.significant{j}.cor(i)
158 inter
159 end
160 end
161
162 set(gca,'YTick',1:k,'YTickLabel',corfields)%,...
163 % 'Position',[.4 .11 .5 .815])
164 colorbar
165 grid on
166 title(['Correlation with rating ',ratings.fields{j}])
167
168 predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop
169 res.best{j}.index = corbest;
170 res.best{j}.fields = {res.significant{j}.fields{corbest}};
171 res.best{j}.alpha = res.significant{j}.alpha(corbest);
172 res.best{j}.lambda = res.significant{j}.lambda(corbest);
173 if isempty(select)
174 close
175 elseif length(corbest)>1
176 display('****************************************')
177 display(['Regression for rating ',ratings.fields{j}])
178 ratej = standardize(ratings.data(:,j));
179 stepwisefit(predata,ratej,'maxiter',10);
180 [b,se,pval,inmodel,stats,nextstep,history] ...
181 = stepwisefit(predata,ratej,'maxiter',10,'scale','on');
182 res.best{j}.fields{find(inmodel)}
183 %stepwise(predata,ratej,[]);%,.001,.001);
184 pause
185 end
186 end
187
188 function x = standardize(x)
189 %mx = mean(x);
190 %x = x-repmat(mx,[size(x,1) 1]);
191 %sx = std(x);
192 %sx(sx==0) = 1;
193 %x = x./repmat(sx,[size(x,1) 1]);
194
195
196 %%
197 function output = normalize(input,dist,alpha,delta,txt,filename)
198 fields = input.fields;
199 nbfields = length(fields);
200 output = input;
201 h = waitbar(0,['Normalizing ',txt]);
202 for i = 1:nbfields
203 waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]);
204 d = input.data(:,i);
205 if length(d(~isnan(d)))>3 && ~input.normal(i)
206 %figure(1)
207 %hist(d,round(7.5*log(size(d,1))));
208 %figure(2)
209 %normplot(d)
210
211 [output.data(:,i) output.alpha(i) output.lambda(i)] = ...
212 boxcox_search(d,dist,alpha,delta);
213 %figure(3)
214 %hist(output.data(:,i),round(7.5*log(size(d,1))));
215 output.normal(i) = ~lillietest(output.data(:,i),.01);
216 %figure(4)
217 %normplot(data(:,i))
218 end
219 end
220 display('Excluded:')
221 output.fields{~output.normal}
222 display('..')
223 output.data(:,~output.normal) = [];
224 output.fields(~output.normal) = [];
225 output.normal(~output.normal) = [];
226 if max(~output.normal)
227 output.alpha(~output.normal) = [];
228 output.lambda(~output.normal) = [];
229 end
230 %if strcmpi(filename(end-3:end),'.txt')
231 % filename = filename(1:end-4);
232 %end
233 %filename = [filename '.mir.txt'];
234 %mirexport(filename,output);
235 close(h);
236
237
238 function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta)
239 if nargin<2
240 dist = @lse;
241 end
242 if nargin<3
243 alpha = 0;
244 end
245 lambda = -10:.01:10;
246 if min(x)<0
247 x = x-min(x);
248 resalpha = -min(x);
249 else
250 resalpha = 0;
251 end
252 y = boxcox_transf(x,lambda);
253 [y lambda d] = optimize(y,lambda,dist,x);
254 %y = y(~isnan(y));
255 while lillietest(y,.01) && alpha > delta
256 yp = boxcox_transf(x+alpha,lambda);
257 %plot(alpha,lse(yp,lambda,x))
258 if dist(yp,lambda,x)<d
259 x = x+alpha;
260 resalpha = resalpha + alpha;
261 y = yp;
262 alpha = alpha*.999;
263 d = dist(y,lambda,x);
264 else
265 minalpha = min(alpha,min(x)); % to avoid negative values
266 yn = boxcox_transf(x - minalpha,lambda);
267 %plot(alpha,lse(yn,lambda,x))
268 if dist(yn,lambda,x)<d
269 x = minalpha;
270 resalpha = resalpha - minalpha;
271 y = yn;
272 alpha = alpha*.999;
273 d = dist(y,lambda,x);
274 else
275 alpha = alpha/2;
276 end
277 end
278 end
279 %plot(lambda,dist(y,lambda,x),'.r')
280
281
282 function y = boxcox_transf(x,lambda)
283 lambda0 = find(~lambda);
284 if min(x)<0
285 x = x-min(x);
286 end
287 x = repmat(x,[1 length(lambda)]);
288 lambda = repmat(lambda,[size(x,1) 1]);
289 y = (x.^lambda-1)./lambda;
290 y(:,lambda0) = log(x(:,lambda0));
291
292
293 function [z,lambda,d] = optimize(y,lambda,dist,x)
294 [d index] = min(dist(y,lambda,x));
295 lambda = lambda(index);
296 z = y(:,index);
297
298
299 function d = lse(y,lambda,x)
300 d = skewness(y).^2 + (kurtosis(y)-3).^2/4; % equation used in Jarque-Bera test
301 %plot(lambda,d,'.')
302
303
304 function d = mle(y,lambda,x)
305 d = -size(y,1)/2*log(var(y))+(lambda-1)*sum(log(x));