Mercurial > hg > camir-aes2014
comparison toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirmap.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta) | |
2 % mirmap(predictors_file, ratings_file) performs a statistical mapping | |
3 % between ratings associated to a set of audio recordings, and a set of | |
4 % variables computed for the same set of audio recordings. It might be | |
5 % useful in particular when the set of predictors is large and diverse. | |
6 | |
7 if nargin<3 | |
8 dist = 'lse'; | |
9 end | |
10 if nargin<4 | |
11 alpha = 1; | |
12 end | |
13 if nargin<5 | |
14 delta = .00001; | |
15 end | |
16 | |
17 crosscor_thres = .6; | |
18 | |
19 dist = str2func(dist); | |
20 | |
21 predics = import(predics_ref); | |
22 ratings = import(ratings_ref); | |
23 | |
24 predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref); | |
25 if isempty(predics.data) | |
26 res = []; | |
27 return | |
28 end | |
29 | |
30 ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref); | |
31 | |
32 res = map(predics,ratings,crosscor_thres); | |
33 | |
34 | |
35 %% | |
36 function output = import(ref) | |
37 output.data = []; | |
38 output.fields = {}; | |
39 output.normal = []; | |
40 if not(iscell(ref)) | |
41 ref = {ref}; | |
42 end | |
43 for i = 1:length(ref) | |
44 if not(iscell(ref{i})) | |
45 ref{i} = {ref{i}}; | |
46 end | |
47 if ischar(ref{i}{1}) | |
48 for j = 1:length(ref{i}) | |
49 importj = importdata(ref{i}{j}); | |
50 if not(isstruct(importj)) | |
51 imp = importj; | |
52 importj = struct; | |
53 importj.data = imp; | |
54 for k = 1:size(imp,2) | |
55 importj.textdata{k} = [ref{i}{j},'_',num2str(k)]; | |
56 end | |
57 end | |
58 if j == 1 | |
59 import = importj; | |
60 else | |
61 [c ia ib] = intersect(import.textdata(1,2:end),... | |
62 importj.textdata(1,2:end)); | |
63 import.textdata = [import.textdata(:,[1 ia+1]);... | |
64 importj.textdata(2:end,[1 ib+1])]; | |
65 import.data = [import.data(:,ia);importj.data(:,[1 ib+1])]; | |
66 end | |
67 end | |
68 output.data = [output.data import.data]; | |
69 nbfields = size(import.data,2); | |
70 field = import.textdata(1,:); | |
71 field = field(end-nbfields+1 : end); | |
72 output.fields = [output.fields field]; | |
73 else | |
74 output.data = [output.data ref{i}{1}]; | |
75 for j = 1:size(ref{i},2) | |
76 output.fields = [output.fields num2str(j)]; | |
77 end | |
78 end | |
79 end | |
80 ndata = size(output.data,2); | |
81 output.normal = NaN(1,ndata); | |
82 warning('off','stats:lillietest:OutOfRangeP'); | |
83 for i = 1:ndata | |
84 data = output.data(:,i); | |
85 data = data(~isnan(data)); | |
86 if length(data) < 4 | |
87 output.normal(i) = 0; | |
88 else | |
89 output.normal(i) = ~lillietest(data); | |
90 end | |
91 end | |
92 | |
93 | |
94 %% | |
95 function res = map(predics,ratings,crosscor_thres) | |
96 nbrates = size(ratings.data,2); | |
97 nbpreds = size(predics.data,2); | |
98 res.normal.cor = NaN(nbpreds,nbrates); | |
99 res.normal.pval = NaN(nbpreds,nbrates); | |
100 | |
101 res.significant = cell(nbrates,1); | |
102 for j = 1:nbrates | |
103 figure | |
104 set(gca,'YDir','reverse') | |
105 hold on | |
106 select = []; % list of selected features | |
107 for i = 1:nbpreds | |
108 if ~predics.normal(i) | |
109 res.normal.cor(i,j) = NaN; | |
110 continue | |
111 end | |
112 [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),... | |
113 'rows','pairwise'); | |
114 res.normal.pval(i,j) = P(2); | |
115 if P(2)<.05 % statistically significant | |
116 select(end+1) = i; % included into the list of selected features | |
117 res.normal.cor(i,j) = R(2); | |
118 else | |
119 res.normal.cor(i,j) = NaN; | |
120 end | |
121 end | |
122 %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise'); | |
123 | |
124 [unused index] = sort(abs(res.normal.cor(select,j)),'descend'); | |
125 index = select(index); | |
126 res.significant{j}.cor = res.normal.cor(index,j); | |
127 res.significant{j}.pval = res.normal.pval(index,j); | |
128 res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise'); | |
129 %res.inter(index,index,j); | |
130 res.significant{j}.fields = {predics.fields{index}}; | |
131 res.significant{j}.alpha = predics.alpha(index); | |
132 res.significant{j}.lambda = predics.lambda(index); | |
133 k = 0; | |
134 corfields = {}; | |
135 corbest = []; | |
136 for i = 1:length(select) | |
137 inter = max(abs(res.significant{j}.inter(1:i-1,i))); | |
138 if i == 1 || inter < crosscor_thres | |
139 % Features sufficiently independent to the better scoring ones | |
140 % are selected | |
141 k = k+1; | |
142 if i == 1 | |
143 own = 1; | |
144 else | |
145 own = 1-inter; | |
146 end | |
147 col = res.significant{j}.pval(i); | |
148 x1 = min(0,res.significant{j}.cor(i)); | |
149 y1 = k-own/2; | |
150 x2 = x1+abs(res.significant{j}.cor(i)); | |
151 y2 = k+own/2; | |
152 pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col]) | |
153 corbest = [corbest i]; | |
154 corfields = [corfields res.significant{j}.fields{i}]; | |
155 i | |
156 res.significant{j}.fields{i} | |
157 res.significant{j}.cor(i) | |
158 inter | |
159 end | |
160 end | |
161 | |
162 set(gca,'YTick',1:k,'YTickLabel',corfields)%,... | |
163 % 'Position',[.4 .11 .5 .815]) | |
164 colorbar | |
165 grid on | |
166 title(['Correlation with rating ',ratings.fields{j}]) | |
167 | |
168 predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop | |
169 res.best{j}.index = corbest; | |
170 res.best{j}.fields = {res.significant{j}.fields{corbest}}; | |
171 res.best{j}.alpha = res.significant{j}.alpha(corbest); | |
172 res.best{j}.lambda = res.significant{j}.lambda(corbest); | |
173 if isempty(select) | |
174 close | |
175 elseif length(corbest)>1 | |
176 display('****************************************') | |
177 display(['Regression for rating ',ratings.fields{j}]) | |
178 ratej = standardize(ratings.data(:,j)); | |
179 stepwisefit(predata,ratej,'maxiter',10); | |
180 [b,se,pval,inmodel,stats,nextstep,history] ... | |
181 = stepwisefit(predata,ratej,'maxiter',10,'scale','on'); | |
182 res.best{j}.fields{find(inmodel)} | |
183 %stepwise(predata,ratej,[]);%,.001,.001); | |
184 pause | |
185 end | |
186 end | |
187 | |
188 function x = standardize(x) | |
189 %mx = mean(x); | |
190 %x = x-repmat(mx,[size(x,1) 1]); | |
191 %sx = std(x); | |
192 %sx(sx==0) = 1; | |
193 %x = x./repmat(sx,[size(x,1) 1]); | |
194 | |
195 | |
196 %% | |
197 function output = normalize(input,dist,alpha,delta,txt,filename) | |
198 fields = input.fields; | |
199 nbfields = length(fields); | |
200 output = input; | |
201 h = waitbar(0,['Normalizing ',txt]); | |
202 for i = 1:nbfields | |
203 waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]); | |
204 d = input.data(:,i); | |
205 if length(d(~isnan(d)))>3 && ~input.normal(i) | |
206 %figure(1) | |
207 %hist(d,round(7.5*log(size(d,1)))); | |
208 %figure(2) | |
209 %normplot(d) | |
210 | |
211 [output.data(:,i) output.alpha(i) output.lambda(i)] = ... | |
212 boxcox_search(d,dist,alpha,delta); | |
213 %figure(3) | |
214 %hist(output.data(:,i),round(7.5*log(size(d,1)))); | |
215 output.normal(i) = ~lillietest(output.data(:,i),.01); | |
216 %figure(4) | |
217 %normplot(data(:,i)) | |
218 end | |
219 end | |
220 display('Excluded:') | |
221 output.fields{~output.normal} | |
222 display('..') | |
223 output.data(:,~output.normal) = []; | |
224 output.fields(~output.normal) = []; | |
225 output.normal(~output.normal) = []; | |
226 if max(~output.normal) | |
227 output.alpha(~output.normal) = []; | |
228 output.lambda(~output.normal) = []; | |
229 end | |
230 %if strcmpi(filename(end-3:end),'.txt') | |
231 % filename = filename(1:end-4); | |
232 %end | |
233 %filename = [filename '.mir.txt']; | |
234 %mirexport(filename,output); | |
235 close(h); | |
236 | |
237 | |
238 function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta) | |
239 if nargin<2 | |
240 dist = @lse; | |
241 end | |
242 if nargin<3 | |
243 alpha = 0; | |
244 end | |
245 lambda = -10:.01:10; | |
246 if min(x)<0 | |
247 x = x-min(x); | |
248 resalpha = -min(x); | |
249 else | |
250 resalpha = 0; | |
251 end | |
252 y = boxcox_transf(x,lambda); | |
253 [y lambda d] = optimize(y,lambda,dist,x); | |
254 %y = y(~isnan(y)); | |
255 while lillietest(y,.01) && alpha > delta | |
256 yp = boxcox_transf(x+alpha,lambda); | |
257 %plot(alpha,lse(yp,lambda,x)) | |
258 if dist(yp,lambda,x)<d | |
259 x = x+alpha; | |
260 resalpha = resalpha + alpha; | |
261 y = yp; | |
262 alpha = alpha*.999; | |
263 d = dist(y,lambda,x); | |
264 else | |
265 minalpha = min(alpha,min(x)); % to avoid negative values | |
266 yn = boxcox_transf(x - minalpha,lambda); | |
267 %plot(alpha,lse(yn,lambda,x)) | |
268 if dist(yn,lambda,x)<d | |
269 x = minalpha; | |
270 resalpha = resalpha - minalpha; | |
271 y = yn; | |
272 alpha = alpha*.999; | |
273 d = dist(y,lambda,x); | |
274 else | |
275 alpha = alpha/2; | |
276 end | |
277 end | |
278 end | |
279 %plot(lambda,dist(y,lambda,x),'.r') | |
280 | |
281 | |
282 function y = boxcox_transf(x,lambda) | |
283 lambda0 = find(~lambda); | |
284 if min(x)<0 | |
285 x = x-min(x); | |
286 end | |
287 x = repmat(x,[1 length(lambda)]); | |
288 lambda = repmat(lambda,[size(x,1) 1]); | |
289 y = (x.^lambda-1)./lambda; | |
290 y(:,lambda0) = log(x(:,lambda0)); | |
291 | |
292 | |
293 function [z,lambda,d] = optimize(y,lambda,dist,x) | |
294 [d index] = min(dist(y,lambda,x)); | |
295 lambda = lambda(index); | |
296 z = y(:,index); | |
297 | |
298 | |
299 function d = lse(y,lambda,x) | |
300 d = skewness(y).^2 + (kurtosis(y)-3).^2/4; % equation used in Jarque-Bera test | |
301 %plot(lambda,d,'.') | |
302 | |
303 | |
304 function d = mle(y,lambda,x) | |
305 d = -size(y,1)/2*log(var(y))+(lambda-1)*sum(log(x)); |