wolffd@0
|
1 function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta)
|
wolffd@0
|
2 % mirmap(predictors_file, ratings_file) performs a statistical mapping
|
wolffd@0
|
3 % between ratings associated to a set of audio recordings, and a set of
|
wolffd@0
|
4 % variables computed for the same set of audio recordings. It might be
|
wolffd@0
|
5 % useful in particular when the set of predictors is large and diverse.
|
wolffd@0
|
6
|
wolffd@0
|
7 if nargin<3
|
wolffd@0
|
8 dist = 'lse';
|
wolffd@0
|
9 end
|
wolffd@0
|
10 if nargin<4
|
wolffd@0
|
11 alpha = 1;
|
wolffd@0
|
12 end
|
wolffd@0
|
13 if nargin<5
|
wolffd@0
|
14 delta = .00001;
|
wolffd@0
|
15 end
|
wolffd@0
|
16
|
wolffd@0
|
17 crosscor_thres = .6;
|
wolffd@0
|
18
|
wolffd@0
|
19 dist = str2func(dist);
|
wolffd@0
|
20
|
wolffd@0
|
21 predics = import(predics_ref);
|
wolffd@0
|
22 ratings = import(ratings_ref);
|
wolffd@0
|
23
|
wolffd@0
|
24 predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref);
|
wolffd@0
|
25 if isempty(predics.data)
|
wolffd@0
|
26 res = [];
|
wolffd@0
|
27 return
|
wolffd@0
|
28 end
|
wolffd@0
|
29
|
wolffd@0
|
30 ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref);
|
wolffd@0
|
31
|
wolffd@0
|
32 res = map(predics,ratings,crosscor_thres);
|
wolffd@0
|
33
|
wolffd@0
|
34
|
wolffd@0
|
35 %%
|
wolffd@0
|
36 function output = import(ref)
|
wolffd@0
|
37 output.data = [];
|
wolffd@0
|
38 output.fields = {};
|
wolffd@0
|
39 output.normal = [];
|
wolffd@0
|
40 if not(iscell(ref))
|
wolffd@0
|
41 ref = {ref};
|
wolffd@0
|
42 end
|
wolffd@0
|
43 for i = 1:length(ref)
|
wolffd@0
|
44 if not(iscell(ref{i}))
|
wolffd@0
|
45 ref{i} = {ref{i}};
|
wolffd@0
|
46 end
|
wolffd@0
|
47 if ischar(ref{i}{1})
|
wolffd@0
|
48 for j = 1:length(ref{i})
|
wolffd@0
|
49 importj = importdata(ref{i}{j});
|
wolffd@0
|
50 if not(isstruct(importj))
|
wolffd@0
|
51 imp = importj;
|
wolffd@0
|
52 importj = struct;
|
wolffd@0
|
53 importj.data = imp;
|
wolffd@0
|
54 for k = 1:size(imp,2)
|
wolffd@0
|
55 importj.textdata{k} = [ref{i}{j},'_',num2str(k)];
|
wolffd@0
|
56 end
|
wolffd@0
|
57 end
|
wolffd@0
|
58 if j == 1
|
wolffd@0
|
59 import = importj;
|
wolffd@0
|
60 else
|
wolffd@0
|
61 [c ia ib] = intersect(import.textdata(1,2:end),...
|
wolffd@0
|
62 importj.textdata(1,2:end));
|
wolffd@0
|
63 import.textdata = [import.textdata(:,[1 ia+1]);...
|
wolffd@0
|
64 importj.textdata(2:end,[1 ib+1])];
|
wolffd@0
|
65 import.data = [import.data(:,ia);importj.data(:,[1 ib+1])];
|
wolffd@0
|
66 end
|
wolffd@0
|
67 end
|
wolffd@0
|
68 output.data = [output.data import.data];
|
wolffd@0
|
69 nbfields = size(import.data,2);
|
wolffd@0
|
70 field = import.textdata(1,:);
|
wolffd@0
|
71 field = field(end-nbfields+1 : end);
|
wolffd@0
|
72 output.fields = [output.fields field];
|
wolffd@0
|
73 else
|
wolffd@0
|
74 output.data = [output.data ref{i}{1}];
|
wolffd@0
|
75 for j = 1:size(ref{i},2)
|
wolffd@0
|
76 output.fields = [output.fields num2str(j)];
|
wolffd@0
|
77 end
|
wolffd@0
|
78 end
|
wolffd@0
|
79 end
|
wolffd@0
|
80 ndata = size(output.data,2);
|
wolffd@0
|
81 output.normal = NaN(1,ndata);
|
wolffd@0
|
82 warning('off','stats:lillietest:OutOfRangeP');
|
wolffd@0
|
83 for i = 1:ndata
|
wolffd@0
|
84 data = output.data(:,i);
|
wolffd@0
|
85 data = data(~isnan(data));
|
wolffd@0
|
86 if length(data) < 4
|
wolffd@0
|
87 output.normal(i) = 0;
|
wolffd@0
|
88 else
|
wolffd@0
|
89 output.normal(i) = ~lillietest(data);
|
wolffd@0
|
90 end
|
wolffd@0
|
91 end
|
wolffd@0
|
92
|
wolffd@0
|
93
|
wolffd@0
|
94 %%
|
wolffd@0
|
95 function res = map(predics,ratings,crosscor_thres)
|
wolffd@0
|
96 nbrates = size(ratings.data,2);
|
wolffd@0
|
97 nbpreds = size(predics.data,2);
|
wolffd@0
|
98 res.normal.cor = NaN(nbpreds,nbrates);
|
wolffd@0
|
99 res.normal.pval = NaN(nbpreds,nbrates);
|
wolffd@0
|
100
|
wolffd@0
|
101 res.significant = cell(nbrates,1);
|
wolffd@0
|
102 for j = 1:nbrates
|
wolffd@0
|
103 figure
|
wolffd@0
|
104 set(gca,'YDir','reverse')
|
wolffd@0
|
105 hold on
|
wolffd@0
|
106 select = []; % list of selected features
|
wolffd@0
|
107 for i = 1:nbpreds
|
wolffd@0
|
108 if ~predics.normal(i)
|
wolffd@0
|
109 res.normal.cor(i,j) = NaN;
|
wolffd@0
|
110 continue
|
wolffd@0
|
111 end
|
wolffd@0
|
112 [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),...
|
wolffd@0
|
113 'rows','pairwise');
|
wolffd@0
|
114 res.normal.pval(i,j) = P(2);
|
wolffd@0
|
115 if P(2)<.05 % statistically significant
|
wolffd@0
|
116 select(end+1) = i; % included into the list of selected features
|
wolffd@0
|
117 res.normal.cor(i,j) = R(2);
|
wolffd@0
|
118 else
|
wolffd@0
|
119 res.normal.cor(i,j) = NaN;
|
wolffd@0
|
120 end
|
wolffd@0
|
121 end
|
wolffd@0
|
122 %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise');
|
wolffd@0
|
123
|
wolffd@0
|
124 [unused index] = sort(abs(res.normal.cor(select,j)),'descend');
|
wolffd@0
|
125 index = select(index);
|
wolffd@0
|
126 res.significant{j}.cor = res.normal.cor(index,j);
|
wolffd@0
|
127 res.significant{j}.pval = res.normal.pval(index,j);
|
wolffd@0
|
128 res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise');
|
wolffd@0
|
129 %res.inter(index,index,j);
|
wolffd@0
|
130 res.significant{j}.fields = {predics.fields{index}};
|
wolffd@0
|
131 res.significant{j}.alpha = predics.alpha(index);
|
wolffd@0
|
132 res.significant{j}.lambda = predics.lambda(index);
|
wolffd@0
|
133 k = 0;
|
wolffd@0
|
134 corfields = {};
|
wolffd@0
|
135 corbest = [];
|
wolffd@0
|
136 for i = 1:length(select)
|
wolffd@0
|
137 inter = max(abs(res.significant{j}.inter(1:i-1,i)));
|
wolffd@0
|
138 if i == 1 || inter < crosscor_thres
|
wolffd@0
|
139 % Features sufficiently independent to the better scoring ones
|
wolffd@0
|
140 % are selected
|
wolffd@0
|
141 k = k+1;
|
wolffd@0
|
142 if i == 1
|
wolffd@0
|
143 own = 1;
|
wolffd@0
|
144 else
|
wolffd@0
|
145 own = 1-inter;
|
wolffd@0
|
146 end
|
wolffd@0
|
147 col = res.significant{j}.pval(i);
|
wolffd@0
|
148 x1 = min(0,res.significant{j}.cor(i));
|
wolffd@0
|
149 y1 = k-own/2;
|
wolffd@0
|
150 x2 = x1+abs(res.significant{j}.cor(i));
|
wolffd@0
|
151 y2 = k+own/2;
|
wolffd@0
|
152 pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col])
|
wolffd@0
|
153 corbest = [corbest i];
|
wolffd@0
|
154 corfields = [corfields res.significant{j}.fields{i}];
|
wolffd@0
|
155 i
|
wolffd@0
|
156 res.significant{j}.fields{i}
|
wolffd@0
|
157 res.significant{j}.cor(i)
|
wolffd@0
|
158 inter
|
wolffd@0
|
159 end
|
wolffd@0
|
160 end
|
wolffd@0
|
161
|
wolffd@0
|
162 set(gca,'YTick',1:k,'YTickLabel',corfields)%,...
|
wolffd@0
|
163 % 'Position',[.4 .11 .5 .815])
|
wolffd@0
|
164 colorbar
|
wolffd@0
|
165 grid on
|
wolffd@0
|
166 title(['Correlation with rating ',ratings.fields{j}])
|
wolffd@0
|
167
|
wolffd@0
|
168 predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop
|
wolffd@0
|
169 res.best{j}.index = corbest;
|
wolffd@0
|
170 res.best{j}.fields = {res.significant{j}.fields{corbest}};
|
wolffd@0
|
171 res.best{j}.alpha = res.significant{j}.alpha(corbest);
|
wolffd@0
|
172 res.best{j}.lambda = res.significant{j}.lambda(corbest);
|
wolffd@0
|
173 if isempty(select)
|
wolffd@0
|
174 close
|
wolffd@0
|
175 elseif length(corbest)>1
|
wolffd@0
|
176 display('****************************************')
|
wolffd@0
|
177 display(['Regression for rating ',ratings.fields{j}])
|
wolffd@0
|
178 ratej = standardize(ratings.data(:,j));
|
wolffd@0
|
179 stepwisefit(predata,ratej,'maxiter',10);
|
wolffd@0
|
180 [b,se,pval,inmodel,stats,nextstep,history] ...
|
wolffd@0
|
181 = stepwisefit(predata,ratej,'maxiter',10,'scale','on');
|
wolffd@0
|
182 res.best{j}.fields{find(inmodel)}
|
wolffd@0
|
183 %stepwise(predata,ratej,[]);%,.001,.001);
|
wolffd@0
|
184 pause
|
wolffd@0
|
185 end
|
wolffd@0
|
186 end
|
wolffd@0
|
187
|
wolffd@0
|
188 function x = standardize(x)
|
wolffd@0
|
189 %mx = mean(x);
|
wolffd@0
|
190 %x = x-repmat(mx,[size(x,1) 1]);
|
wolffd@0
|
191 %sx = std(x);
|
wolffd@0
|
192 %sx(sx==0) = 1;
|
wolffd@0
|
193 %x = x./repmat(sx,[size(x,1) 1]);
|
wolffd@0
|
194
|
wolffd@0
|
195
|
wolffd@0
|
196 %%
|
wolffd@0
|
197 function output = normalize(input,dist,alpha,delta,txt,filename)
|
wolffd@0
|
198 fields = input.fields;
|
wolffd@0
|
199 nbfields = length(fields);
|
wolffd@0
|
200 output = input;
|
wolffd@0
|
201 h = waitbar(0,['Normalizing ',txt]);
|
wolffd@0
|
202 for i = 1:nbfields
|
wolffd@0
|
203 waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]);
|
wolffd@0
|
204 d = input.data(:,i);
|
wolffd@0
|
205 if length(d(~isnan(d)))>3 && ~input.normal(i)
|
wolffd@0
|
206 %figure(1)
|
wolffd@0
|
207 %hist(d,round(7.5*log(size(d,1))));
|
wolffd@0
|
208 %figure(2)
|
wolffd@0
|
209 %normplot(d)
|
wolffd@0
|
210
|
wolffd@0
|
211 [output.data(:,i) output.alpha(i) output.lambda(i)] = ...
|
wolffd@0
|
212 boxcox_search(d,dist,alpha,delta);
|
wolffd@0
|
213 %figure(3)
|
wolffd@0
|
214 %hist(output.data(:,i),round(7.5*log(size(d,1))));
|
wolffd@0
|
215 output.normal(i) = ~lillietest(output.data(:,i),.01);
|
wolffd@0
|
216 %figure(4)
|
wolffd@0
|
217 %normplot(data(:,i))
|
wolffd@0
|
218 end
|
wolffd@0
|
219 end
|
wolffd@0
|
220 display('Excluded:')
|
wolffd@0
|
221 output.fields{~output.normal}
|
wolffd@0
|
222 display('..')
|
wolffd@0
|
223 output.data(:,~output.normal) = [];
|
wolffd@0
|
224 output.fields(~output.normal) = [];
|
wolffd@0
|
225 output.normal(~output.normal) = [];
|
wolffd@0
|
226 if max(~output.normal)
|
wolffd@0
|
227 output.alpha(~output.normal) = [];
|
wolffd@0
|
228 output.lambda(~output.normal) = [];
|
wolffd@0
|
229 end
|
wolffd@0
|
230 %if strcmpi(filename(end-3:end),'.txt')
|
wolffd@0
|
231 % filename = filename(1:end-4);
|
wolffd@0
|
232 %end
|
wolffd@0
|
233 %filename = [filename '.mir.txt'];
|
wolffd@0
|
234 %mirexport(filename,output);
|
wolffd@0
|
235 close(h);
|
wolffd@0
|
236
|
wolffd@0
|
237
|
wolffd@0
|
238 function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta)
|
wolffd@0
|
239 if nargin<2
|
wolffd@0
|
240 dist = @lse;
|
wolffd@0
|
241 end
|
wolffd@0
|
242 if nargin<3
|
wolffd@0
|
243 alpha = 0;
|
wolffd@0
|
244 end
|
wolffd@0
|
245 lambda = -10:.01:10;
|
wolffd@0
|
246 if min(x)<0
|
wolffd@0
|
247 x = x-min(x);
|
wolffd@0
|
248 resalpha = -min(x);
|
wolffd@0
|
249 else
|
wolffd@0
|
250 resalpha = 0;
|
wolffd@0
|
251 end
|
wolffd@0
|
252 y = boxcox_transf(x,lambda);
|
wolffd@0
|
253 [y lambda d] = optimize(y,lambda,dist,x);
|
wolffd@0
|
254 %y = y(~isnan(y));
|
wolffd@0
|
255 while lillietest(y,.01) && alpha > delta
|
wolffd@0
|
256 yp = boxcox_transf(x+alpha,lambda);
|
wolffd@0
|
257 %plot(alpha,lse(yp,lambda,x))
|
wolffd@0
|
258 if dist(yp,lambda,x)<d
|
wolffd@0
|
259 x = x+alpha;
|
wolffd@0
|
260 resalpha = resalpha + alpha;
|
wolffd@0
|
261 y = yp;
|
wolffd@0
|
262 alpha = alpha*.999;
|
wolffd@0
|
263 d = dist(y,lambda,x);
|
wolffd@0
|
264 else
|
wolffd@0
|
265 minalpha = min(alpha,min(x)); % to avoid negative values
|
wolffd@0
|
266 yn = boxcox_transf(x - minalpha,lambda);
|
wolffd@0
|
267 %plot(alpha,lse(yn,lambda,x))
|
wolffd@0
|
268 if dist(yn,lambda,x)<d
|
wolffd@0
|
269 x = minalpha;
|
wolffd@0
|
270 resalpha = resalpha - minalpha;
|
wolffd@0
|
271 y = yn;
|
wolffd@0
|
272 alpha = alpha*.999;
|
wolffd@0
|
273 d = dist(y,lambda,x);
|
wolffd@0
|
274 else
|
wolffd@0
|
275 alpha = alpha/2;
|
wolffd@0
|
276 end
|
wolffd@0
|
277 end
|
wolffd@0
|
278 end
|
wolffd@0
|
279 %plot(lambda,dist(y,lambda,x),'.r')
|
wolffd@0
|
280
|
wolffd@0
|
281
|
wolffd@0
|
282 function y = boxcox_transf(x,lambda)
|
wolffd@0
|
283 lambda0 = find(~lambda);
|
wolffd@0
|
284 if min(x)<0
|
wolffd@0
|
285 x = x-min(x);
|
wolffd@0
|
286 end
|
wolffd@0
|
287 x = repmat(x,[1 length(lambda)]);
|
wolffd@0
|
288 lambda = repmat(lambda,[size(x,1) 1]);
|
wolffd@0
|
289 y = (x.^lambda-1)./lambda;
|
wolffd@0
|
290 y(:,lambda0) = log(x(:,lambda0));
|
wolffd@0
|
291
|
wolffd@0
|
292
|
wolffd@0
|
293 function [z,lambda,d] = optimize(y,lambda,dist,x)
|
wolffd@0
|
294 [d index] = min(dist(y,lambda,x));
|
wolffd@0
|
295 lambda = lambda(index);
|
wolffd@0
|
296 z = y(:,index);
|
wolffd@0
|
297
|
wolffd@0
|
298
|
wolffd@0
|
299 function d = lse(y,lambda,x)
|
wolffd@0
|
300 d = skewness(y).^2 + (kurtosis(y)-3).^2/4; % equation used in Jarque-Bera test
|
wolffd@0
|
301 %plot(lambda,d,'.')
|
wolffd@0
|
302
|
wolffd@0
|
303
|
wolffd@0
|
304 function d = mle(y,lambda,x)
|
wolffd@0
|
305 d = -size(y,1)/2*log(var(y))+(lambda-1)*sum(log(x)); |