wolffd@0: function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta) wolffd@0: % mirmap(predictors_file, ratings_file) performs a statistical mapping wolffd@0: % between ratings associated to a set of audio recordings, and a set of wolffd@0: % variables computed for the same set of audio recordings. It might be wolffd@0: % useful in particular when the set of predictors is large and diverse. wolffd@0: wolffd@0: if nargin<3 wolffd@0: dist = 'lse'; wolffd@0: end wolffd@0: if nargin<4 wolffd@0: alpha = 1; wolffd@0: end wolffd@0: if nargin<5 wolffd@0: delta = .00001; wolffd@0: end wolffd@0: wolffd@0: crosscor_thres = .6; wolffd@0: wolffd@0: dist = str2func(dist); wolffd@0: wolffd@0: predics = import(predics_ref); wolffd@0: ratings = import(ratings_ref); wolffd@0: wolffd@0: predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref); wolffd@0: if isempty(predics.data) wolffd@0: res = []; wolffd@0: return wolffd@0: end wolffd@0: wolffd@0: ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref); wolffd@0: wolffd@0: res = map(predics,ratings,crosscor_thres); wolffd@0: wolffd@0: wolffd@0: %% wolffd@0: function output = import(ref) wolffd@0: output.data = []; wolffd@0: output.fields = {}; wolffd@0: output.normal = []; wolffd@0: if not(iscell(ref)) wolffd@0: ref = {ref}; wolffd@0: end wolffd@0: for i = 1:length(ref) wolffd@0: if not(iscell(ref{i})) wolffd@0: ref{i} = {ref{i}}; wolffd@0: end wolffd@0: if ischar(ref{i}{1}) wolffd@0: for j = 1:length(ref{i}) wolffd@0: importj = importdata(ref{i}{j}); wolffd@0: if not(isstruct(importj)) wolffd@0: imp = importj; wolffd@0: importj = struct; wolffd@0: importj.data = imp; wolffd@0: for k = 1:size(imp,2) wolffd@0: importj.textdata{k} = [ref{i}{j},'_',num2str(k)]; wolffd@0: end wolffd@0: end wolffd@0: if j == 1 wolffd@0: import = importj; wolffd@0: else wolffd@0: [c ia ib] = intersect(import.textdata(1,2:end),... wolffd@0: importj.textdata(1,2:end)); wolffd@0: import.textdata = [import.textdata(:,[1 ia+1]);... wolffd@0: importj.textdata(2:end,[1 ib+1])]; wolffd@0: import.data = [import.data(:,ia);importj.data(:,[1 ib+1])]; wolffd@0: end wolffd@0: end wolffd@0: output.data = [output.data import.data]; wolffd@0: nbfields = size(import.data,2); wolffd@0: field = import.textdata(1,:); wolffd@0: field = field(end-nbfields+1 : end); wolffd@0: output.fields = [output.fields field]; wolffd@0: else wolffd@0: output.data = [output.data ref{i}{1}]; wolffd@0: for j = 1:size(ref{i},2) wolffd@0: output.fields = [output.fields num2str(j)]; wolffd@0: end wolffd@0: end wolffd@0: end wolffd@0: ndata = size(output.data,2); wolffd@0: output.normal = NaN(1,ndata); wolffd@0: warning('off','stats:lillietest:OutOfRangeP'); wolffd@0: for i = 1:ndata wolffd@0: data = output.data(:,i); wolffd@0: data = data(~isnan(data)); wolffd@0: if length(data) < 4 wolffd@0: output.normal(i) = 0; wolffd@0: else wolffd@0: output.normal(i) = ~lillietest(data); wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: wolffd@0: %% wolffd@0: function res = map(predics,ratings,crosscor_thres) wolffd@0: nbrates = size(ratings.data,2); wolffd@0: nbpreds = size(predics.data,2); wolffd@0: res.normal.cor = NaN(nbpreds,nbrates); wolffd@0: res.normal.pval = NaN(nbpreds,nbrates); wolffd@0: wolffd@0: res.significant = cell(nbrates,1); wolffd@0: for j = 1:nbrates wolffd@0: figure wolffd@0: set(gca,'YDir','reverse') wolffd@0: hold on wolffd@0: select = []; % list of selected features wolffd@0: for i = 1:nbpreds wolffd@0: if ~predics.normal(i) wolffd@0: res.normal.cor(i,j) = NaN; wolffd@0: continue wolffd@0: end wolffd@0: [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),... wolffd@0: 'rows','pairwise'); wolffd@0: res.normal.pval(i,j) = P(2); wolffd@0: if P(2)<.05 % statistically significant wolffd@0: select(end+1) = i; % included into the list of selected features wolffd@0: res.normal.cor(i,j) = R(2); wolffd@0: else wolffd@0: res.normal.cor(i,j) = NaN; wolffd@0: end wolffd@0: end wolffd@0: %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise'); wolffd@0: wolffd@0: [unused index] = sort(abs(res.normal.cor(select,j)),'descend'); wolffd@0: index = select(index); wolffd@0: res.significant{j}.cor = res.normal.cor(index,j); wolffd@0: res.significant{j}.pval = res.normal.pval(index,j); wolffd@0: res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise'); wolffd@0: %res.inter(index,index,j); wolffd@0: res.significant{j}.fields = {predics.fields{index}}; wolffd@0: res.significant{j}.alpha = predics.alpha(index); wolffd@0: res.significant{j}.lambda = predics.lambda(index); wolffd@0: k = 0; wolffd@0: corfields = {}; wolffd@0: corbest = []; wolffd@0: for i = 1:length(select) wolffd@0: inter = max(abs(res.significant{j}.inter(1:i-1,i))); wolffd@0: if i == 1 || inter < crosscor_thres wolffd@0: % Features sufficiently independent to the better scoring ones wolffd@0: % are selected wolffd@0: k = k+1; wolffd@0: if i == 1 wolffd@0: own = 1; wolffd@0: else wolffd@0: own = 1-inter; wolffd@0: end wolffd@0: col = res.significant{j}.pval(i); wolffd@0: x1 = min(0,res.significant{j}.cor(i)); wolffd@0: y1 = k-own/2; wolffd@0: x2 = x1+abs(res.significant{j}.cor(i)); wolffd@0: y2 = k+own/2; wolffd@0: pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col]) wolffd@0: corbest = [corbest i]; wolffd@0: corfields = [corfields res.significant{j}.fields{i}]; wolffd@0: i wolffd@0: res.significant{j}.fields{i} wolffd@0: res.significant{j}.cor(i) wolffd@0: inter wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: set(gca,'YTick',1:k,'YTickLabel',corfields)%,... wolffd@0: % 'Position',[.4 .11 .5 .815]) wolffd@0: colorbar wolffd@0: grid on wolffd@0: title(['Correlation with rating ',ratings.fields{j}]) wolffd@0: wolffd@0: predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop wolffd@0: res.best{j}.index = corbest; wolffd@0: res.best{j}.fields = {res.significant{j}.fields{corbest}}; wolffd@0: res.best{j}.alpha = res.significant{j}.alpha(corbest); wolffd@0: res.best{j}.lambda = res.significant{j}.lambda(corbest); wolffd@0: if isempty(select) wolffd@0: close wolffd@0: elseif length(corbest)>1 wolffd@0: display('****************************************') wolffd@0: display(['Regression for rating ',ratings.fields{j}]) wolffd@0: ratej = standardize(ratings.data(:,j)); wolffd@0: stepwisefit(predata,ratej,'maxiter',10); wolffd@0: [b,se,pval,inmodel,stats,nextstep,history] ... wolffd@0: = stepwisefit(predata,ratej,'maxiter',10,'scale','on'); wolffd@0: res.best{j}.fields{find(inmodel)} wolffd@0: %stepwise(predata,ratej,[]);%,.001,.001); wolffd@0: pause wolffd@0: end wolffd@0: end wolffd@0: wolffd@0: function x = standardize(x) wolffd@0: %mx = mean(x); wolffd@0: %x = x-repmat(mx,[size(x,1) 1]); wolffd@0: %sx = std(x); wolffd@0: %sx(sx==0) = 1; wolffd@0: %x = x./repmat(sx,[size(x,1) 1]); wolffd@0: wolffd@0: wolffd@0: %% wolffd@0: function output = normalize(input,dist,alpha,delta,txt,filename) wolffd@0: fields = input.fields; wolffd@0: nbfields = length(fields); wolffd@0: output = input; wolffd@0: h = waitbar(0,['Normalizing ',txt]); wolffd@0: for i = 1:nbfields wolffd@0: waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]); wolffd@0: d = input.data(:,i); wolffd@0: if length(d(~isnan(d)))>3 && ~input.normal(i) wolffd@0: %figure(1) wolffd@0: %hist(d,round(7.5*log(size(d,1)))); wolffd@0: %figure(2) wolffd@0: %normplot(d) wolffd@0: wolffd@0: [output.data(:,i) output.alpha(i) output.lambda(i)] = ... wolffd@0: boxcox_search(d,dist,alpha,delta); wolffd@0: %figure(3) wolffd@0: %hist(output.data(:,i),round(7.5*log(size(d,1)))); wolffd@0: output.normal(i) = ~lillietest(output.data(:,i),.01); wolffd@0: %figure(4) wolffd@0: %normplot(data(:,i)) wolffd@0: end wolffd@0: end wolffd@0: display('Excluded:') wolffd@0: output.fields{~output.normal} wolffd@0: display('..') wolffd@0: output.data(:,~output.normal) = []; wolffd@0: output.fields(~output.normal) = []; wolffd@0: output.normal(~output.normal) = []; wolffd@0: if max(~output.normal) wolffd@0: output.alpha(~output.normal) = []; wolffd@0: output.lambda(~output.normal) = []; wolffd@0: end wolffd@0: %if strcmpi(filename(end-3:end),'.txt') wolffd@0: % filename = filename(1:end-4); wolffd@0: %end wolffd@0: %filename = [filename '.mir.txt']; wolffd@0: %mirexport(filename,output); wolffd@0: close(h); wolffd@0: wolffd@0: wolffd@0: function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta) wolffd@0: if nargin<2 wolffd@0: dist = @lse; wolffd@0: end wolffd@0: if nargin<3 wolffd@0: alpha = 0; wolffd@0: end wolffd@0: lambda = -10:.01:10; wolffd@0: if min(x)<0 wolffd@0: x = x-min(x); wolffd@0: resalpha = -min(x); wolffd@0: else wolffd@0: resalpha = 0; wolffd@0: end wolffd@0: y = boxcox_transf(x,lambda); wolffd@0: [y lambda d] = optimize(y,lambda,dist,x); wolffd@0: %y = y(~isnan(y)); wolffd@0: while lillietest(y,.01) && alpha > delta wolffd@0: yp = boxcox_transf(x+alpha,lambda); wolffd@0: %plot(alpha,lse(yp,lambda,x)) wolffd@0: if dist(yp,lambda,x)