Daniel@0: function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta) Daniel@0: % mirmap(predictors_file, ratings_file) performs a statistical mapping Daniel@0: % between ratings associated to a set of audio recordings, and a set of Daniel@0: % variables computed for the same set of audio recordings. It might be Daniel@0: % useful in particular when the set of predictors is large and diverse. Daniel@0: Daniel@0: if nargin<3 Daniel@0: dist = 'lse'; Daniel@0: end Daniel@0: if nargin<4 Daniel@0: alpha = 1; Daniel@0: end Daniel@0: if nargin<5 Daniel@0: delta = .00001; Daniel@0: end Daniel@0: Daniel@0: crosscor_thres = .6; Daniel@0: Daniel@0: dist = str2func(dist); Daniel@0: Daniel@0: predics = import(predics_ref); Daniel@0: ratings = import(ratings_ref); Daniel@0: Daniel@0: predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref); Daniel@0: if isempty(predics.data) Daniel@0: res = []; Daniel@0: return Daniel@0: end Daniel@0: Daniel@0: ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref); Daniel@0: Daniel@0: res = map(predics,ratings,crosscor_thres); Daniel@0: Daniel@0: Daniel@0: %% Daniel@0: function output = import(ref) Daniel@0: output.data = []; Daniel@0: output.fields = {}; Daniel@0: output.normal = []; Daniel@0: if not(iscell(ref)) Daniel@0: ref = {ref}; Daniel@0: end Daniel@0: for i = 1:length(ref) Daniel@0: if not(iscell(ref{i})) Daniel@0: ref{i} = {ref{i}}; Daniel@0: end Daniel@0: if ischar(ref{i}{1}) Daniel@0: for j = 1:length(ref{i}) Daniel@0: importj = importdata(ref{i}{j}); Daniel@0: if not(isstruct(importj)) Daniel@0: imp = importj; Daniel@0: importj = struct; Daniel@0: importj.data = imp; Daniel@0: for k = 1:size(imp,2) Daniel@0: importj.textdata{k} = [ref{i}{j},'_',num2str(k)]; Daniel@0: end Daniel@0: end Daniel@0: if j == 1 Daniel@0: import = importj; Daniel@0: else Daniel@0: [c ia ib] = intersect(import.textdata(1,2:end),... Daniel@0: importj.textdata(1,2:end)); Daniel@0: import.textdata = [import.textdata(:,[1 ia+1]);... Daniel@0: importj.textdata(2:end,[1 ib+1])]; Daniel@0: import.data = [import.data(:,ia);importj.data(:,[1 ib+1])]; Daniel@0: end Daniel@0: end Daniel@0: output.data = [output.data import.data]; Daniel@0: nbfields = size(import.data,2); Daniel@0: field = import.textdata(1,:); Daniel@0: field = field(end-nbfields+1 : end); Daniel@0: output.fields = [output.fields field]; Daniel@0: else Daniel@0: output.data = [output.data ref{i}{1}]; Daniel@0: for j = 1:size(ref{i},2) Daniel@0: output.fields = [output.fields num2str(j)]; Daniel@0: end Daniel@0: end Daniel@0: end Daniel@0: ndata = size(output.data,2); Daniel@0: output.normal = NaN(1,ndata); Daniel@0: warning('off','stats:lillietest:OutOfRangeP'); Daniel@0: for i = 1:ndata Daniel@0: data = output.data(:,i); Daniel@0: data = data(~isnan(data)); Daniel@0: if length(data) < 4 Daniel@0: output.normal(i) = 0; Daniel@0: else Daniel@0: output.normal(i) = ~lillietest(data); Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: Daniel@0: %% Daniel@0: function res = map(predics,ratings,crosscor_thres) Daniel@0: nbrates = size(ratings.data,2); Daniel@0: nbpreds = size(predics.data,2); Daniel@0: res.normal.cor = NaN(nbpreds,nbrates); Daniel@0: res.normal.pval = NaN(nbpreds,nbrates); Daniel@0: Daniel@0: res.significant = cell(nbrates,1); Daniel@0: for j = 1:nbrates Daniel@0: figure Daniel@0: set(gca,'YDir','reverse') Daniel@0: hold on Daniel@0: select = []; % list of selected features Daniel@0: for i = 1:nbpreds Daniel@0: if ~predics.normal(i) Daniel@0: res.normal.cor(i,j) = NaN; Daniel@0: continue Daniel@0: end Daniel@0: [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),... Daniel@0: 'rows','pairwise'); Daniel@0: res.normal.pval(i,j) = P(2); Daniel@0: if P(2)<.05 % statistically significant Daniel@0: select(end+1) = i; % included into the list of selected features Daniel@0: res.normal.cor(i,j) = R(2); Daniel@0: else Daniel@0: res.normal.cor(i,j) = NaN; Daniel@0: end Daniel@0: end Daniel@0: %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise'); Daniel@0: Daniel@0: [unused index] = sort(abs(res.normal.cor(select,j)),'descend'); Daniel@0: index = select(index); Daniel@0: res.significant{j}.cor = res.normal.cor(index,j); Daniel@0: res.significant{j}.pval = res.normal.pval(index,j); Daniel@0: res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise'); Daniel@0: %res.inter(index,index,j); Daniel@0: res.significant{j}.fields = {predics.fields{index}}; Daniel@0: res.significant{j}.alpha = predics.alpha(index); Daniel@0: res.significant{j}.lambda = predics.lambda(index); Daniel@0: k = 0; Daniel@0: corfields = {}; Daniel@0: corbest = []; Daniel@0: for i = 1:length(select) Daniel@0: inter = max(abs(res.significant{j}.inter(1:i-1,i))); Daniel@0: if i == 1 || inter < crosscor_thres Daniel@0: % Features sufficiently independent to the better scoring ones Daniel@0: % are selected Daniel@0: k = k+1; Daniel@0: if i == 1 Daniel@0: own = 1; Daniel@0: else Daniel@0: own = 1-inter; Daniel@0: end Daniel@0: col = res.significant{j}.pval(i); Daniel@0: x1 = min(0,res.significant{j}.cor(i)); Daniel@0: y1 = k-own/2; Daniel@0: x2 = x1+abs(res.significant{j}.cor(i)); Daniel@0: y2 = k+own/2; Daniel@0: pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col]) Daniel@0: corbest = [corbest i]; Daniel@0: corfields = [corfields res.significant{j}.fields{i}]; Daniel@0: i Daniel@0: res.significant{j}.fields{i} Daniel@0: res.significant{j}.cor(i) Daniel@0: inter Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: set(gca,'YTick',1:k,'YTickLabel',corfields)%,... Daniel@0: % 'Position',[.4 .11 .5 .815]) Daniel@0: colorbar Daniel@0: grid on Daniel@0: title(['Correlation with rating ',ratings.fields{j}]) Daniel@0: Daniel@0: predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop Daniel@0: res.best{j}.index = corbest; Daniel@0: res.best{j}.fields = {res.significant{j}.fields{corbest}}; Daniel@0: res.best{j}.alpha = res.significant{j}.alpha(corbest); Daniel@0: res.best{j}.lambda = res.significant{j}.lambda(corbest); Daniel@0: if isempty(select) Daniel@0: close Daniel@0: elseif length(corbest)>1 Daniel@0: display('****************************************') Daniel@0: display(['Regression for rating ',ratings.fields{j}]) Daniel@0: ratej = standardize(ratings.data(:,j)); Daniel@0: stepwisefit(predata,ratej,'maxiter',10); Daniel@0: [b,se,pval,inmodel,stats,nextstep,history] ... Daniel@0: = stepwisefit(predata,ratej,'maxiter',10,'scale','on'); Daniel@0: res.best{j}.fields{find(inmodel)} Daniel@0: %stepwise(predata,ratej,[]);%,.001,.001); Daniel@0: pause Daniel@0: end Daniel@0: end Daniel@0: Daniel@0: function x = standardize(x) Daniel@0: %mx = mean(x); Daniel@0: %x = x-repmat(mx,[size(x,1) 1]); Daniel@0: %sx = std(x); Daniel@0: %sx(sx==0) = 1; Daniel@0: %x = x./repmat(sx,[size(x,1) 1]); Daniel@0: Daniel@0: Daniel@0: %% Daniel@0: function output = normalize(input,dist,alpha,delta,txt,filename) Daniel@0: fields = input.fields; Daniel@0: nbfields = length(fields); Daniel@0: output = input; Daniel@0: h = waitbar(0,['Normalizing ',txt]); Daniel@0: for i = 1:nbfields Daniel@0: waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]); Daniel@0: d = input.data(:,i); Daniel@0: if length(d(~isnan(d)))>3 && ~input.normal(i) Daniel@0: %figure(1) Daniel@0: %hist(d,round(7.5*log(size(d,1)))); Daniel@0: %figure(2) Daniel@0: %normplot(d) Daniel@0: Daniel@0: [output.data(:,i) output.alpha(i) output.lambda(i)] = ... Daniel@0: boxcox_search(d,dist,alpha,delta); Daniel@0: %figure(3) Daniel@0: %hist(output.data(:,i),round(7.5*log(size(d,1)))); Daniel@0: output.normal(i) = ~lillietest(output.data(:,i),.01); Daniel@0: %figure(4) Daniel@0: %normplot(data(:,i)) Daniel@0: end Daniel@0: end Daniel@0: display('Excluded:') Daniel@0: output.fields{~output.normal} Daniel@0: display('..') Daniel@0: output.data(:,~output.normal) = []; Daniel@0: output.fields(~output.normal) = []; Daniel@0: output.normal(~output.normal) = []; Daniel@0: if max(~output.normal) Daniel@0: output.alpha(~output.normal) = []; Daniel@0: output.lambda(~output.normal) = []; Daniel@0: end Daniel@0: %if strcmpi(filename(end-3:end),'.txt') Daniel@0: % filename = filename(1:end-4); Daniel@0: %end Daniel@0: %filename = [filename '.mir.txt']; Daniel@0: %mirexport(filename,output); Daniel@0: close(h); Daniel@0: Daniel@0: Daniel@0: function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta) Daniel@0: if nargin<2 Daniel@0: dist = @lse; Daniel@0: end Daniel@0: if nargin<3 Daniel@0: alpha = 0; Daniel@0: end Daniel@0: lambda = -10:.01:10; Daniel@0: if min(x)<0 Daniel@0: x = x-min(x); Daniel@0: resalpha = -min(x); Daniel@0: else Daniel@0: resalpha = 0; Daniel@0: end Daniel@0: y = boxcox_transf(x,lambda); Daniel@0: [y lambda d] = optimize(y,lambda,dist,x); Daniel@0: %y = y(~isnan(y)); Daniel@0: while lillietest(y,.01) && alpha > delta Daniel@0: yp = boxcox_transf(x+alpha,lambda); Daniel@0: %plot(alpha,lse(yp,lambda,x)) Daniel@0: if dist(yp,lambda,x)