Mercurial > hg > camir-aes2014
view toolboxes/MIRtoolbox1.3.2/MIRToolbox/mirmap.m @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
line wrap: on
line source
function res = mirmap(predics_ref,ratings_ref,dist,alpha,delta) % mirmap(predictors_file, ratings_file) performs a statistical mapping % between ratings associated to a set of audio recordings, and a set of % variables computed for the same set of audio recordings. It might be % useful in particular when the set of predictors is large and diverse. if nargin<3 dist = 'lse'; end if nargin<4 alpha = 1; end if nargin<5 delta = .00001; end crosscor_thres = .6; dist = str2func(dist); predics = import(predics_ref); ratings = import(ratings_ref); predics = normalize(predics,dist,alpha,delta,'predictions',predics_ref); if isempty(predics.data) res = []; return end ratings = normalize(ratings,dist,alpha,delta,'ratings',ratings_ref); res = map(predics,ratings,crosscor_thres); %% function output = import(ref) output.data = []; output.fields = {}; output.normal = []; if not(iscell(ref)) ref = {ref}; end for i = 1:length(ref) if not(iscell(ref{i})) ref{i} = {ref{i}}; end if ischar(ref{i}{1}) for j = 1:length(ref{i}) importj = importdata(ref{i}{j}); if not(isstruct(importj)) imp = importj; importj = struct; importj.data = imp; for k = 1:size(imp,2) importj.textdata{k} = [ref{i}{j},'_',num2str(k)]; end end if j == 1 import = importj; else [c ia ib] = intersect(import.textdata(1,2:end),... importj.textdata(1,2:end)); import.textdata = [import.textdata(:,[1 ia+1]);... importj.textdata(2:end,[1 ib+1])]; import.data = [import.data(:,ia);importj.data(:,[1 ib+1])]; end end output.data = [output.data import.data]; nbfields = size(import.data,2); field = import.textdata(1,:); field = field(end-nbfields+1 : end); output.fields = [output.fields field]; else output.data = [output.data ref{i}{1}]; for j = 1:size(ref{i},2) output.fields = [output.fields num2str(j)]; end end end ndata = size(output.data,2); output.normal = NaN(1,ndata); warning('off','stats:lillietest:OutOfRangeP'); for i = 1:ndata data = output.data(:,i); data = data(~isnan(data)); if length(data) < 4 output.normal(i) = 0; else output.normal(i) = ~lillietest(data); end end %% function res = map(predics,ratings,crosscor_thres) nbrates = size(ratings.data,2); nbpreds = size(predics.data,2); res.normal.cor = NaN(nbpreds,nbrates); res.normal.pval = NaN(nbpreds,nbrates); res.significant = cell(nbrates,1); for j = 1:nbrates figure set(gca,'YDir','reverse') hold on select = []; % list of selected features for i = 1:nbpreds if ~predics.normal(i) res.normal.cor(i,j) = NaN; continue end [R P] = corrcoef(predics.data(:,i),ratings.data(:,j),... 'rows','pairwise'); res.normal.pval(i,j) = P(2); if P(2)<.05 % statistically significant select(end+1) = i; % included into the list of selected features res.normal.cor(i,j) = R(2); else res.normal.cor(i,j) = NaN; end end %res.inter(:,:,j) = corrcoef(predics.data,'rows','pairwise'); [unused index] = sort(abs(res.normal.cor(select,j)),'descend'); index = select(index); res.significant{j}.cor = res.normal.cor(index,j); res.significant{j}.pval = res.normal.pval(index,j); res.significant{j}.inter = corrcoef(predics.data(:,index),'rows','pairwise'); %res.inter(index,index,j); res.significant{j}.fields = {predics.fields{index}}; res.significant{j}.alpha = predics.alpha(index); res.significant{j}.lambda = predics.lambda(index); k = 0; corfields = {}; corbest = []; for i = 1:length(select) inter = max(abs(res.significant{j}.inter(1:i-1,i))); if i == 1 || inter < crosscor_thres % Features sufficiently independent to the better scoring ones % are selected k = k+1; if i == 1 own = 1; else own = 1-inter; end col = res.significant{j}.pval(i); x1 = min(0,res.significant{j}.cor(i)); y1 = k-own/2; x2 = x1+abs(res.significant{j}.cor(i)); y2 = k+own/2; pcolor([x1 x1;x2 x2],[y1 y2;y1 y2],[col col;col col]) corbest = [corbest i]; corfields = [corfields res.significant{j}.fields{i}]; i res.significant{j}.fields{i} res.significant{j}.cor(i) inter end end set(gca,'YTick',1:k,'YTickLabel',corfields)%,... % 'Position',[.4 .11 .5 .815]) colorbar grid on title(['Correlation with rating ',ratings.fields{j}]) predata = standardize(predics.data(:,index(corbest))); %% to be put out of the loop res.best{j}.index = corbest; res.best{j}.fields = {res.significant{j}.fields{corbest}}; res.best{j}.alpha = res.significant{j}.alpha(corbest); res.best{j}.lambda = res.significant{j}.lambda(corbest); if isempty(select) close elseif length(corbest)>1 display('****************************************') display(['Regression for rating ',ratings.fields{j}]) ratej = standardize(ratings.data(:,j)); stepwisefit(predata,ratej,'maxiter',10); [b,se,pval,inmodel,stats,nextstep,history] ... = stepwisefit(predata,ratej,'maxiter',10,'scale','on'); res.best{j}.fields{find(inmodel)} %stepwise(predata,ratej,[]);%,.001,.001); pause end end function x = standardize(x) %mx = mean(x); %x = x-repmat(mx,[size(x,1) 1]); %sx = std(x); %sx(sx==0) = 1; %x = x./repmat(sx,[size(x,1) 1]); %% function output = normalize(input,dist,alpha,delta,txt,filename) fields = input.fields; nbfields = length(fields); output = input; h = waitbar(0,['Normalizing ',txt]); for i = 1:nbfields waitbar((i-1)/nbfields,h,['Normalizing ',txt,fields{i}]); d = input.data(:,i); if length(d(~isnan(d)))>3 && ~input.normal(i) %figure(1) %hist(d,round(7.5*log(size(d,1)))); %figure(2) %normplot(d) [output.data(:,i) output.alpha(i) output.lambda(i)] = ... boxcox_search(d,dist,alpha,delta); %figure(3) %hist(output.data(:,i),round(7.5*log(size(d,1)))); output.normal(i) = ~lillietest(output.data(:,i),.01); %figure(4) %normplot(data(:,i)) end end display('Excluded:') output.fields{~output.normal} display('..') output.data(:,~output.normal) = []; output.fields(~output.normal) = []; output.normal(~output.normal) = []; if max(~output.normal) output.alpha(~output.normal) = []; output.lambda(~output.normal) = []; end %if strcmpi(filename(end-3:end),'.txt') % filename = filename(1:end-4); %end %filename = [filename '.mir.txt']; %mirexport(filename,output); close(h); function [y resalpha lambda] = boxcox_search(x,dist,alpha,delta) if nargin<2 dist = @lse; end if nargin<3 alpha = 0; end lambda = -10:.01:10; if min(x)<0 x = x-min(x); resalpha = -min(x); else resalpha = 0; end y = boxcox_transf(x,lambda); [y lambda d] = optimize(y,lambda,dist,x); %y = y(~isnan(y)); while lillietest(y,.01) && alpha > delta yp = boxcox_transf(x+alpha,lambda); %plot(alpha,lse(yp,lambda,x)) if dist(yp,lambda,x)<d x = x+alpha; resalpha = resalpha + alpha; y = yp; alpha = alpha*.999; d = dist(y,lambda,x); else minalpha = min(alpha,min(x)); % to avoid negative values yn = boxcox_transf(x - minalpha,lambda); %plot(alpha,lse(yn,lambda,x)) if dist(yn,lambda,x)<d x = minalpha; resalpha = resalpha - minalpha; y = yn; alpha = alpha*.999; d = dist(y,lambda,x); else alpha = alpha/2; end end end %plot(lambda,dist(y,lambda,x),'.r') function y = boxcox_transf(x,lambda) lambda0 = find(~lambda); if min(x)<0 x = x-min(x); end x = repmat(x,[1 length(lambda)]); lambda = repmat(lambda,[size(x,1) 1]); y = (x.^lambda-1)./lambda; y(:,lambda0) = log(x(:,lambda0)); function [z,lambda,d] = optimize(y,lambda,dist,x) [d index] = min(dist(y,lambda,x)); lambda = lambda(index); z = y(:,index); function d = lse(y,lambda,x) d = skewness(y).^2 + (kurtosis(y)-3).^2/4; % equation used in Jarque-Bera test %plot(lambda,d,'.') function d = mle(y,lambda,x) d = -size(y,1)/2*log(var(y))+(lambda-1)*sum(log(x));