view core/magnatagatune/tests_evals/rbm_subspace/Exp_template.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line source
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Experiment code template                                                %
% Project: sub-euclidean distance for music similarity                    %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load features
% Reads the first four variables stored in the feature file, by position in
% the listing returned by WHOS, and binds them to the names used by the rest
% of the script.
% NOTE(review): this depends on WHOS listing the file's variables in the
% order raw_features, indices, tst_inx, trn_inx -- confirm against the
% actual .mat file.
feature_file = 'rel_music_raw_features.mat';
vars = whos('-file', feature_file);
% Fail early with a clear message instead of an index-out-of-bounds error
% when the file does not hold the four expected variables.
if numel(vars) < 4
    error('Exp_template:badFeatureFile', ...
          'Expected at least 4 variables in %s but found %d.', ...
          feature_file, numel(vars));
end
A = load(feature_file,vars(1).name,vars(2).name,vars(3).name,vars(4).name);
raw_features = A.(vars(1).name);
indices      = A.(vars(2).name);
tst_inx      = A.(vars(3).name);
trn_inx      = A.(vars(4).name);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define directory to save parameters & results
% dir    = '/home/funzi/Documents/';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Candidate values for every tunable setting. A single entry of each list
% is picked by the matching index variable defined in the next section.
dmr    = [0, 5, 10, 20, 30, 50];      % dimension reduction by PCA
ws     = [0, 5, 10, 20, 30, 50, 70];  % window size
% RBM settings (only relevant when the RBM is used for extraction)
hidNum = [30, 50, 100, 500];
lr_1   = [0.05, 0.1, 0.5];
lr_2   = [0.1, 0.5, 0.7];
mmt    = [0.02, 0.05, 0.1];
cost   = [0.00002, 0.01, 0.1];

%% Select parameters (if grid-search is not applied)
% Index into, respectively: dmr, ws, hidNum, lr_1, lr_2, mmt, cost.
% All of them start at the first entry of their list.
[di, wi, hi, l1i, l2i, mi, ci] = deal(1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% If grid search is used
% log_file = strcat(dir,'exp_.mat');
% inx = resume_from_grid(log_file,8);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Feature extraction
% Builds the matrix `features` from raw_features using the method selected
% by EXT_TYPE:
%   1 - projection onto principal components, then per-column min-max scaling
%   2 - linear projection through the weights/bias returned by training_rbm_
% Any other value is rejected explicitly so that `features` is never left
% undefined for the sections below.
EXT_TYPE = 2;
switch (EXT_TYPE)
    case 1  % Using PCA
        assert(~exist('OCTAVE_VERSION'),'This script cannot run in octave');
        % Prefer princomp when it is available so existing setups behave
        % exactly as before; otherwise fall back to pca.
        % NOTE(review): 'Economy',false is assumed to reproduce princomp's
        % full-size coefficient matrix -- confirm against the toolbox docs.
        if exist('princomp','file')
            coeff = princomp(raw_features);
        else
            coeff = pca(raw_features,'Economy',false);
        end
        coeff = coeff(:,1:end-dmr(di));  % Change value of dmr(di) to reduce the dimensionality
        features = raw_features*coeff;
          % normalizing
        % mm holds two rows of per-column bounds; columns whose two bounds
        % coincide are dropped so the scaling below never divides by zero.
        mm = minmax(features')';
        inn= (find(mm(1,:)~=mm(2,:)));
        mm = mm(:,inn);
        features = features(:,inn);
        features = (features-repmat(mm(1,:),size(features,1),1))./(repmat(mm(2,:),size(features,1),1)-repmat(mm(1,:),size(features,1),1));
    case 2  % Using rbm
        % Configuration struct handed to training_rbm_.
        conf.hidNum = hidNum(hi);
        conf.eNum   = 100;
        conf.sNum   = size(raw_features,1);   % number of rows in raw_features
        conf.bNum   = 1;
        conf.gNum   = 1;
        conf.params = [lr_1(l1i) lr_2(l2i) mmt(mi) cost(ci)];
        conf.N    = 50;
        conf.MAX_INC = 10;
        W1 = zeros(0,0);   % empty initial weight matrix passed to the trainer
        [W1, vB1, hB1] = training_rbm_(conf,W1,raw_features);
        % Project the raw features through W1 and add hB1 to every row.
        features = raw_features*W1 + repmat(hB1,conf.sNum,1);
    otherwise
        error('Exp_template:badExtType', ...
              'Unsupported EXT_TYPE %d (expected 1 for PCA or 2 for RBM).', ...
              EXT_TYPE);
end

%% Sub-euclidean computation
% For each entry of trn_inx / tst_inx two distance outputs are collected:
%   *_12 = d(a,b)   and   *_13 = d(a,c)
num_case = size(trn_inx,1);   % number of cross-validation folds
[trnd_12, trnd_13, tstd_12, tstd_13] = deal(cell(1,num_case));

w = ws(wi);   % subspace window size

for i = 1:num_case % over all cross-validation folds
  if w == 0
    % no window: plain distance
    [trnd_12{i}, trnd_13{i}] = simple_dist(trn_inx{i},features,indices);
    [tstd_12{i}, tstd_13{i}] = simple_dist(tst_inx{i},features,indices);
  else
    % non-zero window: windowed distance. The trailing 1 is the option the
    % original author describes as "normalize" and reports as working
    % better than leaving it off.
    [trnd_12{i}, trnd_13{i}] = conv_euclidean_dist(trn_inx{i},features,indices,w,1);
    [tstd_12{i}, tstd_13{i}] = conv_euclidean_dist(tst_inx{i},features,indices,w,1);
  end
end
%% Data preparation
% Per fold, form the hypothesis d(a,c) - d(a,b) (the *_dat1 arrays) and its
% opposite d(a,b) - d(a,c) (the *_dat2 arrays), then shift each of them with
% (x + 1) / 2, i.e. the map that takes [-1 1] onto [0 1].
[trn_dat1, trn_dat2, tst_dat1, tst_dat2] = deal(cell(1,num_case));

% Alternative normalisations left disabled by the original author. They were
% applied to the raw differences, before the final shift:
%
%  (a) logistic (loses the range):
%        trn_dat1{i} = logistic(trn_dat1{i});
%        trn_dat2{i} = logistic(trn_dat2{i});
%        tst_dat1{i} = logistic(tst_dat1{i});
%        tst_dat2{i} = logistic(tst_dat2{i});
%
%  (b) min-max, shown for trn_dat1 and repeated unchanged for tst_dat1,
%      trn_dat2 and tst_dat2:
%        mm = minmax(trn_dat1{i}')';
%        inn= find(mm(1,:)~=mm(2,:));
%        mm = mm(:,inn);
%        trn_dat1{i} = (trn_dat1{i}(:,inn)-repmat(mm(1,:),size(trn_dat1{i},1),1))./repmat(mm(2,:)-mm(1,:),size(trn_dat1{i},1),1);

for i = 1:num_case
 % difference first, then the [-1 1] -> [0 1] shift
 trn_dat1{i} = ((trnd_13{i} - trnd_12{i}) + 1)/2;
 trn_dat2{i} = ((trnd_12{i} - trnd_13{i}) + 1)/2;
 tst_dat1{i} = ((tstd_13{i} - tstd_12{i}) + 1)/2;
 tst_dat2{i} = ((tstd_12{i} - tstd_13{i}) + 1)/2;
end
% Correct rate; starts at zero and is meant to be set by the experiment
% code inserted below.
correct = 0;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CODE HERE                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

fprintf('Correct = %f\n', correct);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Use the logging function to save the parameters
% and the result, for plotting or for grid search
% logging(log_file,[i1 i2 i3 i4 i5 correct]);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Remove every variable from the workspace once the run is finished.
clear