diff core/magnatagatune/tests_evals/do_test_rounds.m @ 0:e9a9cd732c1e tip
first hg version after svn
author     wolffd
date       Tue, 10 Feb 2015 15:05:51 +0000
parents    (none)
children   (none)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/magnatagatune/tests_evals/do_test_rounds.m	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,255 @@
+function [out] = do_test_rounds(trainfun, X, simdata, trainparams, fparams, ...
+    paramhash, paramhash_train, clips)
+
+% ---
+% DEBUG: we mix up the training set
+% ---
+% simdata = mixup(simdata);
+
+if isfield(simdata, 'clip_type')
+    clip_type = simdata.clip_type;
+else
+    clip_type = 'MTTClip';
+end
+
+nTestSets = size(simdata.partBinTst, 2);   % number of cross-validation bins
+ntrainsizes = size(simdata.partBinTrn, 2); % number of training-set size increments
+
+for m = 1:ntrainsizes
+
+    ok_train = zeros(2, nTestSets);
+    ok_test = zeros(2, nTestSets);
+    equal_test = zeros(1, nTestSets);
+    ok_notin_train = zeros(2, nTestSets);
+
+    % A = cell(nTestSets,1);
+    % dout = cell(nTestSets,1);
+    % clips_train = cell(nTestSets,1);
+    % clips_test = cell(nTestSets,1);
+    % clips_notin_train = cell(nTestSets,1);
+    % Y_notin_train = cell(nTestSets,1);
+    % Ytrain = cell(nTestSets,1);
+    % Ytest = cell(nTestSets,1);
+
+    % parfor
+    for k = 1:nTestSets
+
+        % runlog mlr
+        try
+
+            % ---
+            % get the training constraints and features for this round
+            % DEBUG: the similarity data in Ytrain and Ytest seems correct.
+            % ---
+            [clips_train{k}, Xtrain, Ytrain{k}] ...
+                = get_data_compact(clips, X, simdata.partBinTrn{k,m});
+            Ytest{k} = {};
+
+            % training step
+            [A{k}, dout{k}] = feval(trainfun, Xtrain, Ytrain{k}, trainparams);
+
+            % ---
+            % test step
+            % TODO: the distance measure object could be created by the wrapper!
+            % ---
+            if isfield(dout{k}, 'interpreter')
+                interpreter = str2func(dout{k}.interpreter);
+            else
+                % only for backward compatibility
+                % warning('legacy implementation of dist measure');
+                if isnumeric(A{k})
+                    % Mahalanobis case: special delta Mahalanobis
+                    interpreter = str2func('DistMeasureMahal');
+                else
+                    % neural network case: A{k} is a neural net object
+                    interpreter = str2func('DistMeasureGeneric');
+                end
+            end
+
+            if isfield(trainparams, 'deltafun')
+                % special delta
+                diss = feval(interpreter, clips, A{k}, X, ...
+                    str2func(trainparams.deltafun), trainparams.deltafun_params);
+            else
+                % standard delta
+                % ---
+                % TODO: the default delta differs between similarity
+                % measures; except for the Mahalanobis measure it
+                % should be specified explicitly
+                % ---
+                diss = feval(interpreter, clips, A{k}, X);
+            end
+
+            % evaluate on training data
+            ok_train(:,k) = metric_fulfills_ranking ...
+                (diss, Ytrain{k}, feval(clip_type, clips_train{k}));
+
+            % get test data
+            [clips_test{k}, Xtest, Ytest{k}] ...
+                = get_data_compact(clips, X, simdata.partBinTst{k});
+
+            % diss = DistMeasureMahal(feval(clip_type, clips_test{k}), A{k}, Xtest);
+            % evaluate on test data
+            [ok_test(:,k), equal_test(k)] = metric_fulfills_ranking ...
+                (diss, Ytest{k}, feval(clip_type, clips_test{k}));
+            cprint(3, '%2.2f %2.2f fold performance', ok_test(:,k));
+
+            % ---
+            % extra diagnostics for MLR
+            % TODO: make this wrappable
+            % ---
+            if isequal(trainfun, @mlr_wrapper)
+                dout{k}.mprperf = mlr_test(A{k}, 0, Xtrain, Ytrain{k}(:,1:2), Xtest, Ytest{k}(:,1:2));
+            end
+
+            % ---
+            % this gives data for the unused training set remainders
+            % ---
+            if isfield(simdata, 'partBinNoTrn')
+                if ~isempty(simdata.partBinNoTrn{k,m})
+                    [clips_notin_train{k}, X_notin_train, Y_notin_train{k}] ...
+                        = get_data_compact(clips, X, simdata.partBinNoTrn{k,m});
+
+                    % evaluate on unused training data
+                    [ok_notin_train(:,k), equal_test(k)] = metric_fulfills_ranking ...
+                        (diss, Y_notin_train{k}, feval(clip_type, clips_notin_train{k}));
+
+                    % what to do if there is no data?
+                else
+                    ok_notin_train(:,k) = -1;
+                end
+            else
+                ok_notin_train(:,k) = -1;
+            end
+
+        catch err
+
+            % ---
+            % in case training or testing fails
+            % ---
+            print_error(err);
+
+            A{k} = [];
+            dout{k} = -1;
+
+            ok_test(:,k) = -1;
+            ok_train(:,k) = -1;
+            ok_notin_train(:,k) = -1;
+            equal_test(k) = -1;
+
+            % ---
+            % save feature, system and data configuration
+            % and indicate failure
+            % ---
+            xml_save(sprintf('runlog_%s.%s_trainparam.xml', ...
+                paramhash, paramhash_train), trainparams);
+            xml_save(sprintf('runlog_%s.%s_err.xml', ...
+                paramhash, paramhash_train), print_error(err));
+        end
+    end
+
+    if ~(ntrainsizes == 1)
+
+        % save elaborate testing data
+        size_sum = 0;
+        for i = 1:nTestSets
+            size_sum = size_sum + size(simdata.partBinTrn{i,m}) / size(simdata.partBinTrn{i,end});
+        end
+        size_sum = size_sum / nTestSets;
+
+        out.inctrain.trainfrac(:, m) = size_sum;
+        out.inctrain.dataPartition(:, m) = 0;
+
+        % ---
+        % NOTE: the max value is important for debugging,
+        % especially when the maximal training success is reached
+        % in the middle of the data set
+        % ---
+        % out.inctrain.max_ok_test(:, m) = max(ok_test, 2);
+        out.inctrain.mean_ok_test(:, m) = mean(ok_test(:, ok_test(1,:) >= 0), 2);
+        out.inctrain.var_ok_test(:, m) = var(ok_test(:, ok_test(1,:) >= 0), 0, 2);
+        out.inctrain.equal_test(m) = median(equal_test);
+
+        out.inctrain.mean_ok_train(:, m) = mean(ok_train(:, ok_train(1,:) >= 0), 2);
+        out.inctrain.var_ok_train(:, m) = var(ok_train(:, ok_train(1,:) >= 0), 0, 2);
+
+        % ---
+        % TODO: DEBUG: this does not work correctly;
+        % maybe that is also true for the above?
+        % ---
+        out.inctrain.mean_ok_notin_train(:, m) = mean(ok_notin_train(:, ok_notin_train(1,:) >= 0), 2);
+        out.inctrain.var_ok_notin_train(:, m) = var(ok_notin_train(:, ok_notin_train(1,:) >= 0), 0, 2);
+
+        diag.inctrain(m).ok_train = ok_train;
+        diag.inctrain(m).ok_test = ok_test;
+        diag.inctrain(m).ok_notin_train = ok_notin_train;
+        diag.inctrain(m).equal_test = equal_test;
+    end
+
+    % ---
+    % save traditional information for the full training set
+    % ---
+    if size(simdata.partBinTrn{1,m}) == size(simdata.partBinTrn{1,end})
+
+        % out.max_ok_test = max(ok_test, 2);
+        out.mean_ok_test = mean(ok_test(:, ok_test(1,:) >= 0), 2);
+        out.var_ok_test = var(ok_test(:, ok_test(1,:) >= 0), 0, 2);
+        out.equal_test = median(equal_test);
+
+        out.mean_ok_train = mean(ok_train(:, ok_train(1,:) >= 0), 2);
+        out.var_ok_train = var(ok_train(:, ok_train(1,:) >= 0), 0, 2);
+
+        % ---
+        % TODO: DEBUG: this does not work correctly
+        % ---
+        out.mean_ok_notin_train = mean(ok_notin_train(:, ok_notin_train(1,:) >= 0), 2);
+        out.var_ok_notin_train = var(ok_notin_train(:, ok_notin_train(1,:) >= 0), 0, 2);
+
+        % ---
+        % get the winning measure:
+        % we use the weighted winning measure if possible
+        % ---
+        if max(ok_test(2,:)) > 0
+            [~, best] = max(ok_test(2,:));
+        else
+            [~, best] = max(ok_test(1,:));
+        end
+
+        diag.A = A;
+        diag.diag = dout;
+
+        diag.ok_test = ok_test;
+        diag.equal_test = equal_test;
+        diag.ok_train = ok_train;
+        diag.ok_notin_train = ok_notin_train;
+
+        % save some metric matrices
+        out.best_A = A{best};
+        out.best_diag = dout{best};
+        out.best_idx = best;
+
+    end
+end
+
+% save parameters
+out.camirrev = camirversion();
+out.fparams = fparams;
+out.trainfun = trainfun;
+out.trainparams = trainparams;
+out.clip_ids = clips.id();
+out.dataPartition = [];
+out.Y = size(simdata);
+
+% ---
+% NOTE: this takes A LOT OF DISK SPACE
+% ---
+% out.Ytrain = Ytrain{end};
+% out.Ytest = Ytest{end};
+
+% ---
+% save the diagnostics data to disk
+% ---
+save(sprintf('runlog_%s.%s_results.mat', ...
+    paramhash, paramhash_train), ...
+    'out', 'diag');
+end
\ No newline at end of file
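
For orientation, a minimal calling sketch (not part of the commit). It assumes the surrounding test framework has already prepared the feature matrix X, the clips object, and the simdata partition struct with the partBinTrn/partBinTst cell arrays used above; @mlr_wrapper is the training function referenced in the code, while the trainparams fields and the hash strings below are placeholders.

% hypothetical driver sketch; all concrete values are placeholders
trainparams = struct('C', 1);   % assumed wrapper-specific settings
fparams = struct();             % feature parameters, passed through to out.fparams
paramhash = 'abc123';           % placeholder run identifiers used in the
paramhash_train = 'def456';     % runlog_*.xml / runlog_*.mat file names

out = do_test_rounds(@mlr_wrapper, X, simdata, trainparams, fparams, ...
    paramhash, paramhash_train, clips);

% results are also written to runlog_abc123.def456_results.mat;
% the returned struct holds the cross-validated ranking success rates
disp(out.mean_ok_test);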