annotate toolboxes/FullBNT-1.0.7/bnt/potentials/@upot/upot_to_opt_policy.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
function [policy, EU] = upot_to_opt_policy(pot)
% UPOT_TO_OPT_POLICY Derive an optimal deterministic policy from a utility potential
% [policy, EU] = upot_to_opt_policy(pot)
%
% The last dimension of the potential is treated as the decision ("self");
% all preceding dimensions are treated as its parents.
%
% policy(a,b,...,z) = P(do z | a,b,...) is a delta function: for every joint
%   parent setting, exactly one action receives probability 1.
% EU is the sum over all entries of the contraction pot.p .* pot.u.
% For a potential with no dimensions, policy = [] and EU = pot.u.

dims = pot.sizes;
if isempty(dims)
  % No dimensions at all: there is nothing to choose between.
  policy = [];
  EU = pot.u;
  return;
end

num_actions = dims(end);
num_parent_cfgs = prod(dims(1:end-1)); % evaluates to 1 when there are no parents

% Contraction laid out with one row per joint parent setting and one
% column per action.
contraction = reshape(pot.p .* pot.u, num_parent_cfgs, num_actions);
EU = sum(contraction(:));

% For each parent setting, put all probability mass on the action picked
% by argmax over that row of the contraction.
policy = zeros(num_parent_cfgs, num_actions);
for row = 1:num_parent_cfgs
  policy(row, argmax(contraction(row,:))) = 1;
end

% Restore the original multi-dimensional layout.
policy = myreshape(policy, dims);