function cwr = cwr_em(X, Y, nc, varargin)
% CWR_EM Fit the parameters of a cluster weighted regression model using EM
% cwr = cwr_em(X, Y, nc, ...)
%
% X(:, t) is the t'th input example
% Y(:, t) is the t'th output example
% nc is the number of clusters
%
% Optional arguments [defaults in brackets]:
% 'max_iter' [10], 'thresh' [1e-2], 'cov_typeX' ['full'], 'cov_typeY' ['full'],
% 'clamp_weights' [0], 'clamp_covX' [0], 'clamp_covY' [0], 'regress' [1],
% 'create_init_params' [1], 'verbose' [0],
% 'muX', 'muY', 'SigmaX', 'SigmaY', 'weightsY', 'priorC',
% 'cov_priorX', 'cov_priorY' [all []]
%
% Kevin Murphy, May 2003

[max_iter, thresh, cov_typeX, cov_typeY, clamp_weights, ...
 muX, muY, SigmaX, SigmaY, weightsY, priorC, create_init_params, ...
 cov_priorX, cov_priorY, verbose, regress, clamp_covX, clamp_covY] = process_options(...
    varargin, 'max_iter', 10, 'thresh', 1e-2, 'cov_typeX', 'full', ...
    'cov_typeY', 'full', 'clamp_weights', 0, ...
    'muX', [], 'muY', [], 'SigmaX', [], 'SigmaY', [], 'weightsY', [], 'priorC', [], ...
    'create_init_params', 1, 'cov_priorX', [], 'cov_priorY', [], 'verbose', 0, ...
    'regress', 1, 'clamp_covX', 0, 'clamp_covY', 0);

[nx N] = size(X);
[ny N2] = size(Y);
if N ~= N2
  error(sprintf('nsamples X (%d) ~= nsamples Y (%d)', N, N2));
end
%if N < nx
%  fprintf('cwr_em warning: dim X (%d) > nsamples X (%d)\n', nx, N);
%end
if (N < nx) & regress
  fprintf('cwr_em warning: dim X = %d, nsamples X = %d\n', nx, N);
end
if (N < ny)
  fprintf('cwr_em warning: dim Y = %d, nsamples Y = %d\n', ny, N);
end
if (nc > N)
  error(sprintf('cwr_em: more centers (%d) than data', nc))
end

if nc==1
  % No latent variable, so there is a closed-form solution
  w = 1/N;
  WYbig = Y*w;
  WYY = WYbig * Y';
  WY = sum(WYbig, 2);
  WYTY = sum(diag(WYbig' * Y));
  cwr.priorC = 1;
  cwr.SigmaX = [];
  if ~regress
    % This is just fitting an unconditional Gaussian
    cwr.weightsY = [];
    [cwr.muY, cwr.SigmaY] = ...
        mixgauss_Mstep(1, WY, WYY, WYTY, ...
                       'cov_type', cov_typeY, 'cov_prior', cov_priorY);
    % There is a much easier way...
    assert(approxeq(cwr.muY, mean(Y')))
    assert(approxeq(cwr.SigmaY, cov(Y') + 0.01*eye(ny)))
  else
    % This is just linear regression
    WXbig = X*w;
    WXX = WXbig * X';
    WX = sum(WXbig, 2);
    WXTX = sum(diag(WXbig' * X));
    WXY = WXbig * Y';
    [cwr.muY, cwr.SigmaY, cwr.weightsY] = ...
        clg_Mstep(1, WY, WYY, WYTY, WX, WXX, WXY, ...
                  'cov_type', cov_typeY, 'cov_prior', cov_priorY);
  end
  if clamp_covY, cwr.SigmaY = SigmaY; end
  if clamp_weights, cwr.weightsY = weightsY; end
  return;
end


if create_init_params
  [cwr.muX, cwr.SigmaX] = mixgauss_init(nc, X, cov_typeX);
  [cwr.muY, cwr.SigmaY] = mixgauss_init(nc, Y, cov_typeY);
  cwr.weightsY = zeros(ny, nx, nc);
  cwr.priorC = normalize(ones(nc,1));
else
  cwr.muX = muX; cwr.muY = muY; cwr.SigmaX = SigmaX; cwr.SigmaY = SigmaY;
  cwr.weightsY = weightsY; cwr.priorC = priorC;
end

if clamp_covY, cwr.SigmaY = SigmaY; end
if clamp_covX, cwr.SigmaX = SigmaX; end
if clamp_weights, cwr.weightsY = weightsY; end

previous_loglik = -inf;
num_iter = 1;
converged = 0;

while (num_iter <= max_iter) & ~converged

  % E step
  [likXandY, likYgivenX, post] = cwr_prob(cwr, X, Y);
  loglik = sum(log(likXandY));

  % extract expected sufficient statistics
  w = sum(post,2); % post(c,t)
  WYY = zeros(ny, ny, nc);
  WY = zeros(ny, nc);
  WYTY = zeros(nc,1);

  WXX = zeros(nx, nx, nc);
  WX = zeros(nx, nc);
  WXTX = zeros(nc, 1);
  WXY = zeros(nx, ny, nc);
  %WYY = repmat(reshape(w, [1 1 nc]), [ny ny 1]) .* repmat(Y*Y', [1 1 nc]);
  for c=1:nc
    weights = repmat(post(c,:), ny, 1);
    WYbig = Y .* weights;
    WYY(:,:,c) = WYbig * Y';
    WY(:,c) = sum(WYbig, 2);
    WYTY(c) = sum(diag(WYbig' * Y));

    weights = repmat(post(c,:), nx, 1); % weights(nx, nsamples)
    WXbig = X .* weights;
    WXX(:,:,c) = WXbig * X';
    WX(:,c) = sum(WXbig, 2);
    WXTX(c) = sum(diag(WXbig' * X));
    WXY(:,:,c) = WXbig * Y';
  end

  % M step
  % Q -> X here is called Q -> Y in clg_Mstep
  [cwr.muX, cwr.SigmaX] = mixgauss_Mstep(w, WX, WXX, WXTX, ...
      'cov_type', cov_typeX, 'cov_prior', cov_priorX);
  for c=1:nc
    assert(is_psd(cwr.SigmaX(:,:,c)))
  end

  if clamp_weights % affects estimate of mu and Sigma
    W = cwr.weightsY;
  else
    W = [];
  end
  [cwr.muY, cwr.SigmaY, cwr.weightsY] = ...
      clg_Mstep(w, WY, WYY, WYTY, WX, WXX, WXY, ...
                'cov_type', cov_typeY, 'clamped_weights', W, ...
                'cov_prior', cov_priorY);
  %'xs', X, 'ys', Y, 'post', post); % debug
  %a = linspace(min(Y(2,:)), max(Y(2,:)), nc+2);
  %cwr.muY(2,:) = a(2:end-1);

  cwr.priorC = normalize(w);

  for c=1:nc
    assert(is_psd(cwr.SigmaY(:,:,c)))
  end

  if clamp_covY, cwr.SigmaY = SigmaY; end
  if clamp_covX, cwr.SigmaX = SigmaX; end
  if clamp_weights, cwr.weightsY = weightsY; end

  if verbose, fprintf(1, 'iteration %d, loglik = %f\n', num_iter, loglik); end
  num_iter = num_iter + 1;
  converged = em_converged(loglik, previous_loglik, thresh);
  previous_loglik = loglik;

end
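
% Usage sketch (illustrative only; not part of the original routine).
% Fits a 3-cluster CWR model to a noisy 1-D output of 2-D inputs, then
% scores the training data with cwr_prob, just as the E step above does.
% Data shapes and parameter names follow the help comment and defaults above.
%
%   X = randn(2, 500);                       % X(:,t) is the t'th input
%   Y = sin(X(1,:)) + 0.1*randn(1, 500);     % Y(:,t) is the t'th output
%   cwr = cwr_em(X, Y, 3, 'max_iter', 20, 'verbose', 1);
%   [likXandY, likYgivenX, post] = cwr_prob(cwr, X, Y);
%   loglik = sum(log(likXandY))              % training log-likelihood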