diff src/samer/models/ICAWithScaler.java @ 0:bf79fb79ee13

Initial Mercurial check in.
author samer
date Tue, 17 Jan 2012 17:50:20 +0000
parents
children
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/samer/models/ICAWithScaler.java	Tue Jan 17 17:50:20 2012 +0000
@@ -0,0 +1,163 @@
+/*
+ *	Copyright (c) 2002, Samer Abdallah, King's College London.
+ *	All rights reserved.
+ *
+ *	This software is provided AS IS and WITHOUT ANY WARRANTY;
+ *	without even the implied warranty of MERCHANTABILITY or
+ *	FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+package samer.models;
+import  samer.core.*;
+import  samer.core.Agent.*;
+import  samer.core.types.*;
+import  samer.tools.*;
+import  samer.maths.*;
+
+public class ICAWithScaler extends ICA
+{
+	VVector  		k;			// state, scaling vector
+	double[]		__k, tmp;
+
+	public ICAWithScaler(Vec input) { this(input.size()); setInput(input); }
+	public ICAWithScaler(int N)
+	{
+		super(N);
+		Shell.push(node);
+		k = new VVector("k",n);
+		Shell.pop();
+		__k=k.array();
+		tmp=new double[n];
+
+		for (int i=0; i<n; i++) __k[i]=1;	// start with unit (identity) scaling
+		k.changed();
+	}
+
+	public void dispose() { k.dispose(); super.dispose(); }
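+
+	// Inference composes the inherited ICA unmixing with the per-component scaling,
+	// i.e. s = diag(k)*W*x: infer.run() gives s=Wx, then each s[i] is multiplied by k[i].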
+	public void infer() {	// this overrides ICA.infer
+		infer.run(); // compute s=Wx
+		Mathx.mul(s.array(),__k);
+		s.changed();
+	}
+
+	public void compute() {
+		// elementwise: tmp[i] = sourceModel gradient[i] * k[i], then apply the inherited gradient rule
+		Mathx.mul(tmp,sourceModel.getGradient(),__k);
+		grad.apply(tmp,_g);
+	}
+
+	public samer.maths.opt.Functionx functionx() { return null; }
+
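+	// fold(): bake the accumulated scaling into W (row i of W is multiplied by k[i])
+	// and reset k to unity; the combined transform diag(k)*W is left unchanged.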
+	public void fold() {
+		for (int i=0; i<n; i++) {
+			Mathx.mul(W.getArray()[i],__k[i]);
+			__k[i]=1;
+		}
+		k.changed();
+		W.changed();
+	}
+
+	public Trainer getDiffTrainer() { return new DifferentialTrainer(); }
+	public Trainer getScaleTrainer() { return new ScalerTrainer(); }
+
+
+	public class DifferentialTrainer extends ON3Trainer {
+		VDouble		scaleRate,stretchRate;
+		double		lastflush;
+
+		public DifferentialTrainer() {
+			scaleRate=new VDouble("alpha",0.1);
+			stretchRate=new VDouble("beta",0.05);
+		}
+
+		public void reset() { super.reset(); lastflush=0; }
+		public void flush() { diffFlush(); fold(); super.flush(); lastflush=0; }
+		public void diffFlush() // flush multipliers to k instead of W
+		{
+			double batchlet=batch-lastflush;
+			if (batchlet==0) return;
+
+			// do differential learning on trace & diagonal of G
+			double alpha=scaleRate.value/batchlet;
+			double beta=stretchRate.value/batchlet;
+
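+			// update applied below: with mu = trace(G)/n,
+			//   dl[i] = alpha*mu + beta*(G[i][i]-mu),  k[i] *= exp(-dl[i])
+			// so alpha acts on the mean of G's diagonal and beta on each
+			// component's deviation from that mean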
+			// compute factors and scale each component of k (rather than each row of W)
+			double mu=G.trace()/n,dl;
+			for (int i=0; i<_n; i++) {
+				dl=alpha*mu+beta*(_G[i][i]-mu);
+				double tmp=Math.exp(-dl);
+				// if (Double.isNaN(tmp)) throw new Error("alt: NaN"+i);
+				__k[i]*=tmp; // instead of Mathx.mul(_W[i],tmp);
+				_G[i][i]=0;
+			}
+			k.changed();
+			lastflush=batch;
+		}
+	}
+
+	/** This trainer updates ONLY the scaler part, not the ICA part, so it is much
+		faster than using the differential trainer with a zero learning rate. */
+	
+	public class ScalerTrainer extends AnonymousTask implements Model.Trainer
+	{
+		VVector		G;
+		double[]		_G;
+		double[]     _g,_s;
+		VDouble		scaleRate,stretchRate;
+		int				_n;
+		double		batch,thresh;
+		
+		public ScalerTrainer()
+		{
+			_n=n;
+			G=new VVector("G",n);
+			thresh=Shell.getDouble("anomaly",20*n);
+			scaleRate=new VDouble("alpha",0.02);
+			stretchRate=new VDouble("beta",0.002);
+			batch=0;
+
+			_s=s.array();
+			_G=G.array();
+		}
+
+		public void starting() { reset(); }
+		public void run() { accumulate(); }
+
+		public void dispose() { G.dispose(); scaleRate.dispose(); stretchRate.dispose(); super.dispose(); }
+		public void oneshot() { accumulate(); flush(); }
+		public void reset() { Mathx.zero(_G); batch=0; }
+		public void accumulate() { accumulate(1); }
+		public void accumulate(double w) {
+			// HACK: skip anomalous frames whose source-model energy exceeds the threshold
+			if (sourceModel.getEnergy()>thresh) return;
+			batch+=w;
+
+			double[] phi=sourceModel.getGradient();
+			for (int i=0; i<_n; i++) _G[i] += w*(phi[i]*_s[i] - 1);
+		}
+
+		public void flush()
+		{
+			if (batch==0) return;
+
+			G.changed();
+			
+			// do differential learning on trace & diagonal of G
+			double alpha=scaleRate.value/batch;
+			double beta=stretchRate.value/batch;
+			
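+			// same multiplicative update as DifferentialTrainer.diffFlush(), except that
+			// _G holds only the accumulated diagonal phi[i]*s[i]-1, so mu is its mean:
+			//   dl[i] = alpha*mu + beta*(_G[i]-mu),  k[i] *= exp(-dl[i])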
+			// compute factors and scale each component of k (rather than each row of W)
+			double mu=Mathx.sum(_G)/n, dl;
+			for (int i=0; i<_n; i++) {
+				dl=alpha*mu+beta*(_G[i]-mu);
+				double tmp=Math.exp(-dl);
+				if (Double.isNaN(tmp)) throw new Error("alt: NaN"+i);
+				__k[i]*=tmp; // instead of Mathx.mul(_W[i],tmp);
+			}
+			k.changed();
+		
+			reset(); // ready for next batch
+		}
+	}
+}
+
+
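
A minimal usage sketch of the class this changeset adds, assuming the host application supplies the input Vec and drives inference and training once per frame; the variables `input` and `frames`, the loop structure, and the use of Model.Trainer as the trainer's static type (as ScalerTrainer declares) are illustrative assumptions, not part of the file:

	// hypothetical driver code, not part of ICAWithScaler.java
	ICAWithScaler ica = new ICAWithScaler(input);	// or new ICAWithScaler(n) followed by setInput(input)
	Model.Trainer scaler = ica.getScaleTrainer();	// updates only the scaling vector k, not W

	for (int t=0; t<frames; t++) {
		ica.infer();			// s = diag(k)*W*x
		scaler.accumulate();	// accumulates phi[i]*s[i]-1 for each component
	}
	scaler.flush();				// k[i] *= exp(-(alpha*mu + beta*(G[i]-mu)))
	ica.fold();					// optionally fold k into W and reset k to unity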