Mercurial > hg > plcrp
changeset 0:b31415b4a196
Initial check in.
author | samer |
---|---|
date | Fri, 20 Jan 2012 16:58:52 +0000 |
parents | |
children | ed130acab5f1 |
files | Makefile RngStream.c RngStream.h crp.c crp.pl plutils.c rndutils.c rndutils.h test_crp.pl utils.c |
diffstat | 10 files changed, 1531 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,43 @@ +# install directories +export INSTALL_LIB_TO=~/lib/prolog/x86_64 +export INSTALL_PL_TO=~/lib/prolog + +# flags for install - BSD install seems to be different from GNU install +export INSTALL_FLAGS='-bCS' + +SO=dylib + +TARGET=crp.$(SO) +PLLD=swipl-ld +GSL=/opt/local +HAVE_GSL=1 + +ifeq ($(HAVE_GSL),1) + PLLDFLAGS=-DHAVE_GSL=1 -I$(GSL)/include -L$(GSL)/lib -lgsl -lm -fPIC -Wall -O2 +else + PLLDFLAGS=-DHAVE_GSL=0 -lm -fPIC -Wall -O2 +endif + +.SUFFIXES: .c .o .so .dylib + +main: $(TARGET) RngStream.o rndutils.o + +$(TARGET): crp.c RngStream.o rndutils.o + +clean: + rm $(TARGET) RngStream.o rndutils.o + +.c.o: + $(PLLD) -v $(PLLDFLAGS) -c $< + +.c.so: + $(PLLD) -v $(PLLDFLAGS) -shared -o $@ RngStream.o rndutils.o $< + strip -x $@ + +.c.dylib: + $(PLLD) -v $(PLLDFLAGS) -shared -o $@ RngStream.o rndutils.o $< + strip -x $@ + +install: main + install $(INSTALL_FLAGS) $(TARGET) $(INSTALL_LIB_TO) + install $(INSTALL_FLAGS) -m 644 crp.pl $(INSTALL_PL_TO)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/RngStream.c Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,293 @@ +/***********************************************************************\ + * + * File: RngStream.c for multiple streams of Random Numbers + * Language: ANSI C + * Copyright: Pierre L'Ecuyer, University of Montreal + * Date: 14 August 2001 + * License: GPL version 2 or later + * + * Notice: Please contact P. L'Ecuyer at <lecuyer@iro.UMontreal.ca> + * for commercial purposes. + * +\***********************************************************************/ + + +#include "RngStream.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +/*---------------------------------------------------------------------*/ +/* Private part. */ +/*---------------------------------------------------------------------*/ + +typedef struct rng_state *RngStream; + +#define norm 2.328306549295727688e-10 +#define m1 4294967087.0 +#define m2 4294944443.0 +#define a12 1403580.0 +#define a13n 810728.0 +#define a21 527612.0 +#define a23n 1370589.0 + +#define two17 131072.0 +#define two53 9007199254740992.0 +#define fact 5.9604644775390625e-8 /* 1 / 2^24 */ + + + +/* Default initial seed */ +static double nextSeed[6] = { 12345, 12345, 12345, 12345, 12345, 12345 }; + + +/* The following are the transition matrices of the two MRG components */ +/* (in matrix form), raised to the powers 2^0, 2^76, and 2^127, resp.*/ + +static double A1p0[3][3] = { + { 0.0, 1.0, 0.0 }, + { 0.0, 0.0, 1.0 }, + { -810728.0, 1403580.0, 0.0 } + }; + +static double A2p0[3][3] = { + { 0.0, 1.0, 0.0 }, + { 0.0, 0.0, 1.0 }, + { -1370589.0, 0.0, 527612.0 } + }; + + +static double A1p76[3][3] = { + { 82758667.0, 1871391091.0, 4127413238.0 }, + { 3672831523.0, 69195019.0, 1871391091.0 }, + { 3672091415.0, 3528743235.0, 69195019.0 } + }; + +static double A2p76[3][3] = { + { 1511326704.0, 3759209742.0, 1610795712.0 }, + { 4292754251.0, 1511326704.0, 3889917532.0 }, + { 3859662829.0, 4292754251.0, 3708466080.0 } + }; + +static double A1p127[3][3] = { + { 2427906178.0, 3580155704.0, 949770784.0 }, + { 226153695.0, 1230515664.0, 3580155704.0 }, + { 1988835001.0, 986791581.0, 1230515664.0 } + }; + +static double A2p127[3][3] = { + { 1464411153.0, 277697599.0, 1610723613.0 }, + { 32183930.0, 1464411153.0, 1022607788.0 }, + { 2824425944.0, 32183930.0, 2093834863.0 } + }; + + + + + +/*-------------------------------------------------------------------------*/ + +static double MultModM (double a, double s, double c, double m) + /* Compute (a*s + c) % m. m must be < 2^35. Works also for s, c < 0 */ +{ + double v; + long a1; + v = a * s + c; + if ((v >= two53) || (v <= -two53)) { + a1 = (long) (a / two17); + a -= a1 * two17; + v = a1 * s; + a1 = (long) (v / m); + v -= a1 * m; + v = v * two17 + a * s + c; + } + a1 = (long) (v / m); + if ((v -= a1 * m) < 0.0) + return v += m; + else + return v; +} + + +/*-------------------------------------------------------------------------*/ + + +static void print_vec(double *p) +{ + int i; + for (i=0; i<6; i++) printf("%.0lf ",p[i]); + printf("\n"); +} + + +static void MatVecModM (double A[3][3], double s[3], double v[3], double m) + /* Returns v = A*s % m. Assumes that -m < s[i] < m. */ + /* Works even if v = s. */ +{ + int i; + double x[3]; + for (i = 0; i < 3; ++i) { + x[i] = MultModM (A[i][0], s[0], 0.0, m); + x[i] = MultModM (A[i][1], s[1], x[i], m); + x[i] = MultModM (A[i][2], s[2], x[i], m); + } + memcpy(v,x,3*sizeof(double)); +} + + +static void MatMatModM (double A[3][3], double B[3][3], double C[3][3], + double m) + /* Returns C = A*B % m. Work even if A = C or B = C or A = B = C. */ +{ + int i, j; + double V[3], W[3][3]; + for (i = 0; i < 3; ++i) { + for (j = 0; j < 3; ++j) V[j] = B[j][i]; + MatVecModM (A, V, V, m); + for (j = 0; j < 3; ++j) W[j][i] = V[j]; + } + memcpy(C,W,9*sizeof(double)); +} + +static void MatTwoPowModM (double A[3][3], double B[3][3], double m, long e) + /* Compute matrix B = (A^(2^e) % m); works even if A = B */ +{ + int i, j; + + /* initialize: B = A */ + if (A != B) { + for (i = 0; i < 3; i++) { + for (j = 0; j < 3; ++j) + B[i][j] = A[i][j]; + } + } + /* Compute B = A^{2^e} */ + for (i = 0; i < e; i++) + MatMatModM (B, B, B, m); +} + + +/* returns the next integer in the sequence + * new state goes in g1 + * g1 can be the same as g, resulting in + * an update in place. + */ +double next(RngStream g, RngStream g1) +{ + double p1, p2; + +// printf("before: "); print_vec(g->Cg); + /* Component 1 */ + p1 = a12 * g->Cg[1] - a13n * g->Cg[0]; + p1 -= m1*floor(p1/m1); + g1->Cg[0] = g->Cg[1]; + g1->Cg[1] = g->Cg[2]; + g1->Cg[2] = p1; + + /* Component 2 */ + p2 = a21 * g->Cg[5] - a23n * g->Cg[3]; + p2 -= m2*floor(p2/m2); + g1->Cg[3] = g->Cg[4]; + g1->Cg[4] = g->Cg[5]; + g1->Cg[5] = p2; + + // printf("after: "); print_vec(g1->Cg); + /* Combination */ + return (p1 > p2) ? (p1 - p2) : (p1 - p2 + m1); +} + + +/*-------------------------------------------------------------------------*/ +static int CheckSeed (unsigned seed[6]) +{ + /* Check that the seeds are legitimate values. Returns 0 if legal seeds, + * i:1..6 if seed[i] is bad + * 8 if seeds 1..3 are zero + * 9 if seeds 4..6 are zero + */ + int i; + + for (i = 0; i < 3; ++i) { + if (seed[i] >= m1) return i+1; + } + for (i = 3; i < 6; ++i) { + if (seed[i] >= m2) return i+1; + } + if (seed[0] == 0 && seed[1] == 0 && seed[2] == 0) return 8; + if (seed[3] == 0 && seed[4] == 0 && seed[5] == 0) return 9; + + return 0; +} + + +/*---------------------------------------------------------------------*/ +/* Public part. */ +/*---------------------------------------------------------------------*/ + + +void RngStream_InitState(RngStream g) { + memcpy(g->Cg,nextSeed,6*sizeof(double)); + //printf("init: "); print_vec(g->Cg); +} + +void RngStream_InitJump(struct rng_jump *j, int e) +{ + void *a1, *a2; + + if (e>=127) { a1=A1p127; a2=A2p127; e-=127; } + else if (e==76) { a1=A1p76; a2=A2p76; e-=76; } + else { a1=A1p0; a2=A2p0; } + + memcpy(j->T1,a1,9*sizeof(double)); + memcpy(j->T2,a2,9*sizeof(double)); + MatTwoPowModM(j->T1,j->T1,m1,e); + MatTwoPowModM(j->T2,j->T2,m2,e); +} + +void RngStream_DoubleJump(struct rng_jump *j1, struct rng_jump *j2) +{ + MatMatModM(j1->T1,j1->T1,j2->T1,m1); + MatMatModM(j1->T2,j1->T2,j2->T2,m2); +} + +void RngStream_Advance(struct rng_jump *j, RngStream g1, RngStream g2) +{ + MatVecModM (j->T1, &g1->Cg[0], &g2->Cg[0], m1); + MatVecModM (j->T2, &g1->Cg[3], &g2->Cg[3], m2); +} + +double RngStream_Float(RngStream g, RngStream g1) { + return norm*next(g,g1); +} + +double RngStream_Double(RngStream g, RngStream g1) { + double u = RngStream_Float(g,g1); + u += fact*RngStream_Float(g1,g1); + return (u < 1.0) ? u : (u - 1.0); +} + +double RngStream_Raw(RngStream g, RngStream g1) { + return next(g,g1); +} + +/*-------------------------------------------------------------------------*/ + +int RngStream_SetState (RngStream g, unsigned seed[6]) +{ + int i; + if (CheckSeed (seed)) + return -1; /* FAILURE */ + for (i = 0; i < 6; ++i) + g->Cg[i] = (double)seed[i]; + + return 0; /* SUCCESS */ +} + + +void RngStream_GetState (RngStream g, unsigned seed[6]) +{ + int i; + for (i = 0; i < 6; ++i) seed[i] = (unsigned)g->Cg[i]; +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/RngStream.h Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,27 @@ +/* RngStream.h for ANSI C */ +#ifndef RNGSTREAM_H +#define RNGSTREAM_H + +struct rng_state { + double Cg[6]; // state of generator +}; + +struct rng_jump { + double T1[3][3]; // transition matrix for a jump in first generator + double T2[3][3]; // ditto for second +}; + + +void RngStream_InitState(struct rng_state *g); +void RngStream_InitJump(struct rng_jump *j,int e); +void RngStream_DoubleJump(struct rng_jump *j, struct rng_jump *j2); +int RngStream_SetState(struct rng_state *g, unsigned seed[6]); +void RngStream_GetState(struct rng_state *g, unsigned seed[6]); +void RngStream_Advance(struct rng_jump *j, struct rng_state *g1, struct rng_state *g2); +double RngStream_Float(struct rng_state *g1, struct rng_state *g2); +double RngStream_Double(struct rng_state *g1, struct rng_state *g2); +double RngStream_Raw(struct rng_state *g1, struct rng_state *g2); + +#endif + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crp.c Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,424 @@ +/* + */ + +#include <SWI-Prolog.h> +#include <math.h> +#include <float.h> +#include <stdio.h> + +#include "rndutils.h" +#include "plutils.c" + +install_t install(); + +foreign_t crp_prob( term_t alpha, term_t classes, term_t x, term_t pprob, term_t p); +foreign_t crp_sample( term_t alpha, term_t classes, term_t action, term_t rnd1, term_t rnd2); +foreign_t crp_sample_obs( term_t alpha, term_t classes, term_t x, term_t probx, term_t act, term_t rnd1, term_t rnd2); +foreign_t crp_sample_rm( term_t classes, term_t x, term_t class, term_t rnd1, term_t rnd2); +foreign_t sample_dp_teh( term_t ApSumKX, term_t B, term_t NX, term_t p1, term_t p2, term_t rnd1, term_t rnd2); +foreign_t sample_py_teh( term_t ThPrior, term_t DPrior, term_t CountsX, term_t p1, term_t p2, term_t rnd1, term_t rnd2); + +static atom_t atom_new; +static functor_t functor_old1, functor_old2; +static functor_t functor_dp1, functor_py2; + +install_t install() { + PL_register_foreign("crp_prob", 5, (void *)crp_prob, 0); + PL_register_foreign("crp_sample", 5, (void *)crp_sample, 0); + PL_register_foreign("crp_sample_obs", 7, (void *)crp_sample_obs, 0); + PL_register_foreign("crp_sample_rm", 5, (void *)crp_sample_rm, 0); + PL_register_foreign("sample_dp_teh", 7, (void *)sample_dp_teh, 0); + PL_register_foreign("sample_py_teh", 7, (void *)sample_py_teh, 0); + + functor_dp1 = PL_new_functor(PL_new_atom("dp"),1); + functor_py2 = PL_new_functor(PL_new_atom("py"),2); + functor_old1 = PL_new_functor(PL_new_atom("old"),1); + functor_old2 = PL_new_functor(PL_new_atom("old"),2); + atom_new = PL_new_atom("new"); +} + + +// unify Prolog BLOB with RndState structure +static int unify_state(term_t state, RndState *S, PL_blob_t *pblob) { + return PL_unify_blob(state, S, sizeof(RndState), pblob); +} + +// extract RndState structure from Prolog BLOB +static int get_state(term_t state, RndState *S0, PL_blob_t **ppblob) +{ + size_t len; + RndState *S; + + PL_get_blob(state, (void **)&S, &len, ppblob); + *S0=*S; + return TRUE; +} + + +// ----------------------------------------------------- +// Prolog versions of functions to implement + +int counts_dist( term_t gem, term_t counts, size_t len, double *dist); +int get_classes(term_t Classes, term_t Counts, term_t Vals, long *len); +void stoch(double *x, size_t len); + +/* +%% crp_prob( +GEM:gem_model, +Classes:classes(A), +X:A, +PProb:float, -Prob:float) is det. +% +% Compute the probability Prob of observing X given a CRP +% and a base probability of PProb. +crp_prob( Alpha, classes(Counts,Vals), X, PProb, P) :- + counts_dist( Alpha, Counts, Counts1), + stoch( Counts1, Probs, _), + maplist( equal(X), Vals, Mask), + maplist( mul, [PProb | Mask], Probs, PostProbs), + sumlist( PostProbs, P). + +*/ + +foreign_t crp_prob( term_t Alpha, term_t Classes, term_t X, term_t PProb, term_t Prob) +{ + term_t Counts=PL_new_term_ref(); + term_t Vals=PL_new_term_ref(); + double prob=0, pprob; + double *dist=NULL; + long len=0; + + int rc = get_double(PProb, &pprob) + && get_classes(Classes, Counts, Vals, &len) + && alloc_array(len+1, sizeof(double), (void **)&dist) + && counts_dist(Alpha, Counts, len, dist); + + if (rc) { + term_t Val = PL_new_term_ref(); + int i; + + stoch(dist,len+1); + prob = pprob*dist[0]; + for (i=1; i<=len && PL_get_list(Vals,Val,Vals); i++) { + if (PL_unify(Val,X)) prob += dist[i]; + } + } else rc=0; + if (dist) free(dist); + return rc && PL_unify_float(Prob,prob); +} + +/* + + +%% crp_sample( +GEM:gem_model, +Classes:classes(A), -A:action(A))// is det. +% +% Sample a new value from CRP, Action A is either new, which means +% that the user should sample a new value from the base distribtion, +% or old(X,C), where X is an old value and C is the index of its class. +% Operates in random state DCG. +crp_sample( Alpha, classes(Counts,Vals), Action, RS1, RS2) :- + counts_dist(Alpha, Counts, Counts1), + discrete(Counts1,Z,RS1,RS2), + ( Z>1 -> succ(C,Z), nth1(C,Vals,X), Action=old(X,C) + ; Action=new). + +*/ + +foreign_t crp_sample( term_t Alpha, term_t Classes, term_t Action, term_t Rnd1, term_t Rnd2) +{ + term_t Counts=PL_new_term_ref(); + term_t Vals=PL_new_term_ref(); + PL_blob_t *blob; + double *dist=NULL; + RndState rs; + long len=0; + + int rc = get_classes(Classes, Counts, Vals, &len) + && alloc_array(len+1, sizeof(double), (void **)&dist) + && counts_dist(Alpha, Counts, len, dist) + && get_state(Rnd1,&rs,&blob); + + if (rc) { + int z=Discrete( &rs, len+1, dist, sum_array(dist,len+1)); + + if (z==0) { rc = PL_unify_atom(Action,atom_new); } + else { + term_t X=PL_new_term_ref(); + int i=0; + while (i<z && PL_get_list(Vals,X,Vals)) i++; + rc = (i==z) && PL_unify_term(Action, PL_FUNCTOR, functor_old2, PL_TERM, X, PL_INT, z); + } + } + if (dist) free(dist); + return rc && unify_state(Rnd2,&rs,blob); +} + +/* + +%% crp_sample_obs( +GEM:gem_model, +Classes:classes(A), +X:A, +PProb:float, -A:action)// is det. +% +% Sample class appropriate for observation of value X. PProb is the +% base probability of X from the base distribution. Action A is new +% or old(Class). +% Operates in random state DCG. +crp_sample_obs( Alpha, classes(Counts,Vals), X, ProbX, A, RS1, RS2) :- + counts_dist( Alpha, Counts, [CNew|Counts1]), + PNew is CNew*ProbX, + maplist( post_count(X),Vals,Counts1,Counts2), + discrete( [PNew|Counts2], Z, RS1, RS2), + ( Z=1 -> A=new; succ(C,Z), A=old(C)). + +*/ + +foreign_t crp_sample_obs( term_t Alpha, term_t Classes, term_t X, term_t Probx, term_t Act, term_t Rnd1, term_t Rnd2) +{ + term_t Counts=PL_new_term_ref(); + term_t Vals=PL_new_term_ref(); + PL_blob_t *blob; + double probx=0; + double *dist=NULL; + long len=0; + RndState rs; + + int rc = get_double(Probx,&probx) + && get_classes(Classes, Counts, Vals, &len) + && alloc_array(len+1, sizeof(double), (void **)&dist) + && counts_dist(Alpha, Counts, len, dist) + && get_state(Rnd1,&rs,&blob); + + if (rc) { + term_t Val=PL_new_term_ref(); + int i, z; + + dist[0] *= probx; + for (i=1; i<=len && PL_get_list(Vals,Val,Vals); i++) { + if (!PL_unify(Val,X)) dist[i]=0; + } + + z=Discrete( &rs, len+1, dist, sum_array(dist,len+1)); + if (z==0) { rc = PL_unify_atom(Act,atom_new); } + else { + rc = PL_unify_term(Act, PL_FUNCTOR, functor_old1, PL_INT, z); + } + } + if (dist) free(dist); + return rc && unify_state(Rnd2,&rs,blob); +} + +/* +%% crp_sample_rm( +Classes:classes(A), +X:A, -C:natural)// is det. +% +% Sample appropriate class from which to remove value X. +% Operates in random state DCG. +crp_sample_rm( classes(Counts,Vals), X, Class, RS1, RS2) :- + maplist(post_count(X),Vals,Counts,Counts1), + discrete( Counts1, Class, RS1, RS2). + +*/ + +foreign_t crp_sample_rm( term_t Classes, term_t X, term_t Class, term_t Rnd1, term_t Rnd2) +{ + term_t Counts=PL_new_term_ref(); + term_t Vals=PL_new_term_ref(); + PL_blob_t *blob; + double *dist=NULL; + long len=0; + RndState rs; + + int rc = get_classes(Classes, Counts, Vals, &len) + && alloc_array(len, sizeof(double), (void **)&dist) + && get_list_doubles(Counts, dist, len) + && get_state(Rnd1,&rs,&blob); + + if (rc) { + term_t Val=PL_new_term_ref(); + int i, z; + + for (i=0; i<len && PL_get_list(Vals,Val,Vals); i++) { + if (!PL_unify(Val,X)) dist[i]=0; + } + + z = Discrete( &rs, len, dist, sum_array(dist,len)); + rc = (z<len) && PL_unify_integer(Class, z+1); + } + if (dist) free(dist); + return rc && unify_state(Rnd2,&rs,blob); +} + +/* +post_count(X,Val,Count,PC) :- X=Val -> PC=Count; PC=0. + +% ----------------------------------------------------------- +% Dirichlet process and Pitman-Yor process +% pseudo-counts models. + +counts_dist(_,[],0,[1]) :- !. +counts_dist(dp(Alpha),Counts,_,[Alpha|Counts]) :- !. +counts_dist(py(Alpha,Discount),Counts,K,[CNew|Counts1]) :- !, + CNew is Alpha+Discount*K, + maplist(sub(Discount),Counts,Counts1). + +*/ + +int get_float_arg(int n,term_t Term, double *px) +{ + term_t X=PL_new_term_ref(); + return PL_get_arg(n,Term,X) && PL_get_float(X,px); +} + +int counts_dist( term_t gem, term_t counts, size_t len, double *dist) +{ + if (len==0) { dist[0]=1; return TRUE; } + else { + if (PL_is_functor(gem, functor_dp1)) { + double alpha; + term_t head=PL_new_term_ref(); + int i, rc = get_float_arg(1,gem,&alpha); + + dist[0] = alpha; + for(i=1; rc && i<=len && PL_get_list(counts,head,counts); i++) { + rc = rc && PL_get_float(head,&dist[i]); + } + return rc; + } else if (PL_is_functor(gem, functor_py2)) { + double theta, disc, c; + term_t head=PL_new_term_ref(); + + int i, rc = get_float_arg(1,gem,&theta) + && get_float_arg(2,gem,&disc); + + dist[0] = theta + disc*len; + for(i=1; rc && i<=len && PL_get_list(counts,head,counts); i++) { + rc = rc && PL_get_float(head,&c); + dist[i] = c-disc; + } + return rc; + } else return FALSE; + } +} + +int get_classes(term_t Classes, term_t Counts, term_t Vals, long *len) +{ + term_t K=PL_new_term_ref(); + + return PL_get_arg(1,Classes,K) + && PL_get_arg(2,Classes,Counts) + && PL_get_arg(3,Classes,Vals) + && PL_get_long(K,len); +} + + +void stoch(double *x, size_t len) +{ + int i; + double total=0, *xp; + for (i=0, xp=x; i<len; i++, xp++) total += *xp; + for (i=0, xp=x; i<len; i++, xp++) *xp /= total; +} + +/* +sample_dp_teh( ApSumKX, B, NX, dp(Alpha1), dp(Alpha2)) --> + { Alpha1_1 is Alpha1+1 }, + seqmap(beta(Alpha1_1),NX,WX), + seqmap(bernoulli(Alpha1),NX,SX), + { maplist(log,WX,LogWX), + sumlist(SX,SumSX), + sumlist(LogWX,SumLogWX), + A1 is ApSumKX-SumSX, B1 is B-SumLogWX + }, + gamma(A1,B1,Alpha2). + +% run_left( seqmap(accum_log_beta(Alpha1_1),NX), 0, SumLogWX), +% run_left( seqmap(accum_bernoulli(Alpha1),NX), 0, SumSX), +%accum_log_beta(A,B) --> \> beta(A,B,X), { LogX is log(X) }, \< add(LogX). +%accum_bernoulli(A,B) --> \> bernoulli(A,B,X), \< add(X). + + */ + +int Bernoulli(RndState *rs,double a,double b) { + if ((a+b)*Uniform(rs)<b) return 1; else return 0; +} + + +foreign_t sample_dp_teh( term_t ApSumKX, term_t B, term_t NX, term_t p1, term_t p2, term_t rnd1, term_t rnd2) +{ + term_t N=PL_new_term_ref(); + PL_blob_t *pblob; + double apsumkx, b, alphap1; + double alpha1=0, alpha2; + double sum_log_wx, sum_sx; + long n=0; + RndState rs; + + int rc = get_double(ApSumKX, &apsumkx) + && get_double(B, &b) + && get_float_arg(1,p1,&alpha1) + && get_state(rnd1,&rs,&pblob); + + alphap1 = alpha1+1; + sum_log_wx = sum_sx = 0; + while (rc && PL_get_list(NX,N,NX)) { + rc = get_long(N,&n); + sum_log_wx += log(Beta(&rs,alphap1,n)); + sum_sx += Bernoulli(&rs,alpha1,n); + } + alpha2 = Gamma(&rs, apsumkx-sum_sx)/(b-sum_log_wx); + + return rc && PL_unify_term(p2, PL_FUNCTOR, functor_dp1, PL_FLOAT, alpha2) + && unify_state(rnd2,&rs,pblob); +} + +foreign_t sample_py_teh( term_t ThPrior, term_t DPrior, term_t CountsX, term_t p1, term_t p2, term_t rnd1, term_t rnd2) +{ + PL_blob_t *pblob; + term_t Counts = PL_new_term_ref(); + term_t Count = PL_new_term_ref(); + double theta_a, disc_a; + double theta_b, disc_b; + double theta1, disc1; + double theta2, disc2; + double theta1_1, disc1_1; + double sum_log_wx, sum_sx, sum_nsx, sum_zx; + RndState rs; + + int rc = get_float_arg(1,ThPrior,&theta_a) + && get_float_arg(2,ThPrior,&theta_b) + && get_float_arg(1,DPrior,&disc_a) + && get_float_arg(2,DPrior,&disc_b) + && get_float_arg(1,p1,&theta1) + && get_float_arg(2,p1,&disc1) + && get_state(rnd1,&rs,&pblob); + + theta1_1 = theta1+1; + disc1_1 = 1-disc1; + sum_log_wx = sum_sx = sum_nsx = sum_zx = 0; + while (rc && PL_get_list(CountsX,Counts,CountsX)) { + int n, k, i; + long c=0; + + for(k=0, n=0; rc && PL_get_list(Counts,Count,Counts); k++, n+=c) { + rc = get_long(Count,&c); + if (k>0) { if (Bernoulli(&rs, disc1*k, theta1)) sum_sx++; else sum_nsx++; } + for (i=0; i<c-1; i++) sum_zx += Bernoulli(&rs, i, disc1_1); + } + if (n>1) sum_log_wx += log(Beta(&rs, theta1_1, n-1)); + } + + theta2 = Gamma(&rs, theta_a + sum_sx)/(theta_b-sum_log_wx); + disc2 = Beta(&rs, disc_a + sum_nsx, disc_b + sum_zx); + return rc && unify_state(rnd2,&rs,pblob) + && PL_unify_term(p2, PL_FUNCTOR, functor_py2, PL_FLOAT, theta2, PL_FLOAT, disc2); +} + +/* +foreign_t sum_lengths( term_t Lists, term_t Total) +{ + double total=0; + size_t len=0; + term_t List=PL_new_term_ref(); + term_t Tail=PL_new_term_ref(); + int rc=1; + + while (rc && PL_get_list(Lists,List,Lists)) { + rc = PL_skip_list(List,Tail,&len); + total += len; + } + return rc && PL_unify_integer(Total, total); +} +*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crp.pl Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,293 @@ +:- module(crp, + [ empty_classes/1 + , classes_value/2 + , classes_counts/2 + , classes_update/3 + , seqmap_classes//2 + , dec_class//3 + , inc_class//1 + , remove_class//1 + , add_class//2 + + , crp_prob/5 + , crp_sample/5 + , crp_sample_obs/7 + , crp_sample_rm/5 + , crp_dist/6 + + , dp_sampler_teh/3 + , py_sampler_teh/4 + ]). + +/** <module> Chinese Restaurant Process utilities + + == + gem_model ---> dp(Alpha:nonneg) + ; py(Alpha:nonneg,Discount:nonneg). + + gamma_prior ---> gamma(nonneg,nonneg). + beta_prior ---> beta(nonneg,nonneg). + param_sampler == pred(+gem_model,-gem_model,+rndstate,-rndstate). + == + +*/ +:- meta_predicate seqmap_classes(4,+,?,?). + +:- load_foreign_library(foreign(crp)). +:- use_module(library(dcgu)). +:- use_module(library(math)). +:- use_module(library(eval)). +:- use_module(library(lazy)). +:- use_module(library(randpred)). +:- use_module(library(apply_macros)). + + +%% crp_prob( +GEM:gem_model, +Classes:classes(A), +X:A, +PProb:float, -Prob:float) is det. +% +% Compute the probability Prob of observing X given a CRP +% and a base probability of PProb. + + +%% crp_sample( +GEM:gem_model, +Classes:classes(A), -A:action(A))// is det. +% +% Sample a new value from CRP, Action A is either new, which means +% that the user should sample a new value from the base distribtion, +% or old(X,ID), where X is an old value and C is the class ID. +% Operates in random state DCG. + + +%% crp_sample_obs( +GEM:gem_model, +Classes:classes(A), +X:A, +PProb:float, -A:action)// is det. +% +% Sample class appropriate for observation of value X. PProb is the +% base probability of X from the base distribution. Action A is new +% or old(ID) where ID is the class id. +% Operates in random state DCG. + + +%% crp_sample_rm( +Classes:classes(A), +X:A, -C:class_id)// is det. +% +% Sample appropriate class from which to remove value X. C is the +% class id of the chosen class. +% Operates in random state DCG. + + +%% crp_dist( +GEM:gem_model, +Classes:classes(A), +Base:dist(A), -Dist:dist(A))// is det. +% +% Get posterior distribution associated with node using stick breaking method. +% Operates in random state DCG. +crp_dist( dp(Alpha), classes(_,Counts,Values), Base, Dist, RS1, RS3) :- + sumlist(Counts,Total), + Norm is Total+Alpha, + + ( Total>0 + -> dirichlet(Counts,Probs1, RS1, RS2), + lazy_dp(Alpha,Base,Alpha,ValuesT,ProbsT, RS2, RS3), + maplist(mul(Total),Probs1,Probs2), + append(Probs2,ProbsT,ProbsA), + append(Values,ValuesT,ValuesA), + Dist=lazy_discrete(ValuesA,ProbsA,Norm) + ; lazy_dp(Alpha, Base, 1, ValuesT, ProbsT, RS1, RS3), + Dist=lazy_discrete(ValuesT,ProbsT,1) + ). + + +% -------------------------------------------------------------------------------- +% classes data structure (basic CRP stuff) + +user:portray(classes(_,Counts,Vals)) :- format('<crp|~p:~p>',[Counts,Vals]). + + +%% empty_classes( -Classes:classes(_)) is det. +% +% Unify Classes with an empty classes structure. +empty_classes(classes(0,[],[])). + + +%% classes_value( +Classes:classes(A), +X:A) is semidet. +%% classes_value( +Classes:classes(A), -X:A) is multi. +% +% Check that X is one of the values represented in Classes. +% If X is unbound on entry, it is unified with all values on backtracking. +classes_value(classes(_,_,Vals),X) :- member(X,Vals). + + +%% classes_counts( +Classes:classes(A), -Counts:list(natural)) is det. +% +% Gets the list of counts, one per class. +classes_counts( classes(_,Counts,_), Counts). + +%% seqmap_classes( +P:pred(natural,A,T,T), +Classes:classes(A), +S1:T, -S2:T) is multi. +% +% Sequentiall apply phrase P to all classes. Arguments to P are the number of items +% in the class and the value (of type A) associated with it. +seqmap_classes(P, classes(_,Counts,Vals)) --> seqmap( P, Counts, Vals). + +user:goal_expansion(seqmap_classes(P,CX,S1,S2), (CX=classes(_,Counts,Vals), seqmap(P, Counts,Vals,S1,S2))). + +%% dec_class( +ID:class_id, -C:natural, -X:A, +C1:classes(A), -C2:classes(A)) is det. +% +% Decrement count associated with class id N. C is the count after +% decrementing and X is the value associated with the Nth class. +dec_class(N,CI,X,classes(K,C1,V),classes(K,C2,V)) :- dec_nth(N,_,CI,C1,C2), nth1(N,V,X). +dec_nth(1,X,Y,[X|T],[Y|T]) :- succ(Y,X). +dec_nth(N,A,B,[X|T1],[X|T2]) :- succ(M,N), dec_nth(M,A,B,T1,T2). + +%% inc_class( +ID:class_id, +C1:classes(A), -C2:classes(A)) is det. +% +% Increment count associated with class N. +inc_class(C,classes(K,C1,V),classes(K,C2,V)) :- inc_nth(C,C1,C2). +inc_nth(1,[X|T],[Y|T]) :- succ(X,Y). +inc_nth(N,[X|T1],[X|T2]) :- succ(M,N), inc_nth(M,T1,T2). + + +%% remove_class( +ID:class_id, +C1:classes(A), -C2:classes(A)) is det. +% +% Removes class N. +remove_class(I,classes(K1,C1,V1),classes(K2,C2,V2)) :- + remove_from_list(I,_,C1,C2), + remove_from_list(I,_,V1,V2), + succ(K2,K1). + +%% add_class( +X:A, -ID:class_id, +C1:classes(A), -C2:classes(A)) is det. +% +% Add a class associated with value X. N is the id of the new class. +add_class(X,K2,classes(K1,C1,V1),classes(K2,C2,V2)) :- + succ(K1,K2), + append(C1,[1],C2), + append(V1,[X],V2). + + +remove_from_list(1,X,[X|T],T). +remove_from_list(N,X,[Y|T1],[Y|T2]) :- + ( var(N) + -> remove_from_list(M,X,T1,T2), succ(M,N) + ; succ(M,N), remove_from_list(M,X,T1,T2) + ). + + +%------------------------------------------------------------------ +% Get posterior distribution at node using stick-breaking +% construction. + +lazy_dp(A,H,P0,Vals,Probs) --> + spawn(S0), { lazy_unfold(unfold_dp(A,H),Vals,Probs,(P0,S0),_) }. + +lazy_dp_paired(A,H,P0,ValsProbs) --> + spawn(S0), { lazy_unfold(unfold_dp(A,H),ValsProbs,(P0,S0),_) }. + +unfold_dp(A,H,V,X) --> \> call(H,V), unfold_gem(A,X). +unfold_dp(A,H,V:X) --> \> call(H,V), unfold_gem(A,X). + +% lazy_gem(A,Probs) --> spawn(S0), { lazy_unfold(unfold_gem(A),Probs,(1,S0),_) }. + +unfold_gem(A,X) --> + \> beta(1,A,P), + \< trans(P0,P1), + { X is P*P0, P1 is P0-X }. + +%% classes_update( +Action:action(A), +C1:classes(A), -C2:classes(A)) is det. +% +% Update classes structure with a new observation. +classes_update(old(_,ID),C1,C2) :- inc_class(ID,C1,C2). +classes_update(new(X,ID),C1,C2) :- add_class(X,ID,C1,C2). + + + + +% PARAMETER SAMPLING + + + +% --------------------------------------------------------------- +% Initialisers +% Samplers written in C. + +%% dp_sampler_teh( +Prior:gamma_prior, +Counts:list(natural), -S:param_sampler) is det. +% +% Prepares a predicate for sampling the concentration parameter of a Dirichlet process. +% The sampler's =|gem_prior|= arguments must be of the form =|dp(_)|=. +dp_sampler_teh( gamma(A,B), CX, crp:sample_dp_teh(ApSumKX,B,NX)) :- + maplist(sumlist,CX,NX), + maplist(length,CX,KX), + sumlist(KX,SumKX), + ApSumKX is A+SumKX. + +%% py_sampler_teh( +ThPrior:gamma_prior, +DiscPr:beta_prior, +Counts:list(natural), -S:param_sampler) is det. +% +% Prepares a predicate for sampling the concentration and discount +% parameters of a Pitman-Yor process. +% The sampler's =|gem_prior|= arguments must be of the form =|dp(_)|=. +py_sampler_teh( ThPrior, DiscPrior, CountsX, crp:Sampler) :- + Sampler = sample_py_teh( ThPrior, DiscPrior, CountsX). + +/* +slow_sample_py_teh( gamma(A,B), beta(DA,DB), CountsX, py(Theta1,Disc1), py(Theta2,Disc2)) --> + % do several lots of sampling auxillary variables, one per client node + % seqmap( py_sample_s_z_w(Theta1,Disc1), CountsX, SX, NSX, ZX, WX), + seqmap( py_sample_s_z_log_w(Theta1,Disc1), CountsX, SX, NSX, ZX, LogWX), + { % maplist(log,WX,LogWX), + sumlist(SX,SumSX), + sumlist(NSX,SumNSX), + sumlist(ZX,SumZX), + sumlist(LogWX,SumLogWX), + A1 is A+SumSX, B1 is B-SumLogWX, + DA1 is DA+SumNSX, DB1 is DB+SumZX }, + gamma(A1, B1, Theta2), + beta(DA1, DB1, Disc2). + +py_sample_s_z_w(Theta,Disc,Counts,S,NS,Z,W) --> + py_sample_bern_z(Disc,Counts,Z), + py_sample_bern_s(Theta,Disc,Counts,S,NS), + py_sample_beta_w(Theta,Counts,W). + +py_sample_s_z_log_w(Theta,Disc,Counts,S,NS,Z,LogW) --> + py_sample_bern_z(Disc,Counts,Z), + py_sample_bern_s(Theta,Disc,Counts,S,NS), + py_sample_beta_log_w(Theta,Counts,LogW). + +py_sample_beta_w(_, [], 1) --> !. +py_sample_beta_w(Theta, Counts, W) --> + {sumlist(Counts,N), Th1 is Theta+1, N1 is N-1}, + beta( Th1, N1, W). + +py_sample_beta_log_w(_, [], 0) --> !. +py_sample_beta_log_w(Theta, Counts, LogW) --> + {sumlist(Counts,N), Th1 is Theta+1, N1 is N-1}, + beta( Th1, N1, W), { LogW is log(W) }. + +py_sample_bern_s(Theta,Disc,Counts,SumS,SumNS) --> + ( {Counts=[_|Cm1], length(Cm1,Kminus1), numlist(1,Kminus1,KX)} + -> {maplist(mul(Disc),KX,KDX)}, + sum_bernoulli(KDX, Theta, SumS), + {SumNS is Kminus1 - SumS} + ; {SumS=0,SumNS=0} + ). + +py_sample_bern_z(Disc,Counts,Z) --> + {Disc1 is 1-Disc}, + seqmap( sample_bern_z(Disc1), Counts, ZX), + {sumlist(ZX,Z)}. + +sample_bern_z(Disc1,Count,SumZ) --> + {CountM2 is Count-2}, + ( {CountM2<0} -> {SumZ=0} + ; {numlist(0,CountM2,I)}, + sum_bernoulli(I, Disc1, SumZ) + ). + +sum_bernoulli(AX,B,T,S1,S2) :- sum_bernoulli(AX,B,0,T,S1,S2). +sum_bernoulli([],_,T,T,S,S) :- !. +sum_bernoulli([A|AX],B,T1,T3,S1,S3) :- + bernoulli(A,B,X,S1,S2), T2 is T1+X, + sum_bernoulli(AX,B,T2,T3,S2,S3). + +% Gamma distribution with rate parameter B. +:- procedure gamma(1,1). +gamma(A,B,X) --> gamma(A,U), {X is U/B}. + +% Bernoulli with unnormalised weights for 0 and 1. +:- procedure bernoulli(1,1). +bernoulli(A,B,X) --> + uniform01(U), + ({(A+B)*U<B} -> {X=1}; {X=0} ). +*/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plutils.c Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,118 @@ +/* + * Prolog-C utilities + * Samer Abdallah (2009) + */ + +//#define _USE_MATH_DEFINES 1 +//#include "plutils.h" + +//#include <math.h> +//#include <float.h> + +// throws a Prolog exception to signal type error +int type_error(term_t actual, const char *expected) +{ + term_t ex = PL_new_term_ref(); + int rc = PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2, + PL_FUNCTOR_CHARS, "type_error", 2, + PL_CHARS, expected, + PL_TERM, actual, + PL_VARIABLE); + + return rc && PL_raise_exception(ex); +} + +double sum_array(double *p, int n) { + double tot=0; + int i; + for (i=0; i<n; i++) tot+=*p++; + return tot; +} + +int memory_error(size_t amount) +{ + term_t ex = PL_new_term_ref(); + int rc = PL_unify_term(ex, PL_FUNCTOR_CHARS, "error", 2, + PL_FUNCTOR_CHARS, "memory_error", 1, + PL_INTEGER, amount, + PL_VARIABLE); + + return rc && PL_raise_exception(ex); +} + +// extract double from Prolog float +int get_double(term_t term, double *p) +{ + if (PL_get_float(term, p)) return TRUE; + else return type_error(term, "float"); +} + +// extract long from Prolog integer +int get_long(term_t term, long *p) +{ + if (PL_get_long(term, p)) return TRUE; + else return type_error(term, "integer"); +} + + +// unify Prolog list of floats with array of doubles +int unify_list_doubles(term_t list, double *x, int n) +{ + int i; + list=PL_copy_term_ref(list); + + for (i=0; i<n; i++) { + term_t head=PL_new_term_ref(); + term_t tail=PL_new_term_ref(); + if (!PL_unify_list(list,head,tail)) PL_fail; + if (!PL_unify_float(head,x[i])) PL_fail; + list=tail; + } + return PL_unify_nil(list); +} + +// read list of floats from term and write to double array +int get_list_doubles(term_t list, double *vals, int len) +{ + term_t head=PL_new_term_ref(); + int n; + + // copy term ref so as not to modify original + list=PL_copy_term_ref(list); + for (n=0; n<len && PL_get_list(list,head,list);n++) { + if (!PL_get_float(head,&vals[n])) return FALSE; + } + if (!PL_get_nil(list)) return FALSE; + return TRUE; +} + +int unify_args_doubles(term_t state, double *p, int n) +{ + term_t arg = PL_new_term_ref(); + int i; + + for (i=0; i<n; i++) { + if (!PL_put_float(arg,p[i])) PL_fail; + if (!PL_unify_arg(i+1,state,arg)) PL_fail; + } + return TRUE; +} + +int get_args_doubles(term_t state, double *p, int n) +{ + term_t arg = PL_new_term_ref(); + int i; + + for (i=0; i<n; i++) { + _PL_get_arg(i+1, state, arg); + if (!get_double(arg, p+i)) return FALSE; + } + return TRUE; +} + +int alloc_array(size_t N, size_t SZ, void **PP) { + *PP=calloc(N,SZ); + if (*PP) return TRUE; + else return memory_error(N*SZ); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rndutils.c Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,289 @@ +/* + * Pseudorandom generators for SWI Prolog + * Samer Abdallah (2009) + * Some samplers adapted from code in Tom Minka's lightspeed Matlab toolbox. + * Other bits of code culled from SWI Prolog distribution random.c + * + * To do: + * work out lower bound for skew stable distributions + * stream splitting with jumps + * reduce blob copying + * fast discrete distributions + * ziggurat for normals + */ + +#define _USE_MATH_DEFINES 1 + +#include <math.h> +#include <float.h> +#include "rndutils.h" + +#if HAVE_GSL +#include <gsl/gsl_sf_zeta.h> +#else +double gsl_sf_zeta(const double s) { return NAN; } +double gsl_sf_hzeta(const double s, const double k) { return NAN; } +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + + +// ----------------------------------------------------- + +// Returns a sample from Normal(0,1) +double Normal(RndState *S) +{ + double x=S->prev_normal; + + if(!isnan(x)) { + S->prev_normal=NAN; + } else { + double y,radius; + + /* Generate a random point inside the unit circle */ + do { + x = 2*Uniform(S)-1; + y = 2*Uniform(S)-1; + radius = (x*x)+(y*y); + } while((radius >= 1.0) || (radius == 0.0)); + /* Box-Muller formula */ + radius = sqrt(-2*log(radius)/radius); + x *= radius; + y *= radius; + S->prev_normal=y; + } + return x; +} + +/* Returns a sample from Gamma(a, 1). + * For Gamma(a,b), scale the result by b. + */ +double Gamma(RndState *S, double a) +{ + /* Algorithm: + * G. Marsaglia and W.W. Tsang, A simple method for generating gamma + * variables, ACM Transactions on Mathematical Software, Vol. 26, No. 3, + * Pages 363-372, September, 2000. + * http://portal.acm.org/citation.cfm?id=358414 + */ + double boost, d, c, v; + if(a < 1) { + /* boost using Marsaglia's (1961) method: gam(a) = gam(a+1)*U^(1/a) */ + boost = exp(log(Uniform(S))/a); + a++; + } + else boost = 1; + d = a-1.0/3; c = 1.0/sqrt(9*d); + while(1) { + double x,u; + do { + x = Normal(S); + v = 1+c*x; + } while(v <= 0); + v = v*v*v; + x = x*x; + u = Uniform(S); + if((u < 1-.0331*x*x) || + (log(u) < 0.5*x + d*(1-v+log(v)))) break; + } + return( boost*d*v ); +} + +/* Returns a sample from Dirichlet(n,a) where n is dimensionality */ +int Dirichlet(RndState *S, long n, double *a, double *x) +{ + long i; + double tot=0, z; + for (i=0; i<n; i++) { z=Gamma(S,a[i]); tot+=z; x[i]=z; } + for (i=0; i<n; i++) { x[i]/=tot; } + return 1; +} + +/* Returns a sample from Beta(a,b) */ +double Beta(RndState *S, double a, double b) +{ + double g = Gamma(S,a); + return g/(g + Gamma(S,b)); +} + +double Hyperbolic(RndState *S, double a) +{ + return floor(pow(1-Uniform(S),-1/a)); +} + +/* Sample from zeta distribution + * + * This is an inverse CDF method. It works by using an + * approximation of the Hurwitz Zeta function and then + * walking left or right until we get to the correct point. + * The approximation is good for large values so usually we + * only have to walk left or right a few steps for small + * values. + */ +double Zeta(RndState *S, double s) +{ + double v=(1-Uniform(S))*gsl_sf_zeta(s); + double k0, k=round(pow(v*(s-1),1/(1-s))); // approx inverse of hzeta + double zk0, zk=gsl_sf_hzeta(s,k); + + if (zk>=v) { + do { + k0=k; zk0=zk; + k=k0+1; zk=zk0-pow(k0,-s); + } while (zk>=v && zk!=zk0); + return k0; + } else { + do { + k0=k; zk0=zk; + k=k0-1; zk=zk0+pow(k,-s); + } while (zk<v && zk!=zk0); + return k; + } +} + +/* Very fast binomial sampler. + * Returns the number of successes out of n trials, with success probability p. + */ +int Binomial(RndState *S, double p, int n) +{ + int r = 0; + if(isnan(p)) return 0; + if(p < DBL_EPSILON) return 0; + if(p >= 1-DBL_EPSILON) return n; + if((p > 0.5) && (n < 15)) { + /* Coin flip method. This takes O(n) time. */ + int i; + for(i=0;i<n;i++) { + if(Uniform(S) < p) r++; + } + return r; + } + if(n*p < 10) { + /* Waiting time method. This takes O(np) time. */ + double q = -log(1-p), e = -log(Uniform(S)), s; + r = n; + for(s = e/r; s <= q; s += e/r) { + r--; + if(r == 0) break; + e = -log(Uniform(S)); + } + r = n-r; + return r; + } + if (1) { + /* Recursive method. This makes O(log(log(n))) recursive calls. */ + int i = (int)floor(p*(n+1)); + double b = Beta(S,i, n+1-i); + if(b <= p) r = i + Binomial(S,(p-b)/(1-b), n-i); + else r = i - 1 - Binomial(S,(b-p)/b, i-1); + return r; + } +} + +double Exponential(RndState *S) { return -log(1-Uniform(S)); } + +long Poisson(RndState *S, double lambda) +{ + long r; + if (lambda>=15) { + double m=floor(lambda*7/8); + double x=Gamma(S,m); + + if (x>lambda) r=Binomial(S,lambda/x,m-1); + else r=m+Poisson(S,lambda-x); + } else { + double p; + for (p=0, r=-1; p<lambda; p+=Exponential(S)) r++; + } + return r; +} + +/* Returns a sample from stable distribution */ +double Stable(RndState *S, int param, double alpha, double beta) +{ + double X, theta=M_PI*(Uniform(S)-0.5); + + // These methods come from John Nolan's book about + // stable distributions. They return standardised stable random + // numbers in the S(alpha,beta;0) parameterisation + + if (beta==0) { + if (alpha==1) { + X=tan(theta); + } else { + double W=Exponential(S); + X=sin(alpha*theta)/cos(theta)*pow(cos((alpha-1)*theta)/(W*cos(theta)),1/alpha-1); + } + } else { + double W=Exponential(S); + double zeta=beta*tan(alpha*M_PI/2); + if (alpha==1) { + double b=2*beta/M_PI; + double bt=1+b*theta; + X=bt*tan(theta) - b*log(W*cos(theta)/bt); + } else { + double ath = alpha*theta; + double a1th = theta-ath; + X = ((sin(ath) + zeta*cos(ath))/cos(theta)) + * pow((cos(a1th) + zeta*sin(a1th))/(W*cos(theta)),1/alpha-1); + if (param==0) X=X-zeta; + } + } + return X; +} + +/* Returns a sample from discrete distribution */ +long Discrete(RndState *S, long n, double *p, double tot) +{ + double u; + long i; + + for (i=0, u=tot*Uniform(S)-p[0]; u>=0; u-=p[++i]); + return i; +} + +double Uniform(RndState *S0) +{ + return RngStream_Double(&S0->g,&S0->g); +} + +double Raw(RndState *S0) +{ + return RngStream_Raw(&S0->g,&S0->g); +} + +void InitRndState(RndState *S) +{ + RngStream_InitState(&S->g); + S->prev_normal=NAN; +} + + +int spawn_gen(RndState *S0, RndState *S1) +{ + unsigned seeds[6]; + int i; + + for (i=0; i<6; i++) seeds[i]=(unsigned)RngStream_Raw(&S0->g,&S0->g); + RngStream_SetState(&S1->g,seeds); + for (i=0; i<3; i++) RngStream_Raw(&S1->g,&S1->g); + S1->prev_normal=NAN; + return 1; +} + +int randomise_from(FILE *p, RndState *S) +{ + unsigned seeds[6]; + size_t r=fread((void *)seeds,sizeof(unsigned),6,p); + + if (r==6) { + int i; + if (RngStream_SetState(&S->g,seeds)<0) return 0; + for (i=0; i<3; i++) RngStream_Raw(&S->g,&S->g); + S->prev_normal=NAN; + return 1; + } else return 0; +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rndutils.h Fri Jan 20 16:58:52 2012 +0000 @@ -0,0 +1,31 @@ +#include <stdio.h> +#include "RngStream.h" + +// random generator state +typedef struct { + struct rng_state g; + double prev_normal; +} RndState; + +typedef struct { + struct rng_jump j; + int e; +} RndJump; + +int randomise_from(FILE *p, RndState *S); +int spawn_gen(RndState *S0, RndState *S1); +void InitRndState(RndState *S); + +double Raw(RndState *); +double Uniform(RndState *); +double Normal(RndState *); +double Exponential(RndState *); +double Gamma(RndState *,double a); +long Poisson(RndState *,double a); +int Dirichlet(RndState *,long n, double *a, double *x); +double Beta(RndState *,double a, double b); +double Zeta(RndState *,double a); +int Binomial(RndState *,double p, int n); +double Stable(RndState *,int param,double a, double b); +long Discrete(RndState *, long n, double *p, double tot); +