changeset 278:d9dba57becd4 sampling

Use the GNU Scientific Library implementation of MT19937 for generating random numbers. Answer is sufficiently similar to that from random() on Linux that I believe I've translated things right.
author mas01cr
date Tue, 01 Jul 2008 22:17:33 +0000
parents 5c34b71c5ffa
children dee55886eca0
files audioDB.h sample.cpp
diffstat 2 files changed, 13 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.h	Tue Jul 01 09:00:29 2008 +0000
+++ b/audioDB.h	Tue Jul 01 22:17:33 2008 +0000
@@ -12,6 +12,7 @@
 #include <assert.h>
 #include <float.h>
 #include <signal.h>
+#include <gsl/gsl_rng.h>
 
 // includes for web services
 #include "soapH.h"
@@ -250,7 +251,7 @@
   void batchinsert(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
-  unsigned random_track(unsigned *propTable, unsigned total);
+  unsigned random_track(unsigned *propTable, unsigned total, gsl_rng *);
   void sample(const char *dbName);
   void ws_status(const char*dbName, char* hostport);
   void ws_query(const char*dbName, const char *trackKey, const char* hostport);
--- a/sample.cpp	Tue Jul 01 09:00:29 2008 +0000
+++ b/sample.cpp	Tue Jul 01 22:17:33 2008 +0000
@@ -1,6 +1,7 @@
 #include "audioDB.h"
 
 #include <gsl/gsl_sf.h>
+#include <gsl/gsl_rng.h>
 
 static
 double yfun(double d) {
@@ -34,12 +35,11 @@
   return c;
 }
 
-unsigned audioDB::random_track(unsigned *propTable, unsigned total) {
+unsigned audioDB::random_track(unsigned *propTable, unsigned total, gsl_rng *rng) {
   /* FIXME: make this O(1) by using the alias-rejection method, or
      some other sensible method of sampling from a discrete
      distribution. */
-  /* FIXME: use a real random number generator, not random() */
-  double thing = random() / (double) RAND_MAX;
+  double thing = gsl_rng_uniform(rng);
   unsigned sofar = 0;
   for (unsigned int i = 0; i < dbH->numFiles; i++) {
     sofar += propTable[i];
@@ -56,6 +56,8 @@
 void audioDB::sample(const char *dbName) {
   initTables(dbName, 0);
 
+  gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
   /* FIXME: in Real Life we'll want to initialize the RNG using
      /dev/random or the current time or something, like this:
 
@@ -63,8 +65,8 @@
      int fd = open("/dev/urandom", O_RDONLY);
      read(fd, &seed, 4);
      
-     srandom(seed);
-  */ 
+     gsl_rng_set(rng, seed);
+  */
 
   // build track offset table (FIXME: cut'n'pasted from query.cpp)
   off_t *trackOffsetTable = new off_t[dbH->numFiles];
@@ -102,17 +104,14 @@
   double sumlogdist = 0;
 
   for (unsigned int i = 0; i < nsamples;) {
-    unsigned track1 = random_track(propTable, total);
-    unsigned track2 = random_track(propTable, total);
+    unsigned track1 = random_track(propTable, total, rng);
+    unsigned track2 = random_track(propTable, total, rng);
 
     if(track1 == track2)
       continue;
 
-    /* FIXME: this uses lower-order bits, which is OK on Linux but not
-       necessarily elsewhere.  Again, use a real random number
-       generator */
-    unsigned i1 = random() % propTable[track1];
-    unsigned i2 = random() % propTable[track2];
+    unsigned i1 = gsl_rng_uniform_int(rng, propTable[track1]);
+    unsigned i2 = gsl_rng_uniform_int(rng, propTable[track2]);
 
     VERB_LOG(1, "%d %d, %d %d | ", track1, i1, track2, i2);