changeset 279:dee55886eca0 sampling

make the RNG a part of the audioDB object. Easier to deal with memory discipline and initialization (though note the FIXME comment in audioDB::initTables()). Also initialize the RNG from the current time. A mature implementation would use a proper source of entropy...
author mas01cr
date Wed, 02 Jul 2008 13:53:23 +0000
parents d9dba57becd4
children
files audioDB.cpp audioDB.h common.cpp sample.cpp
diffstat 4 files changed, 27 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp	Tue Jul 01 22:17:33 2008 +0000
+++ b/audioDB.cpp	Wed Jul 02 13:53:23 2008 +0000
@@ -97,6 +97,9 @@
   if(l2normTable)
     munmap(l2normTable, l2normTableLength);
 
+  if(rng)
+    gsl_rng_free(rng);
+
   if(dbfid>0)
     close(dbfid);
   if(infid>0)
--- a/audioDB.h	Tue Jul 01 22:17:33 2008 +0000
+++ b/audioDB.h	Wed Jul 02 13:53:23 2008 +0000
@@ -159,6 +159,8 @@
   char* indata;
   struct stat statbuf;  
   dbTableHeaderPtr dbH;
+
+  gsl_rng *rng;
   
   char *fileTable;
   unsigned* trackTable;
@@ -224,6 +226,7 @@
   void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp);
   void query_loop(const char* dbName, const char* inFile, Reporter *reporter);
 
+  void initRNG();
   void initDBHeader(const char *dbName);
   void initInputFile(const char *inFile);
   void initTables(const char* dbName, const char* inFile);
@@ -251,7 +254,7 @@
   void batchinsert(const char* dbName, const char* inFile);
   void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0);
   void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
-  unsigned random_track(unsigned *propTable, unsigned total, gsl_rng *);
+  unsigned random_track(unsigned *propTable, unsigned total);
   void sample(const char *dbName);
   void ws_status(const char*dbName, char* hostport);
   void ws_query(const char*dbName, const char *trackKey, const char* hostport);
@@ -285,6 +288,7 @@
   db(0), \
   indata(0), \
   dbH(0), \
+  rng(0), \
   fileTable(0), \
   trackTable(0), \
   dataBuf(0), \
--- a/common.cpp	Tue Jul 01 22:17:33 2008 +0000
+++ b/common.cpp	Wed Jul 02 13:53:23 2008 +0000
@@ -70,6 +70,15 @@
   }
 }
 
+void audioDB::initRNG() {
+  rng = gsl_rng_alloc(gsl_rng_mt19937);
+  if(!rng) {
+    error("could not allocate Random Number Generator");
+  }
+  /* FIXME: maybe we should use a real source of entropy? */
+  gsl_rng_set(rng, time(NULL));
+}
+
 void audioDB::initDBHeader(const char* dbName) {
   if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
     error("Can't open database file", dbName, "open");
@@ -179,6 +188,13 @@
 }
 
 void audioDB::initTables(const char* dbName, const char* inFile = 0) {
+  /* FIXME: initRNG() really logically belongs in the audioDB
+     constructor.  However, there are of the order of four constructors
+     at the moment, and more to come from API implementation.  Given
+     that duplication, I think this is the least worst place to put
+     it; the assumption is that nothing which doesn't look at a
+     database will need an RNG.  -- CSR, 2008-07-02 */
+  initRNG();
   initDBHeader(dbName);
   initInputFile(inFile);
 }
--- a/sample.cpp	Tue Jul 01 22:17:33 2008 +0000
+++ b/sample.cpp	Wed Jul 02 13:53:23 2008 +0000
@@ -35,7 +35,7 @@
   return c;
 }
 
-unsigned audioDB::random_track(unsigned *propTable, unsigned total, gsl_rng *rng) {
+unsigned audioDB::random_track(unsigned *propTable, unsigned total) {
   /* FIXME: make this O(1) by using the alias-rejection method, or
      some other sensible method of sampling from a discrete
      distribution. */
@@ -56,18 +56,6 @@
 void audioDB::sample(const char *dbName) {
   initTables(dbName, 0);
 
-  gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
-
-  /* FIXME: in Real Life we'll want to initialize the RNG using
-     /dev/random or the current time or something, like this:
-
-     unsigned int seed;
-     int fd = open("/dev/urandom", O_RDONLY);
-     read(fd, &seed, 4);
-     
-     gsl_rng_set(rng, seed);
-  */
-
   // build track offset table (FIXME: cut'n'pasted from query.cpp)
   off_t *trackOffsetTable = new off_t[dbH->numFiles];
   unsigned cumTrack=0;
@@ -104,8 +92,8 @@
   double sumlogdist = 0;
 
   for (unsigned int i = 0; i < nsamples;) {
-    unsigned track1 = random_track(propTable, total, rng);
-    unsigned track2 = random_track(propTable, total, rng);
+    unsigned track1 = random_track(propTable, total);
+    unsigned track2 = random_track(propTable, total);
 
     if(track1 == track2)
       continue;