comparison sample.cpp @ 278:d9dba57becd4 sampling

Use the GNU Scientific Library implementation of MT19937 for generating random numbers. Answer is sufficiently similar to that from random() on Linux that I believe I've translated things right.
author mas01cr
date Tue, 01 Jul 2008 22:17:33 +0000
parents 5c34b71c5ffa
children dee55886eca0
comparison
equal deleted inserted replaced
276:5c34b71c5ffa 278:d9dba57becd4
1 #include "audioDB.h" 1 #include "audioDB.h"
2 2
3 #include <gsl/gsl_sf.h> 3 #include <gsl/gsl_sf.h>
4 #include <gsl/gsl_rng.h>
4 5
5 static 6 static
6 double yfun(double d) { 7 double yfun(double d) {
7 return gsl_sf_log(d) - gsl_sf_psi(d); 8 return gsl_sf_log(d) - gsl_sf_psi(d);
8 } 9 }
32 } 33 }
33 34
34 return c; 35 return c;
35 } 36 }
36 37
37 unsigned audioDB::random_track(unsigned *propTable, unsigned total) { 38 unsigned audioDB::random_track(unsigned *propTable, unsigned total, gsl_rng *rng) {
38 /* FIXME: make this O(1) by using the alias-rejection method, or 39 /* FIXME: make this O(1) by using the alias-rejection method, or
39 some other sensible method of sampling from a discrete 40 some other sensible method of sampling from a discrete
40 distribution. */ 41 distribution. */
41 /* FIXME: use a real random number generator, not random() */ 42 double thing = gsl_rng_uniform(rng);
42 double thing = random() / (double) RAND_MAX;
43 unsigned sofar = 0; 43 unsigned sofar = 0;
44 for (unsigned int i = 0; i < dbH->numFiles; i++) { 44 for (unsigned int i = 0; i < dbH->numFiles; i++) {
45 sofar += propTable[i]; 45 sofar += propTable[i];
46 if (thing < ((double) sofar / (double) total)) { 46 if (thing < ((double) sofar / (double) total)) {
47 return i; 47 return i;
54 } 54 }
55 55
56 void audioDB::sample(const char *dbName) { 56 void audioDB::sample(const char *dbName) {
57 initTables(dbName, 0); 57 initTables(dbName, 0);
58 58
59 gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
60
59 /* FIXME: in Real Life we'll want to initialize the RNG using 61 /* FIXME: in Real Life we'll want to initialize the RNG using
60 /dev/random or the current time or something, like this: 62 /dev/random or the current time or something, like this:
61 63
62 unsigned int seed; 64 unsigned int seed;
63 int fd = open("/dev/urandom", O_RDONLY); 65 int fd = open("/dev/urandom", O_RDONLY);
64 read(fd, &seed, 4); 66 read(fd, &seed, 4);
65 67
66 srandom(seed); 68 gsl_rng_set(rng, seed);
67 */ 69 */
68 70
69 // build track offset table (FIXME: cut'n'pasted from query.cpp) 71 // build track offset table (FIXME: cut'n'pasted from query.cpp)
70 off_t *trackOffsetTable = new off_t[dbH->numFiles]; 72 off_t *trackOffsetTable = new off_t[dbH->numFiles];
71 unsigned cumTrack=0; 73 unsigned cumTrack=0;
72 for(unsigned int k = 0; k < dbH->numFiles; k++){ 74 for(unsigned int k = 0; k < dbH->numFiles; k++){
100 102
101 double sumdist = 0; 103 double sumdist = 0;
102 double sumlogdist = 0; 104 double sumlogdist = 0;
103 105
104 for (unsigned int i = 0; i < nsamples;) { 106 for (unsigned int i = 0; i < nsamples;) {
105 unsigned track1 = random_track(propTable, total); 107 unsigned track1 = random_track(propTable, total, rng);
106 unsigned track2 = random_track(propTable, total); 108 unsigned track2 = random_track(propTable, total, rng);
107 109
108 if(track1 == track2) 110 if(track1 == track2)
109 continue; 111 continue;
110 112
111 /* FIXME: this uses lower-order bits, which is OK on Linux but not 113 unsigned i1 = gsl_rng_uniform_int(rng, propTable[track1]);
112 necessarily elsewhere. Again, use a real random number 114 unsigned i2 = gsl_rng_uniform_int(rng, propTable[track2]);
113 generator */
114 unsigned i1 = random() % propTable[track1];
115 unsigned i2 = random() % propTable[track2];
116 115
117 VERB_LOG(1, "%d %d, %d %d | ", track1, i1, track2, i2); 116 VERB_LOG(1, "%d %d, %d %d | ", track1, i1, track2, i2);
118 117
119 /* FIXME: this seeking, reading and distance calculation should 118 /* FIXME: this seeking, reading and distance calculation should
120 share more code with the query loop */ 119 share more code with the query loop */