Mercurial > hg > audiodb
changeset 271:1f2c7d5e581c sampling
Small cleanups.
Includes some commented-out code which would fix (in a hacky way) some
of the FIXME notes.
author | mas01cr |
---|---|
date | Mon, 16 Jun 2008 17:17:11 +0000 |
parents | 9636040ff503 |
children | 5d721f1ead01 |
files | sample.cpp |
diffstat | 1 files changed, 23 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/sample.cpp Mon Jun 16 11:59:43 2008 +0000 +++ b/sample.cpp Mon Jun 16 17:17:11 2008 +0000 @@ -53,6 +53,16 @@ void audioDB::sample(const char *dbName) { initTables(dbName, 0); + /* FIXME: in Real Life we'll want to initialize the RNG using + /dev/random or the current time or something, like this: + + unsigned int seed; + int fd = open("/dev/urandom", O_RDONLY); + read(fd, &seed, 4); + + srandom(seed); + */ + // build track offset table (FIXME: cut'n'pasted from query.cpp) off_t *trackOffsetTable = new off_t[dbH->numFiles]; unsigned cumTrack=0; @@ -88,14 +98,15 @@ double sumdist = 0; double sumlogdist = 0; - unsigned int nsamples = 2049; + unsigned int nsamples = 20490; for (unsigned int i = 0; i < nsamples;) { - /* FIXME: in Real Life we'll want to initialize the RNG using - /dev/random or the current time or something. */ unsigned track1 = random_track(propTable, total); unsigned track2 = random_track(propTable, total); + if(track1 == track2) + continue; + /* FIXME: this uses lower-order bits, which is OK on Linux but not necessarily elsewhere. Again, use a real random number generator */ @@ -130,8 +141,11 @@ VERB_LOG(1, "%f %f %f | ", v1norm, v2norm, v1v2); /* assume normalizedDistance == true for now */ /* FIXME: not convinced that the statistics we calculated in - TASLP paper are valid for normalizedDistance */ + TASLP paper are technically valid for normalizedDistance */ + double dist = 2 - 2 * v1v2 / sqrt(v1norm * v2norm); + // double dist = v1norm + v2norm - 2*v1v2; + VERB_LOG(1, "%f %f\n", dist, log(dist)); sumdist += dist; sumlogdist += log(dist); @@ -151,10 +165,14 @@ std::cout << "number of samples: " << nsamples << std::endl; std::cout << "sum of distances (S): " << sumdist << std::endl; std::cout << "sum of log distances (L): " << sumlogdist << std::endl; + + /* FIXME: we'll also want some more summary statistics based on + propTable, for the minimum-of-X estimate */ std::cout << "mean number of applicable sequences (N): " << meanN << std::endl; std::cout << std::endl; std::cout << "Estimated parameters" << std::endl; - std::cout << "sigma^2: " << sigma2 << std::endl; + std::cout << "sigma^2: " << sigma2 << "; "; + std::cout << "Msigma^2: " << sumdist / nsamples << std::endl; std::cout << "d: " << d << std::endl; double logw = (2 / d) * gsl_sf_log(-gsl_sf_log(0.99)); @@ -166,9 +184,6 @@ std::cout << "track xthresh: " << exp(logxthresh) << std::endl; - /* FIXME: we'll also want some summary statistics based on - propTable, for the minimum-of-X estimate */ - delete[] propTable; delete[] v1; delete[] v2;