Mercurial > hg > audiodb
comparison sample.cpp @ 270:9636040ff503 sampling
Parametrize nsamples (though not on the command-line yet)
Compute the mean number of possible sequences in tracks, and display
that.
Compute the track xthreshold as in TASLP paper. (Not quite getting the
same answers, though).
author | mas01cr |
---|---|
date | Mon, 16 Jun 2008 11:59:43 +0000 |
parents | dcbb30790b30 |
children | 1f2c7d5e581c |
comparison
equal
deleted
inserted
replaced
269:dcbb30790b30 | 270:9636040ff503 |
---|---|
61 cumTrack += trackTable[k] * dbH->dim; | 61 cumTrack += trackTable[k] * dbH->dim; |
62 } | 62 } |
63 | 63 |
64 unsigned *propTable = new unsigned[dbH->numFiles]; | 64 unsigned *propTable = new unsigned[dbH->numFiles]; |
65 unsigned total = 0; | 65 unsigned total = 0; |
66 unsigned count = 0; | |
66 | 67 |
67 for (unsigned int i = 0; i < dbH->numFiles; i++) { | 68 for (unsigned int i = 0; i < dbH->numFiles; i++) { |
68 /* what kind of a stupid language doesn't have binary max(), let | 69 /* what kind of a stupid language doesn't have binary max(), let |
69 alone nary? */ | 70 alone nary? */ |
70 unsigned int prop = trackTable[i] - sequenceLength + 1; | 71 unsigned int prop = trackTable[i] - sequenceLength + 1; |
71 prop = prop > 0 ? prop : 0; | 72 prop = prop > 0 ? prop : 0; |
73 if (prop > 0) | |
74 count++; | |
72 propTable[i] = prop; | 75 propTable[i] = prop; |
73 total += prop; | 76 total += prop; |
74 } | 77 } |
75 | 78 |
76 if (total == 0) { | 79 if (total == 0) { |
83 double v1norm, v2norm, v1v2; | 86 double v1norm, v2norm, v1v2; |
84 | 87 |
85 double sumdist = 0; | 88 double sumdist = 0; |
86 double sumlogdist = 0; | 89 double sumlogdist = 0; |
87 | 90 |
88 /* 1037 samples for now */ | 91 unsigned int nsamples = 2049; |
89 for (unsigned int i = 0; i < 1037;) { | 92 |
93 for (unsigned int i = 0; i < nsamples;) { | |
90 /* FIXME: in Real Life we'll want to initialize the RNG using | 94 /* FIXME: in Real Life we'll want to initialize the RNG using |
91 /dev/random or the current time or something. */ | 95 /dev/random or the current time or something. */ |
92 unsigned track1 = random_track(propTable, total); | 96 unsigned track1 = random_track(propTable, total); |
93 unsigned track2 = random_track(propTable, total); | 97 unsigned track2 = random_track(propTable, total); |
94 | 98 |
135 } else { | 139 } else { |
136 VERB_LOG(1, "infinity found: %f %f %f\n", v1norm, v2norm, v1v2); | 140 VERB_LOG(1, "infinity found: %f %f %f\n", v1norm, v2norm, v1v2); |
137 } | 141 } |
138 } | 142 } |
139 | 143 |
140 double sigma2 = (sumdist / (sequenceLength * dbH->dim * 1037)); | 144 /* FIXME: the mean isn't really what we should be reporting here */ |
141 double d = 2 * yinv(log(sumdist/1037) - sumlogdist/1037); | 145 unsigned meanN = total / count; |
146 | |
147 double sigma2 = sumdist / (sequenceLength * dbH->dim * nsamples); | |
148 double d = 2 * yinv(log(sumdist/nsamples) - sumlogdist/nsamples); | |
142 | 149 |
143 std::cout << "Summary statistics" << std::endl; | 150 std::cout << "Summary statistics" << std::endl; |
144 std::cout << "number of samples: " << 1037 << std::endl; | 151 std::cout << "number of samples: " << nsamples << std::endl; |
145 std::cout << "sum of distances (S): " << sumdist << std::endl; | 152 std::cout << "sum of distances (S): " << sumdist << std::endl; |
146 std::cout << "sum of log distances (L): " << sumlogdist << std::endl; | 153 std::cout << "sum of log distances (L): " << sumlogdist << std::endl; |
154 std::cout << "mean number of applicable sequences (N): " << meanN << std::endl; | |
147 std::cout << std::endl; | 155 std::cout << std::endl; |
148 std::cout << "Estimated parameters" << std::endl; | 156 std::cout << "Estimated parameters" << std::endl; |
149 std::cout << "sigma^2: " << sigma2 << std::endl; | 157 std::cout << "sigma^2: " << sigma2 << std::endl; |
150 std::cout << "d: " << d << std::endl; | 158 std::cout << "d: " << d << std::endl; |
151 std::cout << "check: " << yfun(d/2) << std::endl; | 159 |
160 double logw = (2 / d) * gsl_sf_log(-gsl_sf_log(0.99)); | |
161 double logxthresh = gsl_sf_log(sumdist / nsamples) + logw | |
162 - (2 / d) * gsl_sf_log(meanN) | |
163 - gsl_sf_log(d/2) | |
164 - (2 / d) * gsl_sf_log(2 / d) | |
165 + (2 / d) * gsl_sf_lngamma(d / 2); | |
166 | |
167 std::cout << "track xthresh: " << exp(logxthresh) << std::endl; | |
152 | 168 |
153 /* FIXME: we'll also want some summary statistics based on | 169 /* FIXME: we'll also want some summary statistics based on |
154 propTable, for the minimum-of-X estimate */ | 170 propTable, for the minimum-of-X estimate */ |
155 | 171 |
156 delete[] propTable; | 172 delete[] propTable; |