annotate xthresh.c @ 302:74824093c1c4

Implement O((M+N) log(M+N)) duplicate key detection in batchinsert, rather than naive O(N^2). Note that I haven't measured the constants in those complexity expressions; I am anticipating that 40000 is a sufficiently large N for this to matter. Write a test case for duplicate keys, too. Use 0037, since no-one else seems to be writing tests, and everything is merged onto the trunk these days anyway.
author mas01cr
date Mon, 04 Aug 2008 10:00:34 +0000
parents 3be15407e814
children
rev   line source
mas01cr@280 1 #include <gsl/gsl_sf.h>
mas01cr@280 2 #include <stdio.h>
mas01cr@280 3 #include <stdlib.h>
mas01cr@280 4 #include <math.h>
mas01cr@280 5
mas01cr@280 6 int main(int argc, char *argv[]) {
mas01cr@280 7 if(argc != 4) {
mas01cr@280 8 fprintf(stderr, "Wrong number of arguments: %d\n", argc);
mas01cr@280 9 exit(1);
mas01cr@280 10 }
mas01cr@280 11
mas01cr@280 12 long int meanN = strtol(argv[1], NULL, 10);
mas01cr@280 13
mas01cr@280 14 double d = strtod(argv[2], NULL);
mas01cr@280 15 double sigma2 = strtod(argv[3], NULL);
mas01cr@280 16
mas01cr@280 17 double logw = (2 / d) * gsl_sf_log(-gsl_sf_log(0.99));
mas01cr@280 18 double logxthresh = gsl_sf_log(sigma2) + logw
mas01cr@280 19 - (2 / d) * gsl_sf_log(meanN)
mas01cr@280 20 - gsl_sf_log(d/2)
mas01cr@280 21 - (2 / d) * gsl_sf_log(2 / d)
mas01cr@280 22 + (2 / d) * gsl_sf_lngamma(d / 2);
mas01cr@280 23
mas01cr@280 24 printf("w: %f\n", exp(logw));
mas01cr@280 25 printf("x_thresh: %f\n", exp(logxthresh));
mas01cr@280 26 exit(0);
mas01cr@280 27 }