Mercurial > hg > audiodb
changeset 285:781b129925ff
test for lshlib with point-set generator independent of normalization so that statistics for (P1,P2,R,cR)-sensitivity to k,L and R can be gathered.
author | mas01mc |
---|---|
date | Mon, 14 Jul 2008 21:50:47 +0000 |
parents | cacad987d785 |
children | fb8bec5c604e |
files | tests/pointset_test/genpoints2.c tests/pointset_test/lshL.m tests/pointset_test/lshP2.m tests/pointset_test/lshRho.m tests/pointset_test/run-test.sh |
diffstat | 5 files changed, 156 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
/* tests/pointset_test/genpoints2.c
 *
 * Point-set generator for the LSH tests.
 *
 * usage: genpoints2 count radius^2 [dim]
 *
 * Draws `count` random directions in R^dim, scales each one to length
 * sqrt(radius^2), shifts it by +1 along the last axis and writes it to
 * ./testfeature<i>.  One extra vector, (0,...,0,1), is written to
 * ./queryfeature, so every test point lies at distance sqrt(radius^2) from
 * the query.  Each file holds one native `int` (the dimension) followed by
 * `dim` native doubles.  Every vector is also echoed to stdout together
 * with its distance to the query point.
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* M_PI is an extension to ISO C; provide it when <math.h> does not. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

double randn(void);
double randbl(void);

/* Write exactly `len` bytes to `fd`, retrying short writes and EINTR.
 * Returns 0 on success, -1 (errno set by write) on failure. */
static int write_all(int fd, const void *buf, size_t len) {
  const char *p = buf;
  while (len > 0) {
    ssize_t n = write(fd, p, len);
    if (n < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    p += n;
    len -= (size_t)n;
  }
  return 0;
}

/* Create/truncate `name` in the current directory and store one feature
 * vector: an int dimension header followed by `dim` doubles.
 * Returns 0 on success, -1 on any open/write/close failure. */
static int write_feature(const char *name, long dim, const double *coords) {
  int fd = open(name, O_CREAT | O_TRUNC | O_WRONLY,
                S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
  if (fd < 0)
    return -1;

  /* The header is an int on disk; copy out of the long rather than writing
   * the first sizeof(int) bytes of it, which is wrong on big-endian LP64. */
  int header = (int)dim;
  int rc = write_all(fd, &header, sizeof header);
  if (rc == 0)
    rc = write_all(fd, coords, (size_t)dim * sizeof *coords);
  if (close(fd) != 0)
    rc = -1;
  return rc;
}

/* Parse a whole-string long (base auto-detected, as strtol base 0).
 * Returns 0 and stores the value on success, -1 on malformed/out-of-range. */
static int parse_long(const char *s, long *out) {
  char *end;
  errno = 0;
  long v = strtol(s, &end, 0);
  if (end == s || *end != '\0' || errno == ERANGE)
    return -1;
  *out = v;
  return 0;
}

/* genpoints count radius^2 [dim] */
int main(int argc, char *argv[]) {
  if (argc < 3) {
    fprintf(stderr, "usage: %s count radius^2 [dim]\n", argv[0]);
    exit(1);
  }

  long count = 0;
  long dim = 3;
  char *end;

  if (parse_long(argv[1], &count) != 0 || count < 0) {
    fprintf(stderr, "%s: invalid count '%s'\n", argv[0], argv[1]);
    exit(1);
  }

  errno = 0;
  double rsquared = strtod(argv[2], &end);
  if (end == argv[2] || *end != '\0' || errno == ERANGE ||
      !(rsquared >= 0.0) || isinf(rsquared)) {
    fprintf(stderr, "%s: invalid radius^2 '%s'\n", argv[0], argv[2]);
    exit(1);
  }

  /* dim must be at least 1 (the last coordinate is shifted) and must fit
   * the int header stored in each feature file. */
  if (argc > 3 &&
      (parse_long(argv[3], &dim) != 0 || dim < 1 || dim > INT_MAX)) {
    fprintf(stderr, "%s: invalid dim '%s'\n", argv[0], argv[3]);
    exit(1);
  }

  /* Seed the generator that randbl() actually draws from. */
  srand((unsigned int)time(NULL));

  double *coords = calloc((size_t)dim, sizeof *coords);
  if (coords == NULL) {
    fprintf(stderr, "%s: out of memory\n", argv[0]);
    exit(1);
  }

  int status = 0;
  /* Iterations 0..count-1 produce test points; iteration `count` produces
   * the query point. */
  for (long i = 0; i <= count; i++) {
    int is_query = (i == count);
    long j;

    if (is_query) {
      for (j = 0; j < dim; j++)
        coords[j] = 0.0;
    } else {
      /* Normed Gaussian random vectors are distributed uniformly on the
       * unit sphere.  Redraw in the (vanishingly rare) all-zero case so the
       * normalisation below never divides by zero. */
      double nmsq;
      do {
        nmsq = 0.0;
        for (j = 0; j < dim; j++) {
          coords[j] = randn();
          nmsq += coords[j] * coords[j];
        }
      } while (nmsq == 0.0);

      /* Place on the sphere of squared radius rsquared. */
      double scale = sqrt(rsquared / nmsq);
      for (j = 0; j < dim; j++)
        coords[j] *= scale;
    }

    /* Translate so the sphere is centred on (0,0,...,1). */
    coords[dim - 1] += 1.0;

    /* Norm-squared of the translated point. */
    double nmsq = 0.0;
    for (j = 0; j < dim; j++)
      nmsq += coords[j] * coords[j];

    /* Distance to the query (0,0,...,1): swap the last coordinate's
     * contribution nth^2 for (nth-1)^2. */
    double nth = coords[dim - 1];
    printf("(");
    for (j = 0; j < dim; j++)
      printf("%8.3f ", coords[j]);
    printf(") d = %8.3f\n", sqrt(nmsq - nth * nth + (nth - 1) * (nth - 1)));

    /* Save single feature vector; assumes $PWD is the output directory. */
    char name[40];
    if (is_query)
      snprintf(name, sizeof name, "queryfeature");
    else
      snprintf(name, sizeof name, "testfeature%ld", i);

    if (write_feature(name, dim, coords) != 0) {
      fprintf(stderr, "%s: %s: %s\n", argv[0], name, strerror(errno));
      status = 1;
      break;
    }
  }

  free(coords);
  exit(status);
}

/* Generate u ~ U[0,1) from rand(). */
double randbl(void) {
  return (double)rand() / ((double)RAND_MAX + 1.0);
}

/* Generate z ~ N(0,1) with the Box-Muller transform. */
double randn(void) {
  double x1;
  do {
    x1 = randbl();
  } while (x1 == 0); /* cannot take log of 0 */
  double x2 = randbl();
  return sqrt(-2.0 * log(x1)) * cos(2.0 * M_PI * x2);
}
% tests/pointset_test/lshL.m
%
% L = lshL(w, k, delta)
%
% Returns ceil( log(1/delta) / -log(1 - P1^k) ), where P1 = lshP2(w, 1).
% Presumably this is the number of hash tables L needed so that a point at
% unit distance is missed by all k-wise hashes with probability at most
% delta -- confirm against the lshlib documentation.
%
% Defaults: w = 4, k = 10, delta = 0.01.
function L = lshL(w,k,delta)
  if nargin < 1
    w = 4;
  end
  if nargin < 2
    k = 10;
  end
  if nargin < 3
    delta = 0.01;
  end

  % Single-hash value at c = 1, raised to the k-th power.
  p_single = lshP2(w, 1);
  p_all_k = p_single^k;

  L = ceil(log(1/delta) / -log(1 - p_all_k));
endfunction
% tests/pointset_test/lshP2.m
%
% P2 = lshP2(w, c)
%
% Evaluates, with r = w/c,
%
%   P2 = 1 - 2*Phi(-r) - 2/(sqrt(2*pi)*r) * (1 - exp(-r^2/2))
%
% where Phi is the standard normal CDF.  Presumably this is the collision
% probability of one Gaussian (2-stable) LSH function of bucket width w for
% two points at distance c -- confirm against the lshlib documentation.
%
% w and c may be scalars or arrays of compatible size; the result is
% computed element-wise.  Defaults: w = 4, c = 1.
function P2 = lshP2(w,c)
  if nargin < 2
    c = 1;
  end
  if nargin < 1
    w = 4;
  end

  r = w ./ c;

  % 2*Phi(-r) == erfc(r/sqrt(2)); erfc is a core built-in, so no dependency
  % on the legacy normal_cdf name or on a statistics package.
  tail = erfc(r ./ sqrt(2));

  P2 = 1 - tail - 2 ./ (sqrt(2*pi) .* r) .* (1 - exp(-(r.^2) ./ 2));
endfunction
% tests/pointset_test/lshRho.m
%
% Rho = lshRho(w, c)
%
% Returns log(1/P1) / log(1/P2) with P1 = lshP2(w, 1) and P2 = lshP2(w, c).
% Defaults: w = 4, c = 1 (for which P1 == P2).
function Rho = lshRho(w,c)
  if nargin < 1
    w = 4;
  end
  if nargin < 2
    c = 1;
  end

  p_near = lshP2(w, 1);
  p_far = lshP2(w, c);

  numerator = log(1/p_near);
  denominator = log(1/p_far);
  Rho = numerator / denominator;
endfunction
#! /bin/bash
# tests/pointset_test/run-test.sh
#
# Builds a database from the feature files found under rad??/ and runs an
# indexed sequence query against it.  AUDIODB, intstring and floatstring are
# presumably provided by ../test-utils.sh -- confirm there.

. ../test-utils.sh

# Start from a fresh database.
if [ -f testdb ]; then rm -f testdb; fi

${AUDIODB} -d testdb -N

# Power file passed with -w to every insert and to the query below.
intstring 1 > testpower
floatstring -1 >> testpower

${AUDIODB} -d testdb -P

# Insert every generated feature file; quote the path so names containing
# whitespace or glob characters reach audioDB as a single argument.
for i in rad[0-9][0-9]/*
do
  ${AUDIODB} -d testdb -I -f "$i" -w testpower
done

# sequence queries require L2NORM
${AUDIODB} -d testdb -L

# Drop any index files left from an earlier run before re-indexing.
rm -f testdb.lsh.*

${AUDIODB} -d testdb -X -R 1 -l 1 --lsh_N 10000 --lsh_b 10000 --lsh_k 10 --lsh_m 5 --absolute-threshold -10

${AUDIODB} -d testdb -Q sequence -R 1 -l 1 -f testfeature -w testpower --absolute-threshold -10 -e