changeset 285:781b129925ff

test for lshlib with point-set generator independent of normalization so that statistics for (P1,P2,R,cR)-sensitivity to k,L and R can be gathered.
author mas01mc
date Mon, 14 Jul 2008 21:50:47 +0000
parents cacad987d785
children fb8bec5c604e
files tests/pointset_test/genpoints2.c tests/pointset_test/lshL.m tests/pointset_test/lshP2.m tests/pointset_test/lshRho.m tests/pointset_test/run-test.sh
diffstat 5 files changed, 156 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pointset_test/genpoints2.c	Mon Jul 14 21:50:47 2008 +0000
@@ -0,0 +1,102 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <unistd.h>
+
+double randn();
+double randbl();
+
+/*
+ * Write one feature vector to the file `name`, created relative to the
+ * current working directory: a native int holding the dimension, followed
+ * by `dim` native doubles.
+ * Returns 0 on success, -1 on failure (after printing a diagnostic).
+ */
+static int write_feature(const char *name, int dim, const double *coords) {
+  int fd = open(name, O_CREAT|O_TRUNC|O_WRONLY, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH);
+  if (fd < 0) {
+    perror(name);
+    return -1;
+  }
+
+  int status = 0;
+  size_t nbytes = (size_t)dim * sizeof *coords;
+  // The header is a genuine int, so the sizeof(int) bytes written are the
+  // dimension on every platform (not the leading bytes of a wider long).
+  if (write(fd, &dim, sizeof dim) != (ssize_t)sizeof dim ||
+      write(fd, coords, nbytes) != (ssize_t)nbytes) {
+    fprintf(stderr, "%s: write failed\n", name);
+    status = -1;
+  }
+  if (close(fd) != 0 && status == 0) {
+    perror(name);
+    status = -1;
+  }
+  return status;
+}
+
+/*
+ * genpoints count radius^2 [dim]
+ *
+ * Generates `count` points uniformly distributed on the sphere of squared
+ * radius `radius^2` centred on the query point (0,0,...,1) in R^dim
+ * (dim defaults to 3).  Each point is printed together with its distance
+ * to the query and saved as testfeature<i>; the query itself is printed
+ * last and saved as queryfeature.  Files are created in the current
+ * working directory.  Exits with status 1 on bad arguments or I/O failure.
+ */
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    fprintf(stderr, "usage: %s count radius^2 [dim]\n", argv[0]);
+    return 1;
+  }
+  long int count = strtol(argv[1], NULL, 0);
+  double rsquared = strtod(argv[2], NULL);
+  long int dim = 3;
+  if (argc > 3)
+    dim = strtol(argv[3], NULL, 0);
+
+  // dim < 1 would index coords[-1]; a negative or NaN radius^2 would turn
+  // every coordinate into NaN; dim must fit the int file header.
+  if (count < 0 || dim < 1 || dim > INT_MAX || !(rsquared >= 0.0)) {
+    fprintf(stderr, "%s: need count >= 0, radius^2 >= 0 and 1 <= dim <= %d\n",
+            argv[0], INT_MAX);
+    return 1;
+  }
+
+  // randbl() draws from rand(), so it is rand() that has to be seeded
+  srand((unsigned int)time(NULL));
+
+  // One buffer reused for every vector
+  double *coords = calloc((size_t)dim, sizeof *coords);
+  if (coords == NULL) {
+    perror("calloc");
+    return 1;
+  }
+
+  // Iterations 0..count-1 produce data points; iteration `count` produces
+  // the query point itself.
+  for (long int i = 0; i <= count; i++) {
+    int is_query = (i == count);
+    long int j;
+
+    if (is_query) {
+      for (j = 0; j < dim; j++)
+        coords[j] = 0.0;
+    } else {
+      // Normed Gaussian random vectors are distributed uniformly on the
+      // unit sphere.  Redraw in the (vanishingly unlikely) all-zero case
+      // so the normalisation below never divides by zero.
+      double nmsq;
+      do {
+        nmsq = 0.0;
+        for (j = 0; j < dim; j++) {
+          coords[j] = randn();
+          nmsq += coords[j] * coords[j];
+        }
+      } while (nmsq == 0.0);
+
+      // Place on the sphere of radius sqrt(rsquared)
+      double scale = sqrt(rsquared / nmsq);
+      for (j = 0; j < dim; j++)
+        coords[j] *= scale;
+    }
+
+    // Translate so the sphere is centred on the query (0,0,...,1)
+    coords[dim - 1] += 1.0;
+
+    // Squared distance to the query (0,0,...,1)
+    double distsq = (coords[dim - 1] - 1.0) * (coords[dim - 1] - 1.0);
+    for (j = 0; j < dim - 1; j++)
+      distsq += coords[j] * coords[j];
+
+    // Output to ASCII terminal
+    printf("(");
+    for (j = 0; j < dim; j++)
+      printf("%8.3f ", coords[j]);
+    printf(") d = %8.3f\n", sqrt(distsq));
+
+    // Save single feature vector
+    char name[40];
+    if (is_query)
+      snprintf(name, sizeof name, "queryfeature");
+    else
+      snprintf(name, sizeof name, "testfeature%ld", i);
+
+    /* assumes $PWD is right */
+    if (write_feature(name, (int)dim, coords) != 0) {
+      free(coords);
+      return 1;
+    }
+  }
+
+  free(coords);
+  return 0;
+}
+
+// Draw one sample from U[0,1): rand() divided by RAND_MAX + 1,
+// so the result is never exactly 1.
+double randbl(){
+  const double span = (double)(RAND_MAX) + (double)(1);
+  const double draw = (double)rand();
+  return draw / span;
+}
+
+// Draw one sample z ~ N(0,1) using the Box-Muller transform
+double randn(){
+  // First uniform feeds log(), so redraw until it is non-zero
+  double u = randbl();
+  while (u == 0)
+    u = randbl();
+  // Second uniform selects the angle
+  double v = randbl();
+
+  double radius = sqrt(-2.0 * log(u));
+  double angle = 2.0 * M_PI * v;
+  return radius * cos(angle);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pointset_test/lshL.m	Mon Jul 14 21:50:47 2008 +0000
@@ -0,0 +1,9 @@
+function L = lshL(w,k,delta)
+% L = lshL(w,k,delta)
+% Smallest integer L such that (1 - P1^k)^L <= delta, where
+% P1 = lshP2(w,1).
+% Defaults: w=4, k=10, delta=0.01.
+% NOTE(review): presumably L is the number of LSH tables and delta the
+% allowed miss probability - confirm against lshlib.
+if nargin<1, w=4; end
+if nargin<2, k=10; end
+if nargin<3, delta=0.01; end
+
+P1 = lshP2(w,1);
+missProb = 1 - P1^k;
+L = ceil(log(1/delta) / (-log(missProb)));
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pointset_test/lshP2.m	Mon Jul 14 21:50:47 2008 +0000
@@ -0,0 +1,7 @@
+function P2 = lshP2(w,c)
+% P2 = lshP2(w,c)
+% Evaluates, with r = w/c,
+%   1 - 2*normal_cdf(-r) - 2/(sqrt(2*pi)*r) * (1 - exp(-w^2/(2*c^2)))
+% Defaults: w=4, c=1.
+% NOTE(review): this looks like the collision probability of a
+% Gaussian-projection LSH hash with bucket width w at distance c -
+% confirm against lshlib.
+if nargin<1, w=4; end
+if nargin<2, c=1; end
+
+ratio = w/c;
+tailTerm = 2*normal_cdf(-ratio);
+densTerm = 2/(sqrt(2*pi)*ratio) * ( 1-exp(-w^2/(2*c^2)) );
+P2 = 1 - tailTerm - densTerm;
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pointset_test/lshRho.m	Mon Jul 14 21:50:47 2008 +0000
@@ -0,0 +1,10 @@
+function Rho = lshRho(w,c)
+% Rho = lshRho(w,c)
+% Ratio log(1/P1) / log(1/P2) with P1 = lshP2(w,1) and P2 = lshP2(w,c).
+% Defaults: w=4, c=1.
+if nargin<1, w=4; end
+if nargin<2, c=1; end
+
+pNear = lshP2(w,1);
+pFar = lshP2(w,c);
+
+numer = log(1/pNear);
+denom = log(1/pFar);
+Rho = numer / denom;
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/pointset_test/run-test.sh	Mon Jul 14 21:50:47 2008 +0000
@@ -0,0 +1,28 @@
+#! /bin/bash
+
+# Point-set test driver: rebuilds testdb, feeds every file found under
+# rad??/ to ${AUDIODB} -I, then runs the -X (with --lsh_* options) and
+# -Q sequence commands.
+
+# Shared test helpers; presumably defines AUDIODB, intstring and floatstring.
+. ../test-utils.sh
+
+# Start from a clean database (rm -f is a no-op when the file is absent)
+rm -f testdb
+
+${AUDIODB} -d testdb -N
+
+intstring 1 > testpower
+floatstring -1 >> testpower
+
+${AUDIODB} -d testdb -P
+
+
+for i in rad[0-9][0-9]/*
+do
+  # An unmatched glob is passed through literally; skip it rather than
+  # handing a non-existent path to ${AUDIODB}.
+  [ -e "$i" ] || continue
+  # Quoted so that paths containing whitespace stay a single argument
+  ${AUDIODB} -d testdb -I -f "$i" -w testpower
+done
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+# Presumably stale LSH index files from an earlier run
+rm -f testdb.lsh.*
+
+${AUDIODB} -d testdb -X -R 1 -l 1 --lsh_N 10000 --lsh_b 10000 --lsh_k 10 --lsh_m 5 --absolute-threshold -10
+
+${AUDIODB} -d testdb -Q sequence -R 1 -l 1 -f testfeature -w testpower --absolute-threshold -10 -e
+