annotate lshlib.cpp @ 737:18974af682cf

* Tweaks to event strings for safety * Fixed mis-firing event in Bridge * Added a simple example * Added packaging
author mas01mj
date Thu, 16 Sep 2010 11:39:52 +0000
parents 828c1c4e25cc
children 9bd13c7819ae
rev   line source
mas01cr@589 1 #include <vector>
mas01cr@589 2 #include <queue>
mas01cr@589 3 #include <stdio.h>
mas01cr@589 4 #include <stdlib.h>
mas01cr@589 5 #include <sys/types.h>
mas01cr@589 6 #include <sys/stat.h>
mas01cr@648 7 #if defined(WIN32)
mas01cr@648 8 #include <sys/locking.h>
mas01cr@648 9 #include <io.h>
mas01cr@648 10 #include <windows.h>
mas01cr@648 11 #endif
mas01cr@589 12 #include <fcntl.h>
mas01cr@589 13 #include <string.h>
mas01cr@589 14 #include <iostream>
mas01cr@589 15 #include <fstream>
mas01cr@589 16 #include <math.h>
mas01cr@589 17 #include <sys/time.h>
mas01cr@589 18 #include <assert.h>
mas01cr@589 19 #include <float.h>
mas01cr@589 20 #include <signal.h>
mas01cr@589 21 #include <time.h>
mas01cr@589 22 #include <limits.h>
mas01cr@589 23 #include <errno.h>
mas01cr@589 24 #ifdef MT19937
mas01cr@589 25 #include "mt19937/mt19937ar.h"
mas01cr@589 26 #endif
mas01cr@589 27
mas01mc@292 28 #include "lshlib.h"
mas01mc@292 29
mas01cr@648 30 #define getpagesize() (64*1024)
mas01mc@292 31
mas01cr@648 32 Uns32T get_page_logn() {
mas01cr@648 33 int pagesz = (int) getpagesize();
mas01cr@648 34 return (Uns32T) log2((double) pagesz);
mas01mc@292 35 }
mas01mc@292 36
mas01mc@292 37 void H::error(const char* a, const char* b, const char *sysFunc) {
mas01mc@292 38 cerr << a << ": " << b << endl;
mas01mc@292 39 if (sysFunc) {
mas01mc@292 40 perror(sysFunc);
mas01mc@292 41 }
mas01mc@292 42 exit(1);
mas01mc@292 43 }
mas01mc@292 44
mas01mc@293 45 H::H(){
mas01mc@293 46 // Delay initialization of lsh functions until we know the parameters
mas01mc@293 47 }
mas01mc@293 48
mas01mc@293 49 H::H(Uns32T kk, Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float ww, float rr):
mas01mc@292 50 #ifdef USE_U_FUNCTIONS
mas01mc@292 51 use_u_functions(true),
mas01mc@292 52 #else
mas01mc@292 53 use_u_functions(false),
mas01mc@292 54 #endif
mas01mc@293 55 maxp(0),
mas01mc@292 56 bucketCount(0),
mas01mc@292 57 pointCount(0),
mas01mc@292 58 N(NN),
mas01mc@292 59 C(CC),
mas01mc@292 60 k(kk),
mas01mc@292 61 m(mm),
mas01mc@293 62 L((mm*(mm-1))/2),
mas01mc@293 63 d(dd),
mas01mc@293 64 w(ww),
mas01mc@293 65 radius(rr)
mas01mc@292 66 {
mas01mc@292 67
mas01mc@292 68 if(m<2){
mas01mc@292 69 m=2;
mas01mc@292 70 L=1; // check value of L
mas01mc@292 71 cout << "warning: setting m=2, L=1" << endl;
mas01mc@292 72 }
mas01mc@292 73 if(use_u_functions && k%2){
mas01mc@292 74 k++; // make sure k is even
mas01mc@292 75 cout << "warning: setting k even" << endl;
mas01mc@292 76 }
mas01mc@293 77
mas01mc@293 78 // We have the necessary parameters, so construct hashfunction datastructures
mas01mc@293 79 initialize_lsh_functions();
mas01mc@292 80 }
mas01mc@292 81
mas01mc@293 82 void H::initialize_lsh_functions(){
mas01mc@292 83 H::P = UH_PRIME_DEFAULT;
mas01mc@292 84
mas01mc@292 85 /* FIXME: don't use time(); instead use /dev/random or similar */
mas01mc@292 86 /* FIXME: write out the seed somewhere, so that we can get
mas01mc@292 87 repeatability */
mas01mc@292 88 #ifdef MT19937
mas01mc@292 89 init_genrand(time(NULL));
mas01mc@292 90 #else
mas01mc@292 91 srand(time(NULL)); // seed random number generator
mas01mc@292 92 #endif
mas01mc@293 93 Uns32T i,j, kk;
mas01mc@293 94 #ifdef USE_U_FUNCTIONS
mas01mc@293 95 H::A = new float**[ H::m ]; // m x k x d random projectors
mas01mc@293 96 H::b = new float*[ H::m ]; // m x k random biases
mas01mc@293 97 #else
mas01mc@293 98 H::A = new float**[ H::L ]; // m x k x d random projectors
mas01mc@293 99 H::b = new float*[ H::L ]; // m x k random biases
mas01mc@293 100 #endif
mas01mc@293 101 H::g = new Uns32T*[ H::L ]; // L x k random projections
mas01mc@293 102 assert( H::g && H::A && H::b ); // failure
mas01mc@293 103 #ifdef USE_U_FUNCTIONS
mas01mc@293 104 // Use m \times u_i functions \in R^{(k/2) \times (d)}
mas01mc@293 105 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 106 for( j = 0; j < H::m ; j++ ){ // m functions u_i(v)
mas01mc@293 107 H::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients
mas01mc@293 108 H::b[j] = new float[ H::k/2 ]; // bias
mas01mc@293 109 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 110 for( kk = 0; kk < H::k/2 ; kk++ ){
mas01mc@293 111 H::A[j][kk] = new float[ H::d ];
mas01mc@293 112 assert( H::A[j][kk] ); // failure
mas01mc@293 113 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 114 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 115 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 116 }
mas01mc@293 117 }
mas01mc@293 118 #else
mas01mc@293 119 // Use m \times u_i functions \in R^{k \times (d)}
mas01mc@293 120 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 121 for( j = 0; j < H::L ; j++ ){ // m functions u_i(v)
mas01mc@293 122 H::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients
mas01mc@293 123 H::b[j] = new float[ H::k ]; // bias
mas01mc@293 124 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 125 for( kk = 0; kk < H::k ; kk++ ){
mas01mc@293 126 H::A[j][kk] = new float[ H::d ];
mas01mc@293 127 assert( H::A[j][kk] ); // failure
mas01mc@293 128 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 129 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 130 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 131 }
mas01mc@293 132 }
mas01mc@293 133 #endif
mas01mc@293 134
mas01mc@293 135 // Storage for LSH hash function output (Uns32T)
mas01mc@293 136 for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2)
mas01mc@293 137 H::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z
mas01mc@293 138 assert( H::g[j] );
mas01mc@292 139 }
mas01mc@292 140
mas01mc@293 141 // LSH Hash tables
mas01mc@293 142 H::h = new bucket**[ H::L ];
mas01mc@293 143 assert( H::h );
mas01mc@292 144 for( j = 0 ; j < H::L ; j++ ){
mas01mc@292 145 H::h[j] = new bucket*[ H::N ];
mas01mc@292 146 assert( H::h[j] );
mas01mc@292 147 for( i = 0 ; i < H::N ; i++)
mas01mc@292 148 H::h[j][i] = 0;
mas01mc@292 149 }
mas01mc@293 150
mas01mc@293 151 // Standard hash functions
mas01mc@293 152 H::r1 = new Uns32T*[ H::L ];
mas01mc@293 153 H::r2 = new Uns32T*[ H::L ];
mas01mc@293 154 assert( H::r1 && H::r2 ); // failure
mas01mc@293 155 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 156 H::r1[ j ] = new Uns32T[ H::k ];
mas01mc@293 157 H::r2[ j ] = new Uns32T[ H::k ];
mas01mc@293 158 assert( H::r1[j] && H::r2[j] ); // failure
mas01mc@293 159 for( i = 0; i<H::k; i++){
mas01mc@293 160 H::r1[j][i] = randr();
mas01mc@293 161 H::r2[j][i] = randr();
mas01mc@293 162 }
mas01mc@293 163 }
mas01mc@293 164
mas01mc@534 165 // Storage for whole or partial function evaluation depending on USE_U_FUNCTIONS
mas01mc@293 166 H::initialize_partial_functions();
mas01mc@293 167 }
mas01mc@293 168
mas01mc@293 169 void H::initialize_partial_functions(){
mas01mc@293 170
mas01mc@293 171 #ifdef USE_U_FUNCTIONS
mas01mc@293 172 H::uu = vector<vector<Uns32T> >(H::m);
mas01mc@293 173 for( Uns32T aa=0 ; aa < H::m ; aa++ )
mas01mc@293 174 H::uu[aa] = vector<Uns32T>( H::k/2 );
mas01mc@293 175 #endif
mas01mc@293 176 }
mas01mc@293 177
mas01mc@293 178
mas01mc@293 179 // Generate z ~ N(0,1)
mas01mc@293 180 float H::randn(){
mas01mc@293 181 // Box-Muller
mas01mc@293 182 float x1, x2;
mas01mc@293 183 do{
mas01mc@293 184 x1 = ranf();
mas01mc@293 185 } while (x1 == 0); // cannot take log of 0
mas01mc@293 186 x2 = ranf();
mas01mc@293 187 float z;
mas01mc@293 188 z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2);
mas01mc@293 189 return z;
mas01mc@293 190 }
mas01mc@293 191
mas01mc@293 192 float H::ranf(){
mas01mc@293 193 #ifdef MT19937
mas01mc@293 194 return (float) genrand_real2();
mas01mc@293 195 #else
mas01mc@293 196 return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) );
mas01mc@293 197 #endif
mas01mc@293 198 }
mas01mc@293 199
mas01mc@293 200 // range is 1..2^29
mas01mc@293 201 /* FIXME: that looks like an ... odd range. Still. */
mas01mc@293 202 Uns32T H::randr(){
mas01mc@293 203 #ifdef MT19937
mas01mc@293 204 return (Uns32T)((genrand_int32() >> 3) + 1);
mas01mc@293 205 #else
mas01mc@293 206 return (Uns32T) ((rand() >> 2) + 1);
mas01mc@293 207 #endif
mas01mc@292 208 }
mas01mc@292 209
mas01mc@292 210 // Destruct hash tables
mas01mc@292 211 H::~H(){
mas01mc@293 212 Uns32T i,j,kk;
mas01mc@340 213 bucket** pp;
mas01mc@293 214 #ifdef USE_U_FUNCTIONS
mas01mc@293 215 for( j = 0 ; j < H::m ; j++ ){
mas01mc@293 216 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@293 217 delete[] A[j][kk];
mas01mc@293 218 delete[] A[j];
mas01mc@293 219 }
mas01mc@293 220 delete[] A;
mas01mc@293 221 for( j = 0 ; j < H::m ; j++ )
mas01mc@293 222 delete[] b[j];
mas01mc@293 223 delete[] b;
mas01mc@293 224 #else
mas01mc@293 225 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 226 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@293 227 delete[] A[j][kk];
mas01mc@293 228 delete[] A[j];
mas01mc@293 229 }
mas01mc@293 230 delete[] A;
mas01mc@293 231 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 232 delete[] b[j];
mas01mc@293 233 delete[] b;
mas01mc@293 234 #endif
mas01mc@293 235
mas01mc@293 236 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 237 delete[] g[j];
mas01mc@293 238 delete[] g;
mas01mc@292 239 for( j=0 ; j < H::L ; j++ ){
mas01mc@292 240 delete[] H::r1[ j ];
mas01mc@292 241 delete[] H::r2[ j ];
mas01mc@340 242 for(i = 0; i< H::N ; i++){
mas01mc@340 243 pp = 0;
mas01mc@340 244 #ifdef LSH_CORE_ARRAY
mas01mc@340 245 if(H::h[ j ][ i ])
mas01mc@340 246 if(H::h[ j ][ i ]->t2 & LSH_CORE_ARRAY_BIT){
mas01mc@340 247 pp = get_pointer_to_bucket_linked_list(H::h[ j ][ i ]);
mas01mc@340 248 if(*pp){
mas01mc@344 249 (*pp)->snext.ptr=0; // Head of list uses snext as a non-pointer value
mas01mc@340 250 delete *pp; // Now the destructor can do its work properly
mas01mc@340 251 }
mas01mc@340 252 free(H::h[ j ][ i ]->next);
mas01mc@340 253 H::h[ j ][ i ]->next = 0; // Zero next pointer
mas01mc@344 254 H::h[ j ][ i ]->snext.ptr = 0; // Zero head-of-list pointer as above
mas01mc@340 255 }
mas01mc@340 256 #endif
mas01mc@292 257 delete H::h[ j ][ i ];
mas01mc@340 258 }
mas01mc@292 259 delete[] H::h[ j ];
mas01mc@292 260 }
mas01mc@474 261 delete[] H::r1;
mas01mc@474 262 delete[] H::r2;
mas01mc@474 263 delete[] H::h;
mas01mc@292 264 }
mas01mc@292 265
mas01mc@292 266
mas01mc@293 267 // Compute all hash functions for vector v
mas01mc@293 268 // #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d}
mas01mc@293 269 // to make L \times g_j functions \in Z^k
mas01mc@293 270 void H::compute_hash_functions(vector<float>& v){ // v \in R^d
mas01mc@293 271 float iw = 1. / H::w; // hash bucket width
mas01mc@293 272 Uns32T aa, kk;
mas01mc@293 273 if( v.size() != H::d )
mas01mc@293 274 error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality
mas01mc@293 275 double tmp = 0;
mas01mc@293 276 float *pA, *pb;
mas01mc@293 277 Uns32T *pg;
mas01mc@293 278 int dd;
mas01mc@293 279 vector<float>::iterator vi;
mas01mc@293 280 vector<Uns32T>::iterator ui;
mas01mc@293 281
mas01mc@293 282 #ifdef USE_U_FUNCTIONS
mas01mc@293 283 Uns32T bb;
mas01mc@293 284 // Store m dot products to expand
mas01mc@293 285 for( aa=0; aa < H::m ; aa++ ){
mas01mc@293 286 ui = H::uu[aa].begin();
mas01mc@293 287 for( kk = 0 ; kk < H::k/2 ; kk++ ){
mas01mc@293 288 pb = *( H::b + aa ) + kk;
mas01mc@293 289 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 290 dd = H::d;
mas01mc@293 291 tmp = 0.;
mas01mc@293 292 vi = v.begin();
mas01mc@293 293 while( dd-- )
mas01mc@293 294 tmp += *pA++ * *vi++; // project
mas01mc@293 295 tmp += *pb; // translate
mas01mc@293 296 tmp *= iw; // scale
mas01mc@293 297 *ui++ = (Uns32T) floor(tmp); // floor
mas01mc@293 298 }
mas01mc@293 299 }
mas01mc@293 300 // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d}
mas01mc@293 301 Uns32T j;
mas01mc@293 302 for( aa=0, j=0 ; aa < H::m-1 ; aa++ )
mas01mc@293 303 for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){
mas01mc@293 304 pg= *( H::g + j ); // L \times functions g_j(v) \in Z^k
mas01mc@293 305 // u_1 \in Z^{(k/2) \times d}
mas01mc@293 306 ui = H::uu[aa].begin();
mas01mc@293 307 kk=H::k/2;
mas01mc@293 308 while( kk-- )
mas01mc@293 309 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z
mas01mc@293 310 // u_2 \in Z^{(k/2) \times d}
mas01mc@293 311 ui = H::uu[bb].begin();
mas01mc@293 312 kk=H::k/2;
mas01mc@293 313 while( kk--)
mas01mc@293 314 *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z
mas01mc@293 315 }
mas01mc@293 316 #else
mas01mc@293 317 for( aa=0; aa < H::L ; aa++ ){
mas01mc@514 318 pg= *( H::g + aa ); // L \times functions g_j(v) \in Z^k
mas01mc@514 319 for( kk = 0 ; kk != H::k ; kk++ ){
mas01mc@293 320 pb = *( H::b + aa ) + kk;
mas01mc@293 321 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 322 dd = H::d;
mas01mc@293 323 tmp = 0.;
mas01mc@293 324 vi = v.begin();
mas01mc@293 325 while( dd-- )
mas01mc@293 326 tmp += *pA++ * *vi++; // project
mas01mc@293 327 tmp += *pb; // translate
mas01mc@293 328 tmp *= iw; // scale
mas01mc@514 329 *pg++ = (Uns32T) (floor(tmp)); // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z
mas01mc@293 330 }
mas01mc@293 331 }
mas01mc@293 332 #endif
mas01mc@293 333 }
mas01mc@293 334
mas01mc@292 335 // make hash value \in Z
mas01mc@293 336 void H::generate_hash_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
mas01mc@293 337 H::t1 = computeProductModDefaultPrime( g, r1, H::k ) % H::N;
mas01mc@293 338 H::t2 = computeProductModDefaultPrime( g, r2, H::k );
mas01mc@292 339 }
mas01mc@292 340
mas01mc@292 341 #define CR_ASSERT(b){if(!(b)){fprintf(stderr, "ASSERT failed on line %d, file %s.\n", __LINE__, __FILE__); exit(1);}}
mas01mc@292 342
mas01mc@292 343 // Computes (a.b) mod UH_PRIME_DEFAULT
mas01mc@293 344 inline Uns32T H::computeProductModDefaultPrime(Uns32T *a, Uns32T *b, IntT size){
mas01mc@292 345 LongUns64T h = 0;
mas01mc@292 346
mas01mc@292 347 for(IntT i = 0; i < size; i++){
mas01mc@292 348 h = h + (LongUns64T)a[i] * (LongUns64T)b[i];
mas01mc@292 349 h = (h & TWO_TO_32_MINUS_1) + 5 * (h >> 32);
mas01mc@292 350 if (h >= UH_PRIME_DEFAULT) {
mas01mc@292 351 h = h - UH_PRIME_DEFAULT;
mas01mc@292 352 }
mas01mc@292 353 CR_ASSERT(h < UH_PRIME_DEFAULT);
mas01mc@292 354 }
mas01mc@292 355 return h;
mas01mc@292 356 }
mas01mc@292 357
mas01mc@292 358 Uns32T H::bucket_insert_point(bucket **pp){
mas01mc@296 359 collisionCount = 0;
mas01mc@340 360 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 361 if(!*pp){
mas01mc@292 362 *pp = new bucket();
mas01mc@292 363 (*pp)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@292 364 (*pp)->next = new bucket();
mas01mc@292 365 }
mas01mc@340 366 // The list head holds point collision count
mas01mc@340 367 if( (*pp)->t2 & LSH_CORE_ARRAY_BIT )
mas01mc@340 368 bucket_insert_point(get_pointer_to_bucket_linked_list(*pp)); // recurse
mas01mc@340 369 else{
mas01mc@340 370 collisionCount = (*pp)->t2;
mas01mc@340 371 if(collisionCount < H::C){ // Block if row is full
mas01mc@340 372 (*pp)->t2++; // Increment collision counter (numPoints in row)
mas01mc@340 373 pointCount++;
mas01mc@340 374 collisionCount++;
mas01mc@340 375 // Locate the bucket linked list
mas01mc@340 376 __bucket_insert_point( (*pp)->next );
mas01mc@340 377 }
mas01mc@292 378 }
mas01mc@340 379 #else // NOT USING LSH_LIST_HEAD_COUNTERS
mas01mc@340 380 if(!*pp)
mas01mc@340 381 *pp = new bucket();
mas01mc@296 382 pointCount++;
mas01mc@296 383 __bucket_insert_point(*pp); // No collision count storage
mas01mc@292 384 #endif
mas01mc@292 385 return collisionCount;
mas01mc@292 386 }
mas01mc@292 387
mas01mc@340 388 // insert points into hashtable row collision chain
mas01mc@292 389 void H::__bucket_insert_point(bucket* p){
mas01mc@292 390 if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
mas01mc@292 391 p->t2 = H::t2;
mas01mc@292 392 bucketCount++; // Record start of new point-locale collision chain
mas01mc@344 393 p->snext.ptr = new sbucket();
mas01mc@344 394 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 395 return;
mas01mc@292 396 }
mas01mc@292 397
mas01mc@292 398 if(p->t2 == H::t2){
mas01mc@344 399 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 400 return;
mas01mc@292 401 }
mas01mc@292 402
mas01mc@292 403 if(p->next){
mas01mc@340 404 // Construct list in t2 order
mas01mc@340 405 if(H::t2 < p->next->t2){
mas01mc@340 406 bucket* tmp = new bucket();
mas01mc@340 407 tmp->next = p->next;
mas01mc@340 408 p->next = tmp;
mas01mc@340 409 __bucket_insert_point(tmp);
mas01mc@340 410 }
mas01mc@340 411 else
mas01mc@340 412 __bucket_insert_point(p->next);
mas01mc@292 413 }
mas01mc@340 414 else {
mas01mc@292 415 p->next = new bucket();
mas01mc@292 416 __bucket_insert_point(p->next);
mas01mc@292 417 }
mas01mc@292 418 }
mas01mc@292 419
mas01mc@340 420 // insert points into point-locale collision chain
mas01mc@292 421 void H::__sbucket_insert_point(sbucket* p){
mas01mc@292 422 if(p->pointID==IFLAG){
mas01mc@292 423 p->pointID = H::p;
mas01mc@292 424 return;
mas01mc@292 425 }
mas01mc@340 426
mas01mc@292 427 // Search for pointID
mas01mc@340 428 if(p->snext){
mas01mc@292 429 __sbucket_insert_point(p->snext);
mas01mc@292 430 }
mas01mc@292 431 else{
mas01mc@340 432 // Make new point collision bucket at end of list
mas01mc@340 433 p->snext = new sbucket();
mas01mc@340 434 __sbucket_insert_point(p->snext);
mas01mc@292 435 }
mas01mc@292 436 }
mas01mc@292 437
mas01mc@293 438 inline bucket** H::get_bucket(int j){
mas01mc@292 439 return *(h+j);
mas01mc@292 440 }
mas01mc@292 441
mas01mc@340 442 // Find the linked-list pointer at the end of the CORE_ARRAY
mas01mc@340 443 bucket** H::get_pointer_to_bucket_linked_list(bucket* rowPtr){
mas01mc@344 444 Uns32T numBuckets = rowPtr->snext.numBuckets; // Cast pointer to unsigned int
mas01mc@343 445 Uns32T numPoints = rowPtr->t2 & 0x7FFFFFFF; // Value is stored in low 31 bits of t2 field
mas01mc@343 446 bucket** listPtr = reinterpret_cast<bucket**> (reinterpret_cast<unsigned int*>(rowPtr->next)+numPoints+numBuckets+1);
mas01mc@343 447 return listPtr;
mas01mc@340 448 }
mas01mc@340 449
mas01mc@293 450 // Interface to Locality Sensitive Hashing G
mas01mc@293 451 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
mas01mc@293 452 H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
mas01mc@308 453 indexName(0),
mas01mc@293 454 lshHeader(0),
mas01mc@292 455 calling_instance(0),
mas01mc@293 456 add_point_callback(0)
mas01mc@292 457 {
mas01mc@293 458
mas01mc@292 459 }
mas01mc@292 460
mas01mc@292 461 // Serialize from file LSH constructor
mas01mc@292 462 // Read parameters from database file
mas01mc@292 463 // Load the hash functions, close the database
mas01mc@292 464 // Optionally load the LSH tables into head-allocated lists in core
mas01mc@292 465 G::G(char* filename, bool lshInCoreFlag):
mas01mc@293 466 H(), // default base-class constructor call delays data-structure initialization
mas01mc@309 467 indexName(0),
mas01mc@293 468 lshHeader(0),
mas01mc@292 469 calling_instance(0),
mas01mc@292 470 add_point_callback(0)
mas01mc@292 471 {
mas01mc@474 472 FILE* dbFile = 0;
mas01mc@292 473 int dbfid = unserialize_lsh_header(filename);
mas01mc@309 474 indexName = new char[O2_INDEX_MAXSTR];
mas01mc@309 475 strncpy(indexName, filename, O2_INDEX_MAXSTR); // COPY THE CONTENTS TO THE NEW POINTER
mas01mc@293 476 H::initialize_lsh_functions(); // Base-class data-structure initialization
mas01mc@293 477 unserialize_lsh_functions(dbfid); // populate with on-disk hashfunction values
mas01mc@292 478
mas01mc@292 479 // Format1 only needs unserializing if specifically requested
mas01mc@292 480 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){
mas01mc@292 481 unserialize_lsh_hashtables_format1(dbfid);
mas01mc@292 482 }
mas01mc@292 483
mas01mc@292 484 // Format2 always needs unserializing
mas01mc@292 485 if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){
mas01mc@474 486 dbFile = fdopen(dbfid, "rb");
mas01mc@336 487 if(!dbFile)
mas01mc@336 488 error("Cannot open LSH file for reading", filename);
mas01mc@336 489 unserialize_lsh_hashtables_format2(dbFile);
mas01mc@292 490 }
mas01mc@336 491 serial_close(dbfid);
mas01mc@474 492 if(dbFile){
mas01mc@474 493 fclose(dbFile);
mas01mc@474 494 dbFile = 0;
mas01mc@474 495 }
mas01mc@336 496 }
mas01mc@292 497
mas01mc@292 498 G::~G(){
mas01mc@292 499 delete lshHeader;
mas01mc@474 500 delete[] indexName;
mas01mc@292 501 }
mas01mc@292 502
mas01mc@292 503 // single point insertion; inserted values are hash value and pointID
mas01mc@292 504 Uns32T G::insert_point(vector<float>& v, Uns32T pp){
mas01mc@292 505 Uns32T collisionCount = 0;
mas01mc@292 506 H::p = pp;
mas01mc@299 507 if(H::maxp && pp<=H::maxp)
mas01mc@296 508 error("points must be indexed in strict ascending order", "LSH::insert_point(vector<float>&, Uns32T pointID)");
mas01mc@296 509 H::maxp=pp; // Store highest pointID in database
mas01mc@293 510 H::compute_hash_functions( v );
mas01mc@292 511 for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion
mas01mc@293 512 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@292 513 collisionCount += bucket_insert_point( *(h + j) + t1 );
mas01mc@292 514 }
mas01mc@292 515 return collisionCount;
mas01mc@292 516 }
mas01mc@292 517
mas01mc@292 518
mas01mc@292 519 // batch insert for a point set
mas01mc@292 520 // inserted values are vector hash value and pointID starting at basePointID
mas01mc@292 521 void G::insert_point_set(vector<vector<float> >& vv, Uns32T basePointID){
mas01mc@292 522 for(Uns32T point=0; point<vv.size(); point++)
mas01mc@292 523 insert_point(vv[point], basePointID+point);
mas01mc@292 524 }
mas01mc@292 525
mas01mc@292 526 // point retrieval routine
mas01mc@292 527 void G::retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 528 calling_instance = caller;
mas01mc@292 529 add_point_callback = add_point;
mas01mc@293 530 H::compute_hash_functions( v );
mas01mc@292 531 for(Uns32T j = 0 ; j < H::L ; j++ ){
mas01mc@293 532 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01cr@370 533 if( bucket* bPtr = *(get_bucket(j) + get_t1()) ) {
mas01mc@340 534 #ifdef LSH_LIST_HEAD_COUNTERS
mas01cr@370 535 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@340 536 retrieve_from_core_hashtable_array((Uns32T*)(bPtr->next), qpos);
mas01cr@370 537 } else {
mas01mc@340 538 bucket_chain_point( bPtr->next, qpos);
mas01cr@370 539 }
mas01mc@292 540 #else
mas01cr@370 541 bucket_chain_point( bPtr , qpos);
mas01mc@292 542 #endif
mas01cr@370 543 }
mas01mc@292 544 }
mas01mc@292 545 }
mas01mc@292 546
mas01mc@292 547 void G::retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 548 for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ )
mas01mc@292 549 retrieve_point(vv[qpos], qpos, add_point, caller);
mas01mc@292 550 }
mas01mc@292 551
mas01mc@292 552 // export lsh tables to table structure on disk
mas01mc@292 553 //
mas01mc@292 554 // LSH TABLE STRUCTURE
mas01mc@292 555 // ---header 64 bytes ---
mas01mc@292 556 // [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0]
mas01mc@292 557 //
mas01mc@292 558 // ---random projections L x k x d float ---
mas01mc@292 559 // A[0][0][0] A[0][0][1] ... A[0][0][d-1]
mas01mc@292 560 // A[0][1][0] A[0][1][1] ... A[1][1][d-1]
mas01mc@292 561 // ...
mas01mc@292 562 // A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1]
mas01mc@292 563 // ...
mas01mc@292 564 // ...
mas01mc@292 565 // A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1]
mas01mc@292 566 // A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1]
mas01mc@292 567 // ...
mas01mc@292 568 // A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1]
mas01mc@292 569 //
mas01mc@292 570 // ---bias L x k float ---
mas01mc@292 571 // b[0][0] b[0][1] ... b[0][k-1]
mas01mc@292 572 // b[1][0] b[1][1] ... b[1][k-1]
mas01mc@292 573 // ...
mas01mc@292 574 // b[L-1][0] b[L-1][1] ... b[L-1][k-1]
mas01mc@292 575 //
mas01mc@292 576 // ---random r1 L x k float ---
mas01mc@292 577 // r1[0][0] r1[0][1] ... r1[0][k-1]
mas01mc@292 578 // r1[1][0] r1[1][1] ... r1[1][k-1]
mas01mc@292 579 // ...
mas01mc@292 580 // r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1]
mas01mc@292 581 //
mas01mc@292 582 // ---random r2 L x k float ---
mas01mc@292 583 // r2[0][0] r2[0][1] ... r2[0][k-1]
mas01mc@292 584 // r2[1][0] r2[1][1] ... r2[1][k-1]
mas01mc@292 585 // ...
mas01mc@292 586 // r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1]
mas01mc@292 587 //
mas01mc@293 588 // ******* HASHTABLES FORMAT1 (optimized for LSH_ON_DISK retrieval) *******
mas01mc@292 589 // ---hash table 0: N x C x 8 ---
mas01mc@292 590 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 591 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 592 // ...
mas01mc@292 593 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 594 //
mas01mc@292 595 // ---hash table 1: N x C x 8 ---
mas01mc@292 596 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 597 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 598 // ...
mas01mc@292 599 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 600 //
mas01mc@292 601 // ...
mas01mc@292 602 //
mas01mc@292 603 // ---hash table L-1: N x C x 8 ---
mas01mc@292 604 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 605 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 606 // ...
mas01mc@292 607 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 608 //
mas01mc@293 609 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) *******
mas01mc@293 610 //
mas01mc@293 611 // State machine controlled by regular expression.
mas01mc@293 612 // legend:
mas01mc@293 613 //
mas01mc@306 614 // O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU
mas01mc@306 615 // O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU
mas01mc@306 616 // O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU
mas01mc@293 617 //
mas01mc@306 618 // T1 - T1 hash token
mas01mc@306 619 // t1 - t1 hash key (t1 range 0..2^29-1)
mas01mc@306 620 // T2 - T2 token
mas01mc@293 621 // t2 - t2 hash key (range 1..2^32-6)
mas01mc@293 622 // p - point identifier (range 0..2^32-1)
mas01mc@306 623 // E - end hash table token
mas01mc@293 624 // {...} required arguments
mas01mc@293 625 // [...] optional arguments
mas01mc@293 626 // * - match zero or more occurences
mas01mc@293 627 // + - match one or more occurences
mas01mc@293 628 // {...}^L - repeat argument L times
mas01mc@293 629 //
mas01mc@293 630 // FORMAT2 Regular expression:
mas01mc@306 631 // { [T1 t1 [T2 t2 p+]+ ]* E }^L
mas01mc@293 632 //
mas01mc@292 633
mas01mc@292 634 // Serial header constructors
mas01mc@292 635 SerialHeader::SerialHeader(){;}
mas01mc@296 636 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc):
mas01mc@292 637 lshMagic(O2_SERIAL_MAGIC),
mas01mc@292 638 binWidth(W),
mas01mc@292 639 numTables(L),
mas01mc@292 640 numRows(N),
mas01mc@292 641 numCols(C),
mas01mc@292 642 elementSize(O2_SERIAL_ELEMENT_SIZE),
mas01mc@296 643 version(O2_SERIAL_VERSION),
mas01mc@296 644 size(0), // we are deprecating this value
mas01mc@292 645 flags(FMT),
mas01mc@292 646 dataDim(d),
mas01mc@292 647 numFuns(k),
mas01mc@292 648 radius(r),
mas01mc@296 649 maxp(p),
mas01mc@296 650 size_long((unsigned long long)L * align_up((unsigned long long)N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables
mas01mc@296 651 + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions
mas01mc@296 652 (unsigned long long)L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())),
mas01mc@296 653 pointCount(pc){
mas01mc@296 654
mas01mc@296 655 if(FMT==O2_SERIAL_FILEFORMAT2)
mas01mc@296 656 size_long = (unsigned long long)align_up(O2_SERIAL_HEADER_SIZE
mas01mc@296 657 + (unsigned long long)L*k*(sizeof(float)*d+2+sizeof(Uns32T)
mas01mc@296 658 +sizeof(float)) + (unsigned long long)pc*16UL,get_page_logn());
mas01mc@296 659 } // header
mas01mc@292 660
mas01mc@292 661 float* G::get_serial_hashfunction_base(char* db){
mas01mc@292 662 if(db&&lshHeader)
mas01mc@292 663 return (float*)(db+O2_SERIAL_HEADER_SIZE);
mas01mc@292 664 else return NULL;
mas01mc@292 665 }
mas01mc@292 666
mas01mc@292 667 SerialElementT* G::get_serial_hashtable_base(char* db){
mas01mc@292 668 if(db&&lshHeader)
mas01mc@292 669 return (SerialElementT*)(db+get_serial_hashtable_offset());
mas01mc@292 670 else
mas01mc@292 671 return NULL;
mas01mc@292 672 }
mas01mc@292 673
mas01mc@292 674 Uns32T G::get_serial_hashtable_offset(){
mas01mc@292 675 if(lshHeader)
mas01mc@292 676 return align_up(O2_SERIAL_HEADER_SIZE +
mas01mc@292 677 L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn());
mas01mc@292 678 else
mas01mc@292 679 return 0;
mas01mc@292 680 }
mas01mc@292 681
mas01mc@292 682 void G::serialize(char* filename, Uns32T serialFormat){
mas01mc@292 683 int dbfid;
mas01mc@292 684 int dbIsNew=0;
mas01mc@474 685 FILE* dbFile = 0;
mas01mc@292 686 // Check requested serialFormat
mas01mc@292 687 if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2))
mas01mc@292 688 error("Unrecognized serial file format request: ", "serialize()");
mas01mc@296 689
mas01mc@292 690 // Test to see if file exists
mas01cr@370 691 if((dbfid = open (filename, O_RDONLY)) < 0) {
mas01mc@292 692 // If it doesn't, then create the file (CREATE)
mas01cr@370 693 if(errno == ENOENT) {
mas01mc@292 694 // Create the file
mas01mc@292 695 std::cout << "Creating new serialized LSH database:" << filename << "...";
mas01mc@292 696 std::cout.flush();
mas01mc@292 697 serial_create(filename, serialFormat);
mas01mc@292 698 dbIsNew=1;
mas01cr@370 699 } else {
mas01mc@292 700 // The file can't be opened
mas01mc@292 701 error("Can't open the file", filename, "open");
mas01cr@370 702 }
mas01cr@370 703 }
mas01mc@292 704
mas01mc@292 705 // Load the on-disk header into core
mas01mc@292 706 dbfid = serial_open(filename, 1); // open for write
mas01cr@648 707 serial_get_header(dbfid); // read header
mas01mc@292 708
mas01mc@292 709 // Check compatibility of core and disk data structures
mas01mc@292 710 if( !serial_can_merge(serialFormat) )
mas01mc@292 711 error("Incompatible core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 712
mas01mc@292 713 // For new LSH databases write the hashfunctions
mas01mc@292 714 if(dbIsNew)
mas01mc@292 715 serialize_lsh_hashfunctions(dbfid);
mas01mc@292 716 // Write the hashtables in the requested format
mas01mc@292 717 if(serialFormat == O2_SERIAL_FILEFORMAT1)
mas01mc@292 718 serialize_lsh_hashtables_format1(dbfid, !dbIsNew);
mas01mc@336 719 else{
mas01mc@474 720 dbFile = fdopen(dbfid, "r+b");
mas01mc@336 721 if(!dbFile)
mas01mc@336 722 error("Cannot open LSH file for writing",filename);
mas01mc@336 723 serialize_lsh_hashtables_format2(dbFile, !dbIsNew);
mas01mc@336 724 fflush(dbFile);
mas01mc@336 725 }
mas01mc@292 726
mas01mc@336 727 if(!dbIsNew) {
mas01mc@293 728 cout << "maxp = " << H::maxp << endl;
mas01mc@293 729 lshHeader->maxp=H::maxp;
mas01mc@292 730 // Default to FILEFORMAT1
mas01mc@292 731 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2))
mas01mc@294 732 lshHeader->flags|=O2_SERIAL_FILEFORMAT1;
mas01cr@648 733 serial_write_header(dbfid, lshHeader);
mas01mc@336 734 }
mas01mc@336 735 serial_close(dbfid);
mas01mc@474 736 if(dbFile){
mas01mc@474 737 fclose(dbFile);
mas01mc@474 738 dbFile = 0;
mas01mc@474 739 }
mas01mc@292 740 }
mas01mc@292 741
mas01mc@292 742 // Test to see if core structure and requested format is
mas01mc@292 743 // compatible with currently opened database
mas01mc@292 744 int G::serial_can_merge(Uns32T format){
mas01mc@292 745 SerialHeaderT* that = lshHeader;
mas01mc@292 746 if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 747 || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 748 || !( this->w == that->binWidth &&
mas01mc@296 749 this->L == that->numTables &&
mas01mc@296 750 this->N == that->numRows &&
mas01mc@296 751 this->k == that->numFuns &&
mas01mc@296 752 this->d == that->dataDim &&
mas01mc@296 753 sizeof(SerialElementT) == that->elementSize &&
mas01mc@296 754 this->radius == that->radius)){
mas01mc@292 755 serial_print_header(format);
mas01mc@292 756 return 0;
mas01mc@292 757 }
mas01mc@292 758 else
mas01mc@292 759 return 1;
mas01mc@292 760 }
mas01mc@292 761
mas01mc@292 762 // Used as an error message for serial_can_merge()
mas01mc@292 763 void G::serial_print_header(Uns32T format){
mas01mc@292 764 std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl;
mas01mc@292 765 std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl;
mas01mc@292 766 std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl;
mas01mc@292 767 std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl;
mas01mc@292 768 std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl;
mas01mc@292 769 std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl;
mas01mc@292 770 std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl;
mas01mc@292 771 std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl;
mas01mc@292 772 }
mas01mc@292 773
mas01mc@292 774 int G::serialize_lsh_hashfunctions(int fid){
mas01mc@292 775 float* pf;
mas01mc@292 776 Uns32T *pu;
mas01mc@292 777 Uns32T x,y,z;
mas01mc@292 778
mas01cr@648 779 void *db = calloc(get_serial_hashtable_offset() - O2_SERIAL_HEADER_SIZE, 1);
mas01cr@648 780 pf = (float *) db;
mas01mc@292 781 // HASH FUNCTIONS
mas01mc@292 782 // Write the random projectors A[][][]
mas01mc@292 783 #ifdef USE_U_FUNCTIONS
mas01mc@292 784 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 785 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 786 #else
mas01cr@648 787 for( x = 0 ; x < H::L ; x++ )
mas01cr@648 788 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 789 #endif
mas01cr@648 790 for( z = 0 ; z < d ; z++ )
mas01cr@648 791 *pf++ = H::A[x][y][z];
mas01mc@292 792
mas01mc@292 793 // Write the random biases b[][]
mas01mc@292 794 #ifdef USE_U_FUNCTIONS
mas01mc@292 795 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 796 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 797 #else
mas01cr@648 798 for( x = 0 ; x < H::L ; x++ )
mas01cr@648 799 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 800 #endif
mas01cr@648 801 *pf++ = H::b[x][y];
mas01mc@292 802
mas01mc@292 803 pu = (Uns32T*)pf;
mas01mc@292 804
mas01mc@292 805 // Write the Z projectors r1[][]
mas01mc@292 806 for( x = 0 ; x < H::L ; x++)
mas01mc@292 807 for( y = 0 ; y < H::k ; y++)
mas01mc@293 808 *pu++ = H::r1[x][y];
mas01mc@292 809
mas01mc@292 810 // Write the Z projectors r2[][]
mas01mc@292 811 for( x = 0 ; x < H::L ; x++)
mas01mc@292 812 for( y = 0; y < H::k ; y++)
mas01mc@293 813 *pu++ = H::r2[x][y];
mas01cr@648 814
mas01cr@648 815 off_t cur = lseek(fid, 0, SEEK_CUR);
mas01cr@648 816 lseek(fid, O2_SERIAL_HEADER_SIZE, SEEK_SET);
mas01cr@648 817 write(fid, db, get_serial_hashtable_offset() - O2_SERIAL_HEADER_SIZE);
mas01cr@648 818 lseek(fid, cur, SEEK_SET);
mas01cr@648 819
mas01cr@648 820 free(db);
mas01cr@648 821
mas01mc@292 822 return 1;
mas01mc@292 823 }
mas01mc@292 824
mas01cr@648 825 void G::serial_get_table(int fd, int nth, void *buf, size_t count) {
mas01cr@648 826 off_t cur = lseek(fd, 0, SEEK_CUR);
mas01cr@648 827 /* FIXME: if hashTableSize isn't bigger than a page, this loses. */
mas01cr@648 828 lseek(fd, align_up(get_serial_hashtable_offset() + nth * count, get_page_logn()), SEEK_SET);
mas01cr@648 829 read(fd, buf, count);
mas01cr@648 830 lseek(fd, cur, SEEK_SET);
mas01cr@648 831 }
mas01cr@648 832
mas01cr@648 833 void G::serial_write_table(int fd, int nth, void *buf, size_t count) {
mas01cr@648 834 off_t cur = lseek(fd, 0, SEEK_CUR);
mas01cr@648 835 /* FIXME: see the comment in serial_get_table() */
mas01cr@648 836 lseek(fd, align_up(get_serial_hashtable_offset() + nth * count, get_page_logn()), SEEK_SET);
mas01cr@648 837 write(fd, buf, count);
mas01cr@648 838 lseek(fd, cur, SEEK_SET);
mas01cr@648 839 }
mas01cr@648 840
mas01mc@292 841 int G::serialize_lsh_hashtables_format1(int fid, int merge){
mas01mc@292 842 SerialElementT *pe, *pt;
mas01mc@292 843 Uns32T x,y;
mas01mc@292 844
mas01mc@292 845 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) )
mas01mc@292 846 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 847
mas01mc@292 848 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount;
mas01cr@648 849 size_t hashTableSize = sizeof(SerialElementT) * lshHeader->numRows * lshHeader->numCols;
mas01cr@648 850 pt = (SerialElementT *) malloc(hashTableSize);
mas01mc@292 851 // Write the hash tables
mas01mc@292 852 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 853 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1...";
mas01mc@292 854 std::cout.flush();
mas01cr@648 855 // read a hash table's data from disk
mas01cr@648 856 serial_get_table(fid, x, pt, hashTableSize);
mas01mc@292 857 maxColCount=0;
mas01mc@292 858 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 859 meanColCount=0;
mas01mc@292 860 colCountN=0;
mas01mc@292 861 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 862 // Move disk pointer to beginning of row
mas01mc@292 863 pe=pt+y*lshHeader->numCols;
mas01mc@292 864
mas01mc@292 865 colCount=0;
mas01cr@370 866 if(bucket* bPtr = h[x][y]) {
mas01cr@370 867 if(merge) {
mas01mc@340 868 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 869 serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01cr@370 870 } else {
mas01mc@292 871 serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 872 #else
mas01cr@370 873 serial_merge_hashtable_row_format1(pe, bPtr, colCount);
mas01cr@370 874 } else {
mas01cr@370 875 serial_write_hashtable_row_format1(pe, bPtr, colCount);
mas01mc@292 876 #endif
mas01cr@370 877 }
mas01cr@370 878 }
mas01mc@292 879 if(colCount){
mas01mc@292 880 if(colCount<minColCount)
mas01mc@292 881 minColCount=colCount;
mas01mc@292 882 if(colCount>maxColCount)
mas01mc@292 883 maxColCount=colCount;
mas01mc@292 884 meanColCount+=colCount;
mas01mc@292 885 colCountN++;
mas01mc@292 886 }
mas01mc@292 887 }
mas01mc@292 888 if(colCountN)
mas01mc@292 889 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 890 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 891 << endl;
mas01cr@648 892 serial_write_table(fid, x, pt, hashTableSize);
mas01mc@292 893 }
mas01mc@292 894
mas01mc@292 895 // We're done writing
mas01cr@648 896 free(pt);
mas01mc@292 897 return 1;
mas01mc@292 898 }
mas01mc@292 899
mas01mc@292 900 void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){
mas01mc@292 901 while(b && b->t2!=IFLAG){
mas01mc@292 902 SerialElementT*pe=pr; // reset disk pointer to beginning of row
mas01mc@344 903 serial_merge_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 904 b=b->next;
mas01mc@292 905 }
mas01mc@292 906 }
mas01mc@292 907
mas01mc@292 908 void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 909 while(sb){
mas01mc@292 910 if(colCount==lshHeader->numCols){
mas01mc@292 911 std::cout << "!point-chain full " << endl;
mas01mc@292 912 return;
mas01mc@292 913 }
mas01mc@292 914 Uns32T c=0;
mas01mc@292 915 // Merge collision chains
mas01mc@292 916 while(c<lshHeader->numCols){
mas01mc@292 917 if( (pe+c)->hashValue==IFLAG){
mas01mc@292 918 (pe+c)->hashValue=t2;
mas01mc@292 919 (pe+c)->pointID=sb->pointID;
mas01mc@292 920 colCount=c+1;
mas01mc@292 921 if(c+1<lshHeader->numCols)
mas01mc@292 922 (pe+c+1)->hashValue=IFLAG;
mas01mc@292 923 break;
mas01mc@292 924 }
mas01mc@292 925 c++;
mas01mc@292 926 }
mas01mc@292 927 sb=sb->snext;
mas01mc@292 928 }
mas01mc@292 929 return;
mas01mc@292 930 }
mas01mc@292 931
mas01mc@292 932 void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){
mas01mc@292 933 pe->hashValue=IFLAG;
mas01mc@292 934 while(b && b->t2!=IFLAG){
mas01mc@344 935 serial_write_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 936 b=b->next;
mas01mc@292 937 }
mas01mc@292 938 }
mas01mc@292 939
mas01mc@292 940 void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 941 while(sb){
mas01mc@292 942 if(colCount==lshHeader->numCols){
mas01mc@292 943 std::cout << "!point-chain full " << endl;
mas01mc@292 944 return;
mas01mc@292 945 }
mas01mc@292 946 pe->hashValue=t2;
mas01mc@292 947 pe->pointID=sb->pointID;
mas01mc@292 948 pe++;
mas01mc@292 949 colCount++;
mas01mc@292 950 sb=sb->snext;
mas01mc@292 951 }
mas01mc@292 952 pe->hashValue=IFLAG;
mas01mc@292 953 return;
mas01mc@292 954 }
mas01mc@292 955
mas01mc@336 956 int G::serialize_lsh_hashtables_format2(FILE* dbFile, int merge){
mas01mc@292 957 Uns32T x,y;
mas01mc@292 958
mas01mc@292 959 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
mas01mc@292 960 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 961
mas01mc@340 962 // We must pereform FORMAT2 merges in core FORMAT1 (dynamic list structure)
mas01mc@292 963 if(merge)
mas01mc@340 964 unserialize_lsh_hashtables_format2(dbFile, merge);
mas01mc@292 965
mas01mc@292 966 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
mas01mc@336 967 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 968 fclose(dbFile);
mas01mc@336 969 error("fSeek error in serialize_lsh_hashtables_format2");
mas01mc@336 970 }
mas01mc@292 971
mas01mc@292 972 // Write the hash tables
mas01mc@292 973 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 974 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2...";
mas01mc@292 975 std::cout.flush();
mas01mc@292 976 maxColCount=0;
mas01mc@292 977 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 978 meanColCount=0;
mas01mc@292 979 colCountN=0;
mas01mc@292 980 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 981 colCount=0;
mas01mc@292 982 if(bucket* bPtr = h[x][y]){
mas01mc@306 983 // Check for empty row (even though row was allocated)
mas01mc@340 984 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@306 985 if(bPtr->next->t2==IFLAG){
mas01mc@336 986 fclose(dbFile);
mas01mc@306 987 error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 988 }
mas01mc@306 989 #else
mas01mc@306 990 if(bPtr->t2==IFLAG){
mas01mc@336 991 fclose(dbFile);
mas01mc@306 992 error("b->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 993 }
mas01mc@306 994 #endif
mas01mc@306 995 t1 = O2_SERIAL_TOKEN_T1;
mas01mc@340 996 WRITE_UNS32(&t1, "[T1]");
mas01mc@306 997 t1 = y;
mas01mc@340 998 WRITE_UNS32(&t1, "[t1]");
mas01mc@340 999 #ifdef LSH_CORE_ARRAY
mas01mc@340 1000 t1 = count_buckets_and_points_hashtable_row(bPtr);
mas01mc@340 1001 WRITE_UNS32(&t1,"[count]"); // write numElements
mas01mc@340 1002 #endif
mas01mc@340 1003 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@336 1004 serial_write_hashtable_row_format2(dbFile, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 1005 #else
mas01mc@340 1006 serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
mas01mc@292 1007 #endif
mas01mc@292 1008 }
mas01mc@292 1009 if(colCount){
mas01mc@292 1010 if(colCount<minColCount)
mas01mc@292 1011 minColCount=colCount;
mas01mc@292 1012 if(colCount>maxColCount)
mas01mc@292 1013 maxColCount=colCount;
mas01mc@292 1014 meanColCount+=colCount;
mas01mc@292 1015 colCountN++;
mas01mc@292 1016 }
mas01mc@292 1017 }
mas01mc@292 1018 // Write END of table marker
mas01mc@306 1019 t1 = O2_SERIAL_TOKEN_ENDTABLE;
mas01mc@340 1020 WRITE_UNS32(&t1,"[end]");
mas01mc@292 1021 if(colCountN)
mas01mc@292 1022 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 1023 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 1024 << endl;
mas01mc@340 1025 }
mas01mc@292 1026 // We're done writing
mas01mc@292 1027 return 1;
mas01mc@292 1028 }
mas01mc@292 1029
mas01mc@340 1030 Uns32T G::count_buckets_and_points_hashtable_row(bucket* bPtr){
mas01mc@340 1031 Uns32T total_count = 0;
mas01mc@340 1032 bucket* p = 0;
mas01mc@340 1033
mas01mc@340 1034 // count points
mas01mc@340 1035 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1036 total_count = bPtr->t2; // points already counted
mas01mc@340 1037 p = bPtr->next;
mas01mc@340 1038 #else
mas01mc@340 1039 total_count = count_points_hashtable_row(bPtr);
mas01mc@340 1040 p = bPtr;
mas01mc@340 1041 #endif
mas01mc@340 1042
mas01mc@340 1043 // count buckets
mas01mc@340 1044 do{
mas01mc@340 1045 total_count++;
mas01mc@340 1046 }while((p=p->next));
mas01mc@340 1047
mas01mc@340 1048 return total_count;
mas01mc@340 1049 }
mas01mc@340 1050
mas01mc@340 1051 Uns32T G::count_points_hashtable_row(bucket* bPtr){
mas01mc@340 1052 Uns32T point_count = 0;
mas01mc@340 1053 bucket* p = bPtr;
mas01mc@340 1054 sbucket* s = 0;
mas01mc@340 1055 while(p){
mas01mc@344 1056 s = p->snext.ptr;
mas01mc@340 1057 while(s){
mas01mc@340 1058 point_count++;
mas01mc@340 1059 s=s->snext;
mas01mc@340 1060 }
mas01mc@340 1061 p=p->next;
mas01mc@340 1062 }
mas01mc@340 1063 return point_count;
mas01mc@340 1064 }
mas01mc@340 1065
mas01mc@336 1066 void G::serial_write_hashtable_row_format2(FILE* dbFile, bucket* b, Uns32T& colCount){
mas01mc@292 1067 while(b && b->t2!=IFLAG){
mas01mc@344 1068 if(!b->snext.ptr){
mas01mc@336 1069 fclose(dbFile);
mas01mc@306 1070 error("Empty collision chain in serial_write_hashtable_row_format2()");
mas01mc@306 1071 }
mas01mc@306 1072 t2 = O2_SERIAL_TOKEN_T2;
mas01mc@336 1073 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1074 fclose(dbFile);
mas01mc@292 1075 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1076 }
mas01mc@292 1077 t2 = b->t2;
mas01mc@336 1078 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1079 fclose(dbFile);
mas01mc@292 1080 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1081 }
mas01mc@344 1082 serial_write_element_format2(dbFile, b->snext.ptr, colCount);
mas01mc@292 1083 b=b->next;
mas01mc@292 1084 }
mas01mc@292 1085 }
mas01mc@292 1086
mas01mc@336 1087 void G::serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount){
mas01mc@292 1088 while(sb){
mas01mc@336 1089 if(fwrite(&sb->pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1090 fclose(dbFile);
mas01mc@292 1091 error("Write error in serial_write_element_format2()");
mas01mc@292 1092 }
mas01mc@292 1093 colCount++;
mas01mc@292 1094 sb=sb->snext;
mas01mc@292 1095 }
mas01mc@292 1096 }
mas01mc@292 1097
mas01mc@292 1098
mas01mc@292 1099 int G::serial_create(char* filename, Uns32T FMT){
mas01mc@292 1100 return serial_create(filename, w, L, N, C, k, d, FMT);
mas01mc@292 1101 }
mas01mc@292 1102
mas01mc@292 1103
mas01mc@292 1104 int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols,
mas01mc@292 1105 Uns32T numFuns, Uns32T dim, Uns32T FMT){
mas01mc@292 1106
mas01mc@292 1107 if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS
mas01mc@292 1108 || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS
mas01mc@292 1109 || dim>O2_SERIAL_MAX_DIM){
mas01mc@292 1110 error("LSH parameters out of bounds for serialization");
mas01mc@292 1111 }
mas01mc@292 1112
mas01mc@292 1113 int dbfid;
mas01cr@648 1114 if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL,
mas01cr@654 1115 #if defined(WIN32)
mas01cr@648 1116 _S_IREAD|_S_IWRITE
mas01cr@648 1117 #else
mas01cr@648 1118 S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH
mas01cr@648 1119 #endif
mas01cr@648 1120 )) < 0)
mas01cr@648 1121
mas01mc@292 1122 error("Can't create serial file", filename, "open");
mas01mc@292 1123 get_lock(dbfid, 1);
mas01mc@292 1124
mas01mc@292 1125 // Make header first to get size of serialized database
mas01mc@296 1126 lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT, pointCount);
mas01mc@296 1127
mas01mc@296 1128 cout << "file size: <=" << lshHeader->get_size()/1024UL << "KB" << endl;
mas01mc@296 1129 if(lshHeader->get_size()>O2_SERIAL_MAXFILESIZE)
mas01mc@296 1130 error("Maximum size of LSH file exceded: > 4000MB");
mas01mc@296 1131
mas01mc@292 1132 // go to the location corresponding to the last byte
mas01mc@292 1133 if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1)
mas01mc@292 1134 error("lseek error in db file", "", "lseek");
mas01mc@292 1135
mas01mc@292 1136 // write a dummy byte at the last location
mas01mc@292 1137 if (write (dbfid, "", 1) != 1)
mas01mc@292 1138 error("write error", "", "write");
mas01cr@648 1139
mas01cr@648 1140 serial_write_header(dbfid, lshHeader);
mas01mc@292 1141 close(dbfid);
mas01mc@292 1142
mas01mc@292 1143 std::cout << "done initializing tables." << endl;
mas01mc@292 1144
mas01mc@292 1145 return 1;
mas01mc@292 1146 }
mas01mc@292 1147
mas01cr@648 1148 SerialHeaderT* G::serial_get_header(int fd) {
mas01cr@648 1149 off_t cur = lseek(fd, 0, SEEK_CUR);
mas01mc@292 1150 lshHeader = new SerialHeaderT();
mas01cr@648 1151 lseek(fd, 0, SEEK_SET);
mas01cr@648 1152 if(read(fd, lshHeader, sizeof(SerialHeaderT)) != (ssize_t) (sizeof(SerialHeaderT)))
mas01cr@648 1153 error("Bad return from read");
mas01mc@292 1154
mas01mc@292 1155 if(lshHeader->lshMagic!=O2_SERIAL_MAGIC)
mas01mc@292 1156 error("Not an LSH database file");
mas01cr@648 1157 lseek(fd, cur, SEEK_SET);
mas01mc@292 1158 return lshHeader;
mas01mc@292 1159 }
mas01mc@292 1160
mas01cr@648 1161 void G::serial_write_header(int fd, SerialHeaderT *header) {
mas01cr@648 1162 off_t cur = lseek(fd, 0, SEEK_CUR);
mas01cr@648 1163 lseek(fd, 0, SEEK_SET);
mas01cr@648 1164 if(write(fd, header, sizeof(SerialHeaderT)) != (ssize_t) (sizeof(SerialHeaderT)))
mas01cr@648 1165 error("Bad return from write");
mas01cr@648 1166 lseek(fd, cur, SEEK_SET);
mas01mc@292 1167 }
mas01mc@292 1168
mas01mc@292 1169 int G::serial_open(char* filename, int writeFlag){
mas01mc@292 1170 int dbfid;
mas01mc@292 1171 if(writeFlag){
mas01mc@292 1172 if ((dbfid = open (filename, O_RDWR)) < 0)
mas01mc@292 1173 error("Can't open serial file for read/write", filename, "open");
mas01mc@292 1174 get_lock(dbfid, writeFlag);
mas01mc@292 1175 }
mas01mc@292 1176 else{
mas01mc@292 1177 if ((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1178 error("Can't open serial file for read", filename, "open");
mas01mc@292 1179 get_lock(dbfid, 0);
mas01mc@292 1180 }
mas01mc@292 1181
mas01mc@292 1182 return dbfid;
mas01mc@292 1183 }
mas01mc@292 1184
mas01mc@292 1185 void G::serial_close(int dbfid){
mas01mc@292 1186
mas01mc@292 1187 release_lock(dbfid);
mas01mc@292 1188 close(dbfid);
mas01mc@292 1189 }
mas01mc@292 1190
mas01mc@292 1191 int G::unserialize_lsh_header(char* filename){
mas01mc@292 1192
mas01mc@292 1193 int dbfid;
mas01mc@292 1194 // Test to see if file exists
mas01mc@292 1195 if((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1196 error("Can't open the file", filename, "open");
mas01mc@292 1197 close(dbfid);
mas01mc@292 1198 dbfid = serial_open(filename, 0); // open for read
mas01cr@648 1199 serial_get_header(dbfid);
mas01mc@292 1200
mas01mc@292 1201 // Unserialize header parameters
mas01mc@292 1202 H::L = lshHeader->numTables;
mas01mc@292 1203 H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0);
mas01mc@292 1204 H::N = lshHeader->numRows;
mas01mc@292 1205 H::C = lshHeader->numCols;
mas01mc@292 1206 H::k = lshHeader->numFuns;
mas01mc@292 1207 H::d = lshHeader->dataDim;
mas01mc@293 1208 H::w = lshHeader->binWidth;
mas01mc@293 1209 H::radius = lshHeader->radius;
mas01mc@293 1210 H::maxp = lshHeader->maxp;
mas01mc@296 1211 H::pointCount = lshHeader->pointCount;
mas01mc@292 1212
mas01mc@292 1213 return dbfid;
mas01mc@292 1214 }
mas01mc@292 1215
mas01mc@292 1216 // unserialize the LSH parameters
mas01mc@292 1217 // we leave the LSH tree on disk as a flat file
mas01mc@292 1218 // it is this flat file that we search by memory mapping
mas01mc@292 1219 void G::unserialize_lsh_functions(int dbfid){
mas01mc@292 1220 Uns32T j, kk;
mas01mc@292 1221 float* pf;
mas01mc@292 1222 Uns32T* pu;
mas01mc@292 1223
mas01mc@292 1224 // Load the hash functions into core
mas01cr@648 1225 off_t cur = lseek(dbfid, 0, SEEK_CUR);
mas01cr@648 1226 void *db = malloc(get_serial_hashtable_offset() - O2_SERIAL_HEADER_SIZE);
mas01cr@648 1227 lseek(dbfid, O2_SERIAL_HEADER_SIZE, SEEK_SET);
mas01cr@648 1228 read(dbfid, db, get_serial_hashtable_offset() - O2_SERIAL_HEADER_SIZE);
mas01cr@648 1229 lseek(dbfid, cur, SEEK_SET);
mas01cr@648 1230 pf = (float *)db;
mas01mc@292 1231
mas01mc@292 1232 #ifdef USE_U_FUNCTIONS
mas01mc@292 1233 for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v)
mas01mc@292 1234 for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1235 #else
mas01cr@648 1236 for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v)
mas01cr@648 1237 for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1238 #endif
mas01cr@648 1239 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01cr@648 1240 H::A[j][kk][i] = *pf++; // Normally distributed random vectors
mas01cr@648 1241 }
mas01cr@648 1242 }
mas01mc@292 1243 #ifdef USE_U_FUNCTIONS
mas01cr@648 1244 for( j = 0 ; j < H::m ; j++ ) // biases b
mas01cr@648 1245 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@292 1246 #else
mas01cr@648 1247 for( j = 0 ; j < H::L ; j++ ) // biases b
mas01cr@648 1248 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1249 #endif
mas01cr@648 1250 H::b[j][kk] = *pf++;
mas01mc@292 1251
mas01cr@648 1252 pu = (Uns32T*)pf;
mas01cr@648 1253 for( j = 0 ; j < H::L ; j++ ) // Z projectors r1
mas01cr@648 1254 for( kk = 0 ; kk < H::k ; kk++ )
mas01cr@648 1255 H::r1[j][kk] = *pu++;
mas01cr@648 1256
mas01cr@648 1257 for( j = 0 ; j < H::L ; j++ ) // Z projectors r2
mas01cr@648 1258 for( kk = 0 ; kk < H::k ; kk++ )
mas01cr@648 1259 H::r2[j][kk] = *pu++;
mas01mc@292 1260
mas01cr@648 1261 free(db);
mas01mc@292 1262 }
mas01mc@292 1263
mas01mc@292 1264 void G::unserialize_lsh_hashtables_format1(int fid){
mas01mc@292 1265 SerialElementT *pe, *pt;
mas01mc@292 1266 Uns32T x,y;
mas01mc@292 1267 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01cr@648 1268 pt = (SerialElementT *) malloc(hashTableSize);
mas01mc@292 1269 // Read the hash tables into core
mas01mc@292 1270 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 1271 // memory map a single hash table
mas01mc@292 1272 // Align each hash table to page boundary
mas01cr@648 1273 serial_get_table(fid, x, pt, hashTableSize);
mas01mc@292 1274 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 1275 // Move disk pointer to beginning of row
mas01mc@292 1276 pe=pt+y*lshHeader->numCols;
mas01mc@292 1277 unserialize_hashtable_row_format1(pe, h[x]+y);
mas01mc@340 1278 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1279 printf("S[%d,%d]", x, y);
mas01mc@292 1280 serial_bucket_dump(pe);
mas01mc@292 1281 printf("C[%d,%d]", x, y);
mas01mc@292 1282 dump_hashtable_row(h[x][y]);
mas01mc@292 1283 #endif
mas01mc@292 1284 }
mas01mc@292 1285 }
mas01cr@648 1286 free(pt);
mas01mc@292 1287 }
mas01mc@292 1288
mas01mc@292 1289 void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){
mas01mc@292 1290 Uns32T colCount = 0;
mas01mc@292 1291 while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){
mas01mc@292 1292 H::p = pe->pointID; // current point ID
mas01mc@292 1293 t2 = pe->hashValue;
mas01mc@292 1294 bucket_insert_point(b);
mas01mc@292 1295 pe++;
mas01mc@292 1296 colCount++;
mas01mc@292 1297 }
mas01mc@292 1298 }
mas01mc@292 1299
mas01mc@340 1300 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
mas01mc@292 1301 Uns32T x=0,y=0;
mas01mc@292 1302
mas01mc@292 1303 // Seek to hashtable base offset
mas01mc@336 1304 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 1305 fclose(dbFile);
mas01mc@336 1306 error("fSeek error in unserialize_lsh_hashtables_format2");
mas01mc@292 1307 }
mas01mc@292 1308
mas01mc@292 1309 // Read the hash tables into core (structure is given in header)
mas01mc@292 1310 while( x < H::L){
mas01mc@336 1311 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1312 fclose(dbFile);
mas01mc@292 1313 error("Read error","unserialize_lsh_hashtables_format2()");
mas01mc@292 1314 }
mas01mc@306 1315 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE)
mas01mc@292 1316 x++; // End of table
mas01mc@292 1317 else
mas01mc@292 1318 while(y < H::N){
mas01mc@306 1319 // Read a row and move file pointer to beginning of next row or _bittable
mas01mc@306 1320 if(!(H::t1==O2_SERIAL_TOKEN_T1)){
mas01mc@336 1321 fclose(dbFile);
mas01mc@306 1322 error("State matchine error T1","unserialize_lsh_hashtables_format2()");
mas01mc@292 1323 }
mas01mc@336 1324 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1325 fclose(dbFile);
mas01mc@306 1326 error("Read error: t1","unserialize_lsh_hashtables_format2()");
mas01mc@306 1327 }
mas01mc@306 1328 y = H::t1;
mas01mc@292 1329 if(y>=H::N){
mas01mc@336 1330 fclose(dbFile);
mas01mc@292 1331 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
mas01mc@292 1332 }
mas01mc@340 1333 Uns32T token = 0;
mas01mc@340 1334 #ifdef LSH_CORE_ARRAY
mas01mc@340 1335 Uns32T numElements;
mas01mc@340 1336 if(fread(&numElements, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1337 fclose(dbFile);
mas01mc@340 1338 error("Read error: numElements","unserialize_lsh_hashtables_format2()");
mas01mc@340 1339 }
mas01mc@292 1340
mas01mc@340 1341 // BACKWARD COMPATIBILITY: check to see if T2 or END token was read
mas01mc@340 1342 if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
mas01mc@340 1343 forMerge=true; // Force use of dynamic linked list core format
mas01mc@340 1344 token = numElements;
mas01mc@340 1345 }
mas01mc@340 1346
mas01mc@340 1347 if(forMerge)
mas01mc@340 1348 // Use linked list CORE format
mas01mc@340 1349 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
mas01mc@340 1350 else
mas01mc@340 1351 // Use ARRAY CORE format with numElements counter
mas01mc@340 1352 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
mas01mc@340 1353 #else
mas01mc@340 1354 token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
mas01mc@513 1355 #endif
mas01mc@292 1356 // Check that token is valid
mas01mc@306 1357 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
mas01mc@336 1358 fclose(dbFile);
mas01mc@292 1359 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
mas01mc@292 1360 }
mas01mc@292 1361 // Check for end of table flag
mas01mc@306 1362 if(token==O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@292 1363 x++;
mas01mc@292 1364 break;
mas01mc@292 1365 }
mas01mc@292 1366 // Check for new row flag
mas01mc@306 1367 if(token==O2_SERIAL_TOKEN_T1)
mas01mc@292 1368 H::t1 = token;
mas01mc@292 1369 }
mas01mc@292 1370 }
mas01mc@513 1371 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@513 1372 dump_hashtables();
mas01mc@513 1373 #endif
mas01mc@306 1374 }
mas01mc@292 1375
mas01mc@340 1376 Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token){
mas01mc@292 1377 bool pointFound = false;
mas01mc@340 1378
mas01mc@340 1379 if(token)
mas01mc@340 1380 H::t2 = token;
mas01mc@340 1381 else if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1382 fclose(dbFile);
mas01mc@340 1383 error("Read error T2 token","unserialize_hashtable_row_format2");
mas01mc@340 1384 }
mas01mc@340 1385
mas01mc@306 1386 if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){
mas01mc@336 1387 fclose(dbFile);
mas01mc@292 1388 error("State machine error: expected E or T2");
mas01mc@292 1389 }
mas01mc@340 1390
mas01mc@306 1391 while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){
mas01mc@292 1392 pointFound=false;
mas01mc@292 1393 // Check for T2 token
mas01mc@306 1394 if(H::t2!=O2_SERIAL_TOKEN_T2){
mas01mc@336 1395 fclose(dbFile);
mas01mc@292 1396 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
mas01mc@306 1397 }
mas01mc@292 1398 // Read t2 value
mas01mc@336 1399 if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1400 fclose(dbFile);
mas01mc@292 1401 error("Read error t2","unserialize_hashtable_row_format2");
mas01mc@292 1402 }
mas01mc@336 1403 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1404 fclose(dbFile);
mas01mc@292 1405 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1406 }
mas01mc@306 1407 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@292 1408 pointFound=true;
mas01mc@292 1409 bucket_insert_point(b);
mas01mc@336 1410 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1411 fclose(dbFile);
mas01mc@292 1412 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1413 }
mas01mc@292 1414 }
mas01mc@292 1415 if(!pointFound)
mas01mc@292 1416 error("State machine error: point", "unserialize_hashtable_row_format2()");
mas01mc@306 1417 H::t2 = H::p; // Copy last found token to t2
mas01mc@292 1418 }
mas01mc@292 1419 return H::t2; // holds current token
mas01mc@292 1420 }
mas01mc@292 1421
mas01mc@340 1422 // Unserialize format2 hashtable row to a CORE_ARRAY pointed to
mas01mc@340 1423 // by the hashtable row pointer: rowPtr
mas01mc@340 1424 //
mas01mc@340 1425 // numElements is the total number of t2 buckets plus points
mas01mc@340 1426 // memory required is numElements+1+sizeof(hashtable ptr)
mas01mc@340 1427 //
mas01mc@340 1428 // numElements = numPoints + numBuckets
mas01mc@340 1429 //
mas01mc@340 1430 // During inserts (merges) new hashtable entries are appened at rowPtr+numPoints+numBuckets+1
mas01mc@340 1431 //
mas01mc@340 1432 // ASSUME: that LSH_LIST_HEAD_COUNTERS is set so that the first hashtable bucket is used to count
mas01mc@340 1433 // point and bucket entries
mas01mc@340 1434 //
mas01mc@340 1435 // We store the values of numPoints and numBuckets in separate fields of the first bucket
mas01mc@340 1436 // rowPtr->t2 // numPoints
mas01mc@340 1437 // (Uns32T)(rowPtr->snext) // numBuckets
mas01mc@340 1438 //
mas01mc@340 1439 // We cast the rowPtr->next pointer to (Uns32*) malloc(numElements*sizeof(Uns32T) + sizeof(bucket*))
mas01mc@340 1440 // To get to the fist bucket, we use
mas01mc@340 1441 //
mas01mc@340 1442
mas01mc@340 1443 #define READ_UNS32T(VAL,TOKENSTR) if(fread(VAL, sizeof(Uns32T), 1, dbFile) != 1){\
mas01mc@340 1444 fclose(dbFile);error("Read error unserialize_hashtable_format2",TOKENSTR);}
mas01mc@340 1445
mas01mc@340 1446 #define TEST_TOKEN(TEST, TESTSTR) if(TEST){fclose(dbFile);error("State machine error: ", TESTSTR);}
mas01mc@340 1447
mas01mc@340 1448 #define SKIP_BITS_LEFT_SHIFT_MSB (30)
mas01mc@340 1449
mas01mc@340 1450 #define SKIP_BITS_RIGHT_SHIFT_MSB (28)
mas01mc@340 1451 #define SKIP_BITS_RIGHT_SHIFT_LSB (30)
mas01mc@340 1452
mas01mc@340 1453 #define MAX_POINTS_IN_BUCKET_CORE_ARRAY (16)
mas01mc@340 1454 #define LSH_CORE_ARRAY_END_ROW_TOKEN (0xFFFFFFFD)
mas01mc@340 1455
mas01mc@340 1456 // Encode the skip bits. Zero if only one point, MAX 8 (plus first == 9)
mas01mc@340 1457 #define ENCODE_POINT_SKIP_BITS TEST_TOKEN(!numPointsThisBucket, "no points found");\
mas01mc@340 1458 if(numPointsThisBucket==1){\
mas01mc@340 1459 secondPtr=ap++;\
mas01mc@340 1460 *secondPtr=0;\
mas01mc@340 1461 numPoints++;\
mas01mc@340 1462 }\
mas01mc@340 1463 if(numPointsThisBucket>1){\
mas01mc@340 1464 *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
mas01mc@340 1465 *secondPtr |= ( ( (numPointsThisBucket-1) & 0xC) >> 2 ) << SKIP_BITS_LEFT_SHIFT_MSB;}
mas01mc@340 1466
mas01mc@340 1467 Uns32T G::unserialize_hashtable_row_to_array(FILE* dbFile, bucket** rowPP, Uns32T numElements){
mas01mc@340 1468 Uns32T numPointsThisBucket = 0;
mas01mc@340 1469 Uns32T numBuckets = 0;
mas01mc@340 1470 Uns32T numPoints = 0;
mas01mc@340 1471 Uns32T* firstPtr = 0;
mas01mc@340 1472 Uns32T* secondPtr = 0;
mas01mc@340 1473
mas01mc@340 1474 // Initialize new row
mas01mc@340 1475 if(!*rowPP){
mas01mc@340 1476 *rowPP = new bucket();
mas01mc@340 1477 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1478 (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@340 1479 (*rowPP)->next = 0;
mas01mc@340 1480 #endif
mas01mc@340 1481 }
mas01mc@340 1482 bucket* rowPtr = *rowPP;
mas01mc@340 1483
mas01mc@340 1484 READ_UNS32T(&(H::t2),"t2");
mas01mc@340 1485 TEST_TOKEN(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2), "expected E or T2");
mas01mc@340 1486 // Because we encode points in 16-point blocks, we sometimes allocate repeated t2 elements
mas01mc@340 1487 // So over-allocate by a factor of two and realloc later to actual numElements
mas01mc@340 1488 CR_ASSERT(rowPtr->next = (bucket*) malloc((2*numElements+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1489 Uns32T* ap = reinterpret_cast<Uns32T*>(rowPtr->next); // Cast pointer to Uns32T* for array format
mas01mc@340 1490 while( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1) ){
mas01mc@340 1491 numPointsThisBucket = 0;// reset bucket point counter
mas01mc@340 1492 secondPtr = 0; // reset second-point pointer
mas01mc@340 1493 TEST_TOKEN(H::t2!=O2_SERIAL_TOKEN_T2, "expected T2");
mas01mc@340 1494 READ_UNS32T(&(H::t2), "Read error t2");
mas01mc@340 1495 *ap++ = H::t2; // Insert t2 value into array
mas01mc@340 1496 numBuckets++;
mas01mc@340 1497 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1498 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@340 1499 if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
mas01mc@340 1500 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1501 *ap++ = H::t2; // Extra element
mas01mc@340 1502 numBuckets++; // record this as a new bucket
mas01mc@340 1503 numPointsThisBucket=0; // reset bucket point counter
mas01mc@340 1504 secondPtr = 0; // reset second-point pointer
mas01mc@340 1505 }
mas01mc@340 1506 if( ++numPointsThisBucket == 1 )
mas01mc@340 1507 firstPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1508 else if( numPointsThisBucket == 2 )
mas01mc@340 1509 secondPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1510 numPoints++;
mas01mc@340 1511 *ap++ = H::p;
mas01mc@340 1512 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1513 }
mas01mc@340 1514 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1515 H::t2 = H::p; // Copy last found token to t2
mas01mc@340 1516 }
mas01mc@340 1517 // Reallocate the row to its actual size
mas01mc@340 1518 CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1519 // Record the sizes at the head of the row
mas01mc@344 1520 rowPtr->snext.numBuckets = numBuckets;
mas01mc@340 1521 rowPtr->t2 = numPoints;
mas01mc@340 1522 // Place end of row marker
mas01mc@340 1523 *ap++ = LSH_CORE_ARRAY_END_ROW_TOKEN;
mas01mc@340 1524 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
mas01mc@340 1525 rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
mas01mc@340 1526 // Allocate a new dynamic list head at the end of the array
mas01mc@340 1527 bucket** listPtr = reinterpret_cast<bucket**> (ap);
mas01mc@340 1528 *listPtr = 0;
mas01mc@340 1529 // Return current token
mas01mc@340 1530 return H::t2; // return H::t2 which holds current token [E or T1]
mas01mc@340 1531 }
mas01mc@340 1532
mas01mc@340 1533
mas01mc@340 1534
mas01mc@340 1535 // *p is a pointer to the beginning of a hashtable row array
mas01mc@340 1536 // The array consists of t2 hash keys and one or more point identifiers p for each hash key
mas01mc@340 1537 // Retrieval is performed by generating a hash key query_t2 for query point q
mas01mc@340 1538 // We identify the row that t2 is stored in using a secondary hash t1, this row is the entry
mas01mc@340 1539 // point for retrieve_from_core_hashtable_array
mas01mc@340 1540 #define SKIP_BITS (0xC0000000)
mas01mc@340 1541 void G::retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos){
mas01mc@340 1542 Uns32T skip;
mas01mc@340 1543 Uns32T t2;
mas01mc@340 1544 Uns32T p1;
mas01mc@340 1545 Uns32T p2;
mas01mc@340 1546
mas01mc@340 1547 CR_ASSERT(p);
mas01mc@340 1548
mas01mc@340 1549 do{
mas01mc@340 1550 t2 = *p++;
mas01mc@340 1551 if( t2 > H::t2 )
mas01mc@340 1552 return;
mas01mc@340 1553 p1 = *p++;
mas01mc@340 1554 p2 = *p++;
mas01mc@340 1555 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@340 1556 if( t2 == H::t2 ){
mas01mc@340 1557 add_point_callback(calling_instance, p1 ^ (p1 & SKIP_BITS), qpos, radius);
mas01mc@340 1558 if(skip--){
mas01mc@340 1559 add_point_callback(calling_instance, p2 ^ (p2 & SKIP_BITS), qpos, radius);
mas01mc@340 1560 while(skip-- )
mas01mc@340 1561 add_point_callback(calling_instance, *p++, qpos, radius);
mas01mc@340 1562 }
mas01mc@340 1563 }
mas01mc@340 1564 else
mas01mc@340 1565 if(*p != LSH_CORE_ARRAY_END_ROW_TOKEN)
mas01mc@340 1566 p = p + skip;
mas01mc@340 1567 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@340 1568 }
mas01mc@513 1569
mas01mc@513 1570 void G::dump_hashtables(){
mas01mc@513 1571 for(Uns32T x = 0; x < H::L ; x++)
mas01mc@513 1572 for(Uns32T y = 0; y < H::N ; y++){
mas01mc@513 1573 bucket* bPtr = h[x][y];
mas01mc@513 1574 if(bPtr){
mas01mc@513 1575 printf("C[%d,%d]", x, y);
mas01mc@513 1576 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@513 1577 printf("[numBuckets=%d]",bPtr->snext.numBuckets);
mas01mc@513 1578 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@513 1579 dump_core_hashtable_array((Uns32T*)(bPtr->next));
mas01mc@513 1580 }
mas01mc@513 1581 else {
mas01mc@513 1582 dump_hashtable_row(bPtr->next);
mas01mc@513 1583 }
mas01mc@513 1584 #else
mas01mc@513 1585 dump_hashtable_row(bPtr);
mas01mc@513 1586 #endif
mas01mc@513 1587 printf("\n");
mas01mc@513 1588 fflush(stdout);
mas01mc@513 1589 }
mas01mc@513 1590 }
mas01mc@513 1591 }
mas01mc@513 1592
mas01mc@513 1593 void G::dump_core_hashtable_array(Uns32T* p){
mas01mc@513 1594 Uns32T skip;
mas01mc@513 1595 Uns32T t2;
mas01mc@513 1596 Uns32T p1;
mas01mc@513 1597 Uns32T p2;
mas01mc@513 1598 CR_ASSERT(p);
mas01mc@513 1599 do{
mas01mc@513 1600 t2 = *p++;
mas01mc@513 1601 p1 = *p++;
mas01mc@513 1602 p2 = *p++;
mas01mc@513 1603 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@513 1604 printf("(%0x, %0x)", t2, p1 ^ (p1 & SKIP_BITS));
mas01mc@513 1605 if(skip--){
mas01mc@513 1606 printf("(%0x, %0x)", t2, p2 ^ (p2 & SKIP_BITS));
mas01mc@513 1607 while(skip-- )
mas01mc@513 1608 printf("(%0x, %0x)", t2, *p++);
mas01mc@513 1609 }
mas01mc@513 1610 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@513 1611 }
mas01mc@340 1612
mas01mc@292 1613 void G::dump_hashtable_row(bucket* p){
mas01mc@292 1614 while(p && p->t2!=IFLAG){
mas01mc@344 1615 sbucket* sbp = p->snext.ptr;
mas01mc@292 1616 while(sbp){
mas01mc@292 1617 printf("(%0X,%u)", p->t2, sbp->pointID);
mas01mc@292 1618 fflush(stdout);
mas01mc@292 1619 sbp=sbp->snext;
mas01mc@292 1620 }
mas01mc@292 1621 p=p->next;
mas01mc@292 1622 }
mas01mc@292 1623 printf("\n");
mas01mc@292 1624 }
mas01mc@292 1625
mas01mc@292 1626
mas01mc@292 1627 // G::serial_retrieve_point( ... )
mas01mc@292 1628 // retrieves (pointID) from a serialized LSH database
mas01mc@292 1629 //
mas01mc@292 1630 // inputs:
mas01mc@292 1631 // filename - file name of serialized LSH database
mas01mc@292 1632 // vv - query point set
mas01mc@292 1633 //
mas01mc@292 1634 // outputs:
mas01mc@292 1635 // inserts retrieved points into add_point() callback method
mas01mc@292 1636 void G::serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller)
mas01mc@292 1637 {
mas01mc@292 1638 int dbfid = serial_open(filename, 0); // open for read
mas01cr@648 1639 serial_get_header(dbfid);
mas01mc@292 1640
mas01mc@292 1641 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1642 serial_close(dbfid);
mas01mc@292 1643 error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1644 }
mas01mc@292 1645
mas01mc@292 1646 // size of each hash table
mas01mc@292 1647 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1648 calling_instance = caller; // class instance variable used in ...bucket_chain_point()
mas01mc@292 1649 add_point_callback = add_point;
mas01mc@292 1650
mas01cr@648 1651 SerialElementT *pe = (SerialElementT *)malloc(hashTableSize);
mas01mc@292 1652 for(Uns32T j=0; j<L; j++){
mas01cr@648 1653 // read a single hash table for random access
mas01cr@648 1654 serial_get_table(dbfid, j, pe, hashTableSize);
mas01mc@292 1655 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
mas01mc@293 1656 H::compute_hash_functions(vv[qpos]);
mas01mc@293 1657 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1658 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1659 }
mas01mc@292 1660 }
mas01cr@648 1661 free(pe);
mas01mc@292 1662 serial_close(dbfid);
mas01mc@292 1663 }
mas01mc@292 1664
mas01mc@292 1665 void G::serial_retrieve_point(char* filename, vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 1666 int dbfid = serial_open(filename, 0); // open for read
mas01cr@648 1667 serial_get_header(dbfid);
mas01mc@292 1668
mas01mc@292 1669 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1670 serial_close(dbfid);
mas01mc@292 1671 error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1672 }
mas01mc@292 1673
mas01mc@292 1674 // size of each hash table
mas01mc@292 1675 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1676 calling_instance = caller;
mas01mc@292 1677 add_point_callback = add_point;
mas01mc@293 1678 H::compute_hash_functions(v);
mas01cr@648 1679
mas01cr@648 1680 SerialElementT *pe = (SerialElementT *)malloc(hashTableSize);
mas01mc@292 1681 for(Uns32T j=0; j<L; j++){
mas01cr@648 1682 // read a single hash table for random access
mas01cr@648 1683 serial_get_table(dbfid, j, pe, hashTableSize);
mas01mc@293 1684 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1685 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01cr@648 1686 }
mas01cr@648 1687 free(pe);
mas01mc@292 1688 serial_close(dbfid);
mas01mc@292 1689 }
mas01mc@292 1690
mas01mc@292 1691 void G::serial_dump_tables(char* filename){
mas01mc@292 1692 int dbfid = serial_open(filename, 0); // open for read
mas01cr@648 1693 serial_get_header(dbfid);
mas01mc@292 1694 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01cr@648 1695 SerialElementT *db = (SerialElementT *)malloc(hashTableSize);
mas01mc@292 1696 for(Uns32T j=0; j<L; j++){
mas01cr@648 1697 // read a single hash table for random access
mas01cr@648 1698 serial_get_table(dbfid, j, db, hashTableSize);
mas01cr@648 1699 SerialElementT *pe = db;
mas01mc@292 1700 printf("*********** TABLE %d ***************\n", j);
mas01mc@292 1701 fflush(stdout);
mas01mc@292 1702 int count=0;
mas01mc@292 1703 do{
mas01mc@292 1704 printf("[%d,%d]", j, count++);
mas01mc@292 1705 fflush(stdout);
mas01mc@292 1706 serial_bucket_dump(pe);
mas01mc@292 1707 pe+=lshHeader->numCols;
mas01mc@292 1708 }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols);
mas01mc@292 1709 }
mas01cr@648 1710 free(db);
mas01mc@292 1711 }
mas01mc@292 1712
mas01mc@292 1713 void G::serial_bucket_dump(SerialElementT* pe){
mas01mc@292 1714 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1715 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1716 printf("(%0X,%u)",pe->hashValue,pe->pointID);
mas01mc@292 1717 pe++;
mas01mc@292 1718 }
mas01mc@292 1719 printf("\n");
mas01mc@292 1720 fflush(stdout);
mas01mc@292 1721 }
mas01mc@292 1722
mas01mc@292 1723 void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){
mas01mc@292 1724 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1725 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1726 if(pe->hashValue==t2){ // new match
mas01mc@292 1727 add_point_callback(calling_instance, pe->pointID, qpos, radius);
mas01mc@292 1728 }
mas01mc@292 1729 pe++;
mas01mc@292 1730 }
mas01mc@292 1731 }
mas01mc@292 1732
mas01mc@292 1733 void G::bucket_chain_point(bucket* p, Uns32T qpos){
mas01mc@292 1734 if(!p || p->t2==IFLAG)
mas01mc@292 1735 return;
mas01mc@292 1736 if(p->t2==t2){ // match
mas01mc@344 1737 sbucket_chain_point(p->snext.ptr, qpos); // add to reporter
mas01mc@292 1738 }
mas01mc@292 1739 if(p->next){
mas01mc@292 1740 bucket_chain_point(p->next, qpos); // recurse
mas01mc@292 1741 }
mas01mc@292 1742 }
mas01mc@292 1743
mas01mc@292 1744 void G::sbucket_chain_point(sbucket* p, Uns32T qpos){
mas01mc@292 1745 add_point_callback(calling_instance, p->pointID, qpos, radius);
mas01mc@292 1746 if(p->snext){
mas01mc@292 1747 sbucket_chain_point(p->snext, qpos);
mas01mc@292 1748 }
mas01mc@292 1749 }
mas01mc@292 1750
mas01mc@292 1751 void G::get_lock(int fd, bool exclusive) {
mas01cr@648 1752 #ifndef WIN32
mas01mc@292 1753 struct flock lock;
mas01mc@292 1754 int status;
mas01mc@292 1755 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01mc@292 1756 lock.l_whence = SEEK_SET;
mas01mc@292 1757 lock.l_start = 0;
mas01mc@292 1758 lock.l_len = 0; /* "the whole file" */
mas01mc@292 1759 retry:
mas01mc@292 1760 do {
mas01mc@292 1761 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1762 } while (status != 0 && errno == EINTR);
mas01mc@292 1763 if (status) {
mas01mc@292 1764 if (errno == EAGAIN) {
mas01mc@292 1765 sleep(1);
mas01mc@292 1766 goto retry;
mas01mc@292 1767 } else {
mas01mc@292 1768 error("fcntl lock error", "", "fcntl");
mas01mc@292 1769 }
mas01mc@292 1770 }
mas01cr@648 1771 #else
mas01cr@648 1772 int status;
mas01cr@648 1773
mas01cr@648 1774 retry:
mas01cr@648 1775 status = _locking(fd, _LK_NBLCK, getpagesize());
mas01cr@648 1776 if(status) {
mas01cr@648 1777 Sleep(1000);
mas01cr@648 1778 goto retry;
mas01cr@648 1779 }
mas01cr@648 1780 #endif
mas01mc@292 1781 }
mas01mc@292 1782
mas01mc@292 1783 void G::release_lock(int fd) {
mas01cr@648 1784 #ifndef WIN32
mas01mc@292 1785 struct flock lock;
mas01mc@292 1786 int status;
mas01mc@292 1787
mas01mc@292 1788 lock.l_type = F_UNLCK;
mas01mc@292 1789 lock.l_whence = SEEK_SET;
mas01mc@292 1790 lock.l_start = 0;
mas01mc@292 1791 lock.l_len = 0;
mas01mc@292 1792
mas01mc@292 1793 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1794
mas01mc@292 1795 if (status)
mas01mc@292 1796 error("fcntl unlock error", "", "fcntl");
mas01cr@648 1797 #else
mas01cr@648 1798 _locking(fd, _LK_UNLCK, getpagesize());
mas01cr@648 1799 #endif
mas01mc@292 1800 }