annotate lshlib.cpp @ 507:e7fd50483311

Free bits of the datum constructed in audioDB::query. We're not quite safe: error calls between allocation of some of these bits and pieces and their use will cause failure... but not freeing things here is definitely wrong.
author mas01cr
date Tue, 13 Jan 2009 21:37:10 +0000
parents f9d86b1db21c
children a30948382f56
rev   line source
mas01mc@292 1 #include "lshlib.h"
mas01mc@292 2
mas01mc@340 3 //#define LSH_DUMP_CORE_TABLES
mas01mc@292 4 //#define USE_U_FUNCTIONS
mas01mc@340 5
mas01mc@340 6 // Use backward-compatible CORE ARRAY lsh index
mas01mc@340 7 #define LSH_CORE_ARRAY // Set to use arrays for hashtables rather than linked-lists
mas01mc@340 8 #define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads
mas01mc@340 9
mas01mc@340 10 // Critical path logic
mas01mc@340 11 #if defined LSH_CORE_ARRAY && !defined LSH_LIST_HEAD_COUNTERS
mas01mc@340 12 #define LSH_LIST_HEAD_COUNTERS
mas01mc@340 13 #endif
mas01mc@340 14
mas01mc@340 15 #define LSH_CORE_ARRAY_BIT (0x80000000) // LSH_CORE_ARRAY test bit for list head
mas01mc@340 16
mas01mc@292 17
mas01mc@292 18 void err(char*s){cout << s << endl;exit(2);}
mas01mc@292 19
mas01mc@292 20 Uns32T get_page_logn(){
mas01mc@292 21 int pagesz = (int)sysconf(_SC_PAGESIZE);
mas01mc@292 22 return (Uns32T)log2((double)pagesz);
mas01mc@292 23 }
mas01mc@292 24
mas01cr@370 25 unsigned align_up(unsigned x, unsigned w) { return (((x) + ((1<<w)-1)) & ~((1<<w)-1)); }
mas01mc@292 26
mas01mc@292 27 void H::error(const char* a, const char* b, const char *sysFunc) {
mas01mc@292 28 cerr << a << ": " << b << endl;
mas01mc@292 29 if (sysFunc) {
mas01mc@292 30 perror(sysFunc);
mas01mc@292 31 }
mas01mc@292 32 exit(1);
mas01mc@292 33 }
mas01mc@292 34
mas01mc@293 35 H::H(){
mas01mc@293 36 // Delay initialization of lsh functions until we know the parameters
mas01mc@293 37 }
mas01mc@293 38
mas01mc@293 39 H::H(Uns32T kk, Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float ww, float rr):
mas01mc@292 40 #ifdef USE_U_FUNCTIONS
mas01mc@292 41 use_u_functions(true),
mas01mc@292 42 #else
mas01mc@292 43 use_u_functions(false),
mas01mc@292 44 #endif
mas01mc@293 45 maxp(0),
mas01mc@292 46 bucketCount(0),
mas01mc@292 47 pointCount(0),
mas01mc@292 48 N(NN),
mas01mc@292 49 C(CC),
mas01mc@292 50 k(kk),
mas01mc@292 51 m(mm),
mas01mc@293 52 L((mm*(mm-1))/2),
mas01mc@293 53 d(dd),
mas01mc@293 54 w(ww),
mas01mc@293 55 radius(rr)
mas01mc@292 56 {
mas01mc@292 57
mas01mc@292 58 if(m<2){
mas01mc@292 59 m=2;
mas01mc@292 60 L=1; // check value of L
mas01mc@292 61 cout << "warning: setting m=2, L=1" << endl;
mas01mc@292 62 }
mas01mc@292 63 if(use_u_functions && k%2){
mas01mc@292 64 k++; // make sure k is even
mas01mc@292 65 cout << "warning: setting k even" << endl;
mas01mc@292 66 }
mas01mc@293 67
mas01mc@293 68 // We have the necessary parameters, so construct hashfunction datastructures
mas01mc@293 69 initialize_lsh_functions();
mas01mc@292 70 }
mas01mc@292 71
mas01mc@293 72 void H::initialize_lsh_functions(){
mas01mc@292 73 H::P = UH_PRIME_DEFAULT;
mas01mc@292 74
mas01mc@292 75 /* FIXME: don't use time(); instead use /dev/random or similar */
mas01mc@292 76 /* FIXME: write out the seed somewhere, so that we can get
mas01mc@292 77 repeatability */
mas01mc@292 78 #ifdef MT19937
mas01mc@292 79 init_genrand(time(NULL));
mas01mc@292 80 #else
mas01mc@292 81 srand(time(NULL)); // seed random number generator
mas01mc@292 82 #endif
mas01mc@293 83 Uns32T i,j, kk;
mas01mc@293 84 #ifdef USE_U_FUNCTIONS
mas01mc@293 85 H::A = new float**[ H::m ]; // m x k x d random projectors
mas01mc@293 86 H::b = new float*[ H::m ]; // m x k random biases
mas01mc@293 87 #else
mas01mc@293 88 H::A = new float**[ H::L ]; // m x k x d random projectors
mas01mc@293 89 H::b = new float*[ H::L ]; // m x k random biases
mas01mc@293 90 #endif
mas01mc@293 91 H::g = new Uns32T*[ H::L ]; // L x k random projections
mas01mc@293 92 assert( H::g && H::A && H::b ); // failure
mas01mc@293 93 #ifdef USE_U_FUNCTIONS
mas01mc@293 94 // Use m \times u_i functions \in R^{(k/2) \times (d)}
mas01mc@293 95 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 96 for( j = 0; j < H::m ; j++ ){ // m functions u_i(v)
mas01mc@293 97 H::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients
mas01mc@293 98 H::b[j] = new float[ H::k/2 ]; // bias
mas01mc@293 99 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 100 for( kk = 0; kk < H::k/2 ; kk++ ){
mas01mc@293 101 H::A[j][kk] = new float[ H::d ];
mas01mc@293 102 assert( H::A[j][kk] ); // failure
mas01mc@293 103 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 104 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 105 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 106 }
mas01mc@293 107 }
mas01mc@293 108 #else
mas01mc@293 109 // Use m \times u_i functions \in R^{k \times (d)}
mas01mc@293 110 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 111 for( j = 0; j < H::L ; j++ ){ // m functions u_i(v)
mas01mc@293 112 H::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients
mas01mc@293 113 H::b[j] = new float[ H::k ]; // bias
mas01mc@293 114 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 115 for( kk = 0; kk < H::k ; kk++ ){
mas01mc@293 116 H::A[j][kk] = new float[ H::d ];
mas01mc@293 117 assert( H::A[j][kk] ); // failure
mas01mc@293 118 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 119 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 120 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 121 }
mas01mc@293 122 }
mas01mc@293 123 #endif
mas01mc@293 124
mas01mc@293 125 // Storage for LSH hash function output (Uns32T)
mas01mc@293 126 for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2)
mas01mc@293 127 H::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z
mas01mc@293 128 assert( H::g[j] );
mas01mc@292 129 }
mas01mc@292 130
mas01mc@293 131 // LSH Hash tables
mas01mc@293 132 H::h = new bucket**[ H::L ];
mas01mc@293 133 assert( H::h );
mas01mc@292 134 for( j = 0 ; j < H::L ; j++ ){
mas01mc@292 135 H::h[j] = new bucket*[ H::N ];
mas01mc@292 136 assert( H::h[j] );
mas01mc@292 137 for( i = 0 ; i < H::N ; i++)
mas01mc@292 138 H::h[j][i] = 0;
mas01mc@292 139 }
mas01mc@293 140
mas01mc@293 141 // Standard hash functions
mas01mc@293 142 H::r1 = new Uns32T*[ H::L ];
mas01mc@293 143 H::r2 = new Uns32T*[ H::L ];
mas01mc@293 144 assert( H::r1 && H::r2 ); // failure
mas01mc@293 145 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 146 H::r1[ j ] = new Uns32T[ H::k ];
mas01mc@293 147 H::r2[ j ] = new Uns32T[ H::k ];
mas01mc@293 148 assert( H::r1[j] && H::r2[j] ); // failure
mas01mc@293 149 for( i = 0; i<H::k; i++){
mas01mc@293 150 H::r1[j][i] = randr();
mas01mc@293 151 H::r2[j][i] = randr();
mas01mc@293 152 }
mas01mc@293 153 }
mas01mc@293 154
mas01mc@293 155 // Storage for whole or partial function evaluation depdenting on USE_U_FUNCTIONS
mas01mc@293 156 H::initialize_partial_functions();
mas01mc@293 157 }
mas01mc@293 158
mas01mc@293 159 void H::initialize_partial_functions(){
mas01mc@293 160
mas01mc@293 161 #ifdef USE_U_FUNCTIONS
mas01mc@293 162 H::uu = vector<vector<Uns32T> >(H::m);
mas01mc@293 163 for( Uns32T aa=0 ; aa < H::m ; aa++ )
mas01mc@293 164 H::uu[aa] = vector<Uns32T>( H::k/2 );
mas01mc@293 165 #else
mas01mc@293 166 H::uu = vector<vector<Uns32T> >(H::L);
mas01mc@293 167 for( Uns32T aa=0 ; aa < H::L ; aa++ )
mas01mc@293 168 H::uu[aa] = vector<Uns32T>( H::k );
mas01mc@293 169 #endif
mas01mc@293 170 }
mas01mc@293 171
mas01mc@293 172
mas01mc@293 173 // Generate z ~ N(0,1)
mas01mc@293 174 float H::randn(){
mas01mc@293 175 // Box-Muller
mas01mc@293 176 float x1, x2;
mas01mc@293 177 do{
mas01mc@293 178 x1 = ranf();
mas01mc@293 179 } while (x1 == 0); // cannot take log of 0
mas01mc@293 180 x2 = ranf();
mas01mc@293 181 float z;
mas01mc@293 182 z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2);
mas01mc@293 183 return z;
mas01mc@293 184 }
mas01mc@293 185
mas01mc@293 186 float H::ranf(){
mas01mc@293 187 #ifdef MT19937
mas01mc@293 188 return (float) genrand_real2();
mas01mc@293 189 #else
mas01mc@293 190 return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) );
mas01mc@293 191 #endif
mas01mc@293 192 }
mas01mc@293 193
mas01mc@293 194 // range is 1..2^29
mas01mc@293 195 /* FIXME: that looks like an ... odd range. Still. */
mas01mc@293 196 Uns32T H::randr(){
mas01mc@293 197 #ifdef MT19937
mas01mc@293 198 return (Uns32T)((genrand_int32() >> 3) + 1);
mas01mc@293 199 #else
mas01mc@293 200 return (Uns32T) ((rand() >> 2) + 1);
mas01mc@293 201 #endif
mas01mc@292 202 }
mas01mc@292 203
mas01mc@292 204 // Destruct hash tables
mas01mc@292 205 H::~H(){
mas01mc@293 206 Uns32T i,j,kk;
mas01mc@340 207 bucket** pp;
mas01mc@293 208 #ifdef USE_U_FUNCTIONS
mas01mc@293 209 for( j = 0 ; j < H::m ; j++ ){
mas01mc@293 210 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@293 211 delete[] A[j][kk];
mas01mc@293 212 delete[] A[j];
mas01mc@293 213 }
mas01mc@293 214 delete[] A;
mas01mc@293 215 for( j = 0 ; j < H::m ; j++ )
mas01mc@293 216 delete[] b[j];
mas01mc@293 217 delete[] b;
mas01mc@293 218 #else
mas01mc@293 219 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 220 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@293 221 delete[] A[j][kk];
mas01mc@293 222 delete[] A[j];
mas01mc@293 223 }
mas01mc@293 224 delete[] A;
mas01mc@293 225 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 226 delete[] b[j];
mas01mc@293 227 delete[] b;
mas01mc@293 228 #endif
mas01mc@293 229
mas01mc@293 230 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 231 delete[] g[j];
mas01mc@293 232 delete[] g;
mas01mc@292 233 for( j=0 ; j < H::L ; j++ ){
mas01mc@292 234 delete[] H::r1[ j ];
mas01mc@292 235 delete[] H::r2[ j ];
mas01mc@340 236 for(i = 0; i< H::N ; i++){
mas01mc@340 237 pp = 0;
mas01mc@340 238 #ifdef LSH_CORE_ARRAY
mas01mc@340 239 if(H::h[ j ][ i ])
mas01mc@340 240 if(H::h[ j ][ i ]->t2 & LSH_CORE_ARRAY_BIT){
mas01mc@340 241 pp = get_pointer_to_bucket_linked_list(H::h[ j ][ i ]);
mas01mc@340 242 if(*pp){
mas01mc@344 243 (*pp)->snext.ptr=0; // Head of list uses snext as a non-pointer value
mas01mc@340 244 delete *pp; // Now the destructor can do its work properly
mas01mc@340 245 }
mas01mc@340 246 free(H::h[ j ][ i ]->next);
mas01mc@340 247 H::h[ j ][ i ]->next = 0; // Zero next pointer
mas01mc@344 248 H::h[ j ][ i ]->snext.ptr = 0; // Zero head-of-list pointer as above
mas01mc@340 249 }
mas01mc@340 250 #endif
mas01mc@292 251 delete H::h[ j ][ i ];
mas01mc@340 252 }
mas01mc@292 253 delete[] H::h[ j ];
mas01mc@292 254 }
mas01mc@474 255 delete[] H::r1;
mas01mc@474 256 delete[] H::r2;
mas01mc@474 257 delete[] H::h;
mas01mc@292 258 }
mas01mc@292 259
mas01mc@292 260
mas01mc@293 261 // Compute all hash functions for vector v
mas01mc@293 262 // #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d}
mas01mc@293 263 // to make L \times g_j functions \in Z^k
mas01mc@293 264 void H::compute_hash_functions(vector<float>& v){ // v \in R^d
mas01mc@293 265 float iw = 1. / H::w; // hash bucket width
mas01mc@293 266 Uns32T aa, kk;
mas01mc@293 267 if( v.size() != H::d )
mas01mc@293 268 error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality
mas01mc@293 269 double tmp = 0;
mas01mc@293 270 float *pA, *pb;
mas01mc@293 271 Uns32T *pg;
mas01mc@293 272 int dd;
mas01mc@293 273 vector<float>::iterator vi;
mas01mc@293 274 vector<Uns32T>::iterator ui;
mas01mc@293 275
mas01mc@293 276 #ifdef USE_U_FUNCTIONS
mas01mc@293 277 Uns32T bb;
mas01mc@293 278 // Store m dot products to expand
mas01mc@293 279 for( aa=0; aa < H::m ; aa++ ){
mas01mc@293 280 ui = H::uu[aa].begin();
mas01mc@293 281 for( kk = 0 ; kk < H::k/2 ; kk++ ){
mas01mc@293 282 pb = *( H::b + aa ) + kk;
mas01mc@293 283 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 284 dd = H::d;
mas01mc@293 285 tmp = 0.;
mas01mc@293 286 vi = v.begin();
mas01mc@293 287 while( dd-- )
mas01mc@293 288 tmp += *pA++ * *vi++; // project
mas01mc@293 289 tmp += *pb; // translate
mas01mc@293 290 tmp *= iw; // scale
mas01mc@293 291 *ui++ = (Uns32T) floor(tmp); // floor
mas01mc@293 292 }
mas01mc@293 293 }
mas01mc@293 294 // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d}
mas01mc@293 295 Uns32T j;
mas01mc@293 296 for( aa=0, j=0 ; aa < H::m-1 ; aa++ )
mas01mc@293 297 for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){
mas01mc@293 298 pg= *( H::g + j ); // L \times functions g_j(v) \in Z^k
mas01mc@293 299 // u_1 \in Z^{(k/2) \times d}
mas01mc@293 300 ui = H::uu[aa].begin();
mas01mc@293 301 kk=H::k/2;
mas01mc@293 302 while( kk-- )
mas01mc@293 303 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z
mas01mc@293 304 // u_2 \in Z^{(k/2) \times d}
mas01mc@293 305 ui = H::uu[bb].begin();
mas01mc@293 306 kk=H::k/2;
mas01mc@293 307 while( kk--)
mas01mc@293 308 *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z
mas01mc@293 309 }
mas01mc@293 310 #else
mas01mc@293 311 for( aa=0; aa < H::L ; aa++ ){
mas01mc@293 312 ui = H::uu[aa].begin();
mas01mc@293 313 for( kk = 0 ; kk < H::k ; kk++ ){
mas01mc@293 314 pb = *( H::b + aa ) + kk;
mas01mc@293 315 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 316 dd = H::d;
mas01mc@293 317 tmp = 0.;
mas01mc@293 318 vi = v.begin();
mas01mc@293 319 while( dd-- )
mas01mc@293 320 tmp += *pA++ * *vi++; // project
mas01mc@293 321 tmp += *pb; // translate
mas01mc@293 322 tmp *= iw; // scale
mas01mc@293 323 *ui++ = (Uns32T) (floor(tmp)); // floor
mas01mc@293 324 }
mas01mc@293 325 }
mas01mc@293 326 // Compute hash functions
mas01mc@293 327 for( aa=0 ; aa < H::L ; aa++ ){
mas01mc@293 328 pg= *( H::g + aa ); // L \times functions g_j(v) \in Z^k
mas01mc@293 329 // u_1 \in Z^{k \times d}
mas01mc@293 330 ui = H::uu[aa].begin();
mas01mc@293 331 kk=H::k;
mas01mc@293 332 while( kk-- )
mas01mc@293 333 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z
mas01mc@293 334 }
mas01mc@293 335 #endif
mas01mc@293 336 }
mas01mc@293 337
mas01mc@292 338 // make hash value \in Z
mas01mc@293 339 void H::generate_hash_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
mas01mc@293 340 H::t1 = computeProductModDefaultPrime( g, r1, H::k ) % H::N;
mas01mc@293 341 H::t2 = computeProductModDefaultPrime( g, r2, H::k );
mas01mc@292 342 }
mas01mc@292 343
mas01mc@292 344 #define CR_ASSERT(b){if(!(b)){fprintf(stderr, "ASSERT failed on line %d, file %s.\n", __LINE__, __FILE__); exit(1);}}
mas01mc@292 345
mas01mc@292 346 // Computes (a.b) mod UH_PRIME_DEFAULT
mas01mc@293 347 inline Uns32T H::computeProductModDefaultPrime(Uns32T *a, Uns32T *b, IntT size){
mas01mc@292 348 LongUns64T h = 0;
mas01mc@292 349
mas01mc@292 350 for(IntT i = 0; i < size; i++){
mas01mc@292 351 h = h + (LongUns64T)a[i] * (LongUns64T)b[i];
mas01mc@292 352 h = (h & TWO_TO_32_MINUS_1) + 5 * (h >> 32);
mas01mc@292 353 if (h >= UH_PRIME_DEFAULT) {
mas01mc@292 354 h = h - UH_PRIME_DEFAULT;
mas01mc@292 355 }
mas01mc@292 356 CR_ASSERT(h < UH_PRIME_DEFAULT);
mas01mc@292 357 }
mas01mc@292 358 return h;
mas01mc@292 359 }
mas01mc@292 360
mas01mc@292 361 Uns32T H::bucket_insert_point(bucket **pp){
mas01mc@296 362 collisionCount = 0;
mas01mc@340 363 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 364 if(!*pp){
mas01mc@292 365 *pp = new bucket();
mas01mc@292 366 (*pp)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@292 367 (*pp)->next = new bucket();
mas01mc@292 368 }
mas01mc@340 369 // The list head holds point collision count
mas01mc@340 370 if( (*pp)->t2 & LSH_CORE_ARRAY_BIT )
mas01mc@340 371 bucket_insert_point(get_pointer_to_bucket_linked_list(*pp)); // recurse
mas01mc@340 372 else{
mas01mc@340 373 collisionCount = (*pp)->t2;
mas01mc@340 374 if(collisionCount < H::C){ // Block if row is full
mas01mc@340 375 (*pp)->t2++; // Increment collision counter (numPoints in row)
mas01mc@340 376 pointCount++;
mas01mc@340 377 collisionCount++;
mas01mc@340 378 // Locate the bucket linked list
mas01mc@340 379 __bucket_insert_point( (*pp)->next );
mas01mc@340 380 }
mas01mc@292 381 }
mas01mc@340 382 #else // NOT USING LSH_LIST_HEAD_COUNTERS
mas01mc@340 383 if(!*pp)
mas01mc@340 384 *pp = new bucket();
mas01mc@296 385 pointCount++;
mas01mc@296 386 __bucket_insert_point(*pp); // No collision count storage
mas01mc@292 387 #endif
mas01mc@292 388 return collisionCount;
mas01mc@292 389 }
mas01mc@292 390
mas01mc@340 391 // insert points into hashtable row collision chain
mas01mc@292 392 void H::__bucket_insert_point(bucket* p){
mas01mc@292 393 if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
mas01mc@292 394 p->t2 = H::t2;
mas01mc@292 395 bucketCount++; // Record start of new point-locale collision chain
mas01mc@344 396 p->snext.ptr = new sbucket();
mas01mc@344 397 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 398 return;
mas01mc@292 399 }
mas01mc@292 400
mas01mc@292 401 if(p->t2 == H::t2){
mas01mc@344 402 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 403 return;
mas01mc@292 404 }
mas01mc@292 405
mas01mc@292 406 if(p->next){
mas01mc@340 407 // Construct list in t2 order
mas01mc@340 408 if(H::t2 < p->next->t2){
mas01mc@340 409 bucket* tmp = new bucket();
mas01mc@340 410 tmp->next = p->next;
mas01mc@340 411 p->next = tmp;
mas01mc@340 412 __bucket_insert_point(tmp);
mas01mc@340 413 }
mas01mc@340 414 else
mas01mc@340 415 __bucket_insert_point(p->next);
mas01mc@292 416 }
mas01mc@340 417 else {
mas01mc@292 418 p->next = new bucket();
mas01mc@292 419 __bucket_insert_point(p->next);
mas01mc@292 420 }
mas01mc@292 421 }
mas01mc@292 422
mas01mc@340 423 // insert points into point-locale collision chain
mas01mc@292 424 void H::__sbucket_insert_point(sbucket* p){
mas01mc@292 425 if(p->pointID==IFLAG){
mas01mc@292 426 p->pointID = H::p;
mas01mc@292 427 return;
mas01mc@292 428 }
mas01mc@340 429
mas01mc@292 430 // Search for pointID
mas01mc@340 431 if(p->snext){
mas01mc@292 432 __sbucket_insert_point(p->snext);
mas01mc@292 433 }
mas01mc@292 434 else{
mas01mc@340 435 // Make new point collision bucket at end of list
mas01mc@340 436 p->snext = new sbucket();
mas01mc@340 437 __sbucket_insert_point(p->snext);
mas01mc@292 438 }
mas01mc@292 439 }
mas01mc@292 440
mas01mc@293 441 inline bucket** H::get_bucket(int j){
mas01mc@292 442 return *(h+j);
mas01mc@292 443 }
mas01mc@292 444
mas01mc@340 445 // Find the linked-list pointer at the end of the CORE_ARRAY
mas01mc@340 446 bucket** H::get_pointer_to_bucket_linked_list(bucket* rowPtr){
mas01mc@344 447 Uns32T numBuckets = rowPtr->snext.numBuckets; // Cast pointer to unsigned int
mas01mc@343 448 Uns32T numPoints = rowPtr->t2 & 0x7FFFFFFF; // Value is stored in low 31 bits of t2 field
mas01mc@343 449 bucket** listPtr = reinterpret_cast<bucket**> (reinterpret_cast<unsigned int*>(rowPtr->next)+numPoints+numBuckets+1);
mas01mc@343 450 return listPtr;
mas01mc@340 451 }
mas01mc@340 452
mas01mc@293 453 // Interface to Locality Sensitive Hashing G
mas01mc@293 454 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
mas01mc@293 455 H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
mas01mc@308 456 indexName(0),
mas01mc@293 457 lshHeader(0),
mas01mc@292 458 calling_instance(0),
mas01mc@293 459 add_point_callback(0)
mas01mc@292 460 {
mas01mc@293 461
mas01mc@292 462 }
mas01mc@292 463
mas01mc@292 464 // Serialize from file LSH constructor
mas01mc@292 465 // Read parameters from database file
mas01mc@292 466 // Load the hash functions, close the database
mas01mc@292 467 // Optionally load the LSH tables into head-allocated lists in core
mas01mc@292 468 G::G(char* filename, bool lshInCoreFlag):
mas01mc@293 469 H(), // default base-class constructor call delays data-structure initialization
mas01mc@309 470 indexName(0),
mas01mc@293 471 lshHeader(0),
mas01mc@292 472 calling_instance(0),
mas01mc@292 473 add_point_callback(0)
mas01mc@292 474 {
mas01mc@474 475 FILE* dbFile = 0;
mas01mc@292 476 int dbfid = unserialize_lsh_header(filename);
mas01mc@309 477 indexName = new char[O2_INDEX_MAXSTR];
mas01mc@309 478 strncpy(indexName, filename, O2_INDEX_MAXSTR); // COPY THE CONTENTS TO THE NEW POINTER
mas01mc@293 479 H::initialize_lsh_functions(); // Base-class data-structure initialization
mas01mc@293 480 unserialize_lsh_functions(dbfid); // populate with on-disk hashfunction values
mas01mc@292 481
mas01mc@292 482 // Format1 only needs unserializing if specifically requested
mas01mc@292 483 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){
mas01mc@292 484 unserialize_lsh_hashtables_format1(dbfid);
mas01mc@292 485 }
mas01mc@292 486
mas01mc@292 487 // Format2 always needs unserializing
mas01mc@292 488 if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){
mas01mc@474 489 dbFile = fdopen(dbfid, "rb");
mas01mc@336 490 if(!dbFile)
mas01mc@336 491 error("Cannot open LSH file for reading", filename);
mas01mc@336 492 unserialize_lsh_hashtables_format2(dbFile);
mas01mc@292 493 }
mas01mc@336 494 serial_close(dbfid);
mas01mc@474 495 if(dbFile){
mas01mc@474 496 fclose(dbFile);
mas01mc@474 497 dbFile = 0;
mas01mc@474 498 }
mas01mc@336 499 }
mas01mc@292 500
mas01mc@292 501 G::~G(){
mas01mc@292 502 delete lshHeader;
mas01mc@474 503 delete[] indexName;
mas01mc@292 504 }
mas01mc@292 505
mas01mc@292 506 // single point insertion; inserted values are hash value and pointID
mas01mc@292 507 Uns32T G::insert_point(vector<float>& v, Uns32T pp){
mas01mc@292 508 Uns32T collisionCount = 0;
mas01mc@292 509 H::p = pp;
mas01mc@299 510 if(H::maxp && pp<=H::maxp)
mas01mc@296 511 error("points must be indexed in strict ascending order", "LSH::insert_point(vector<float>&, Uns32T pointID)");
mas01mc@296 512 H::maxp=pp; // Store highest pointID in database
mas01mc@293 513 H::compute_hash_functions( v );
mas01mc@292 514 for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion
mas01mc@293 515 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@292 516 collisionCount += bucket_insert_point( *(h + j) + t1 );
mas01mc@292 517 }
mas01mc@292 518 return collisionCount;
mas01mc@292 519 }
mas01mc@292 520
mas01mc@292 521
mas01mc@292 522 // batch insert for a point set
mas01mc@292 523 // inserted values are vector hash value and pointID starting at basePointID
mas01mc@292 524 void G::insert_point_set(vector<vector<float> >& vv, Uns32T basePointID){
mas01mc@292 525 for(Uns32T point=0; point<vv.size(); point++)
mas01mc@292 526 insert_point(vv[point], basePointID+point);
mas01mc@292 527 }
mas01mc@292 528
mas01mc@292 529 // point retrieval routine
mas01mc@292 530 void G::retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 531 calling_instance = caller;
mas01mc@292 532 add_point_callback = add_point;
mas01mc@293 533 H::compute_hash_functions( v );
mas01mc@292 534 for(Uns32T j = 0 ; j < H::L ; j++ ){
mas01mc@293 535 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01cr@370 536 if( bucket* bPtr = *(get_bucket(j) + get_t1()) ) {
mas01mc@340 537 #ifdef LSH_LIST_HEAD_COUNTERS
mas01cr@370 538 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@340 539 retrieve_from_core_hashtable_array((Uns32T*)(bPtr->next), qpos);
mas01cr@370 540 } else {
mas01mc@340 541 bucket_chain_point( bPtr->next, qpos);
mas01cr@370 542 }
mas01mc@292 543 #else
mas01cr@370 544 bucket_chain_point( bPtr , qpos);
mas01mc@292 545 #endif
mas01cr@370 546 }
mas01mc@292 547 }
mas01mc@292 548 }
mas01mc@292 549
mas01mc@292 550 void G::retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 551 for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ )
mas01mc@292 552 retrieve_point(vv[qpos], qpos, add_point, caller);
mas01mc@292 553 }
mas01mc@292 554
mas01mc@292 555 // export lsh tables to table structure on disk
mas01mc@292 556 //
mas01mc@292 557 // LSH TABLE STRUCTURE
mas01mc@292 558 // ---header 64 bytes ---
mas01mc@292 559 // [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0]
mas01mc@292 560 //
mas01mc@292 561 // ---random projections L x k x d float ---
mas01mc@292 562 // A[0][0][0] A[0][0][1] ... A[0][0][d-1]
mas01mc@292 563 // A[0][1][0] A[0][1][1] ... A[1][1][d-1]
mas01mc@292 564 // ...
mas01mc@292 565 // A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1]
mas01mc@292 566 // ...
mas01mc@292 567 // ...
mas01mc@292 568 // A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1]
mas01mc@292 569 // A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1]
mas01mc@292 570 // ...
mas01mc@292 571 // A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1]
mas01mc@292 572 //
mas01mc@292 573 // ---bias L x k float ---
mas01mc@292 574 // b[0][0] b[0][1] ... b[0][k-1]
mas01mc@292 575 // b[1][0] b[1][1] ... b[1][k-1]
mas01mc@292 576 // ...
mas01mc@292 577 // b[L-1][0] b[L-1][1] ... b[L-1][k-1]
mas01mc@292 578 //
mas01mc@292 579 // ---random r1 L x k float ---
mas01mc@292 580 // r1[0][0] r1[0][1] ... r1[0][k-1]
mas01mc@292 581 // r1[1][0] r1[1][1] ... r1[1][k-1]
mas01mc@292 582 // ...
mas01mc@292 583 // r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1]
mas01mc@292 584 //
mas01mc@292 585 // ---random r2 L x k float ---
mas01mc@292 586 // r2[0][0] r2[0][1] ... r2[0][k-1]
mas01mc@292 587 // r2[1][0] r2[1][1] ... r2[1][k-1]
mas01mc@292 588 // ...
mas01mc@292 589 // r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1]
mas01mc@292 590 //
mas01mc@293 591 // ******* HASHTABLES FORMAT1 (optimized for LSH_ON_DISK retrieval) *******
mas01mc@292 592 // ---hash table 0: N x C x 8 ---
mas01mc@292 593 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 594 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 595 // ...
mas01mc@292 596 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 597 //
mas01mc@292 598 // ---hash table 1: N x C x 8 ---
mas01mc@292 599 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 600 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 601 // ...
mas01mc@292 602 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 603 //
mas01mc@292 604 // ...
mas01mc@292 605 //
mas01mc@292 606 // ---hash table L-1: N x C x 8 ---
mas01mc@292 607 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 608 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 609 // ...
mas01mc@292 610 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 611 //
mas01mc@293 612 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) *******
mas01mc@293 613 //
mas01mc@293 614 // State machine controlled by regular expression.
mas01mc@293 615 // legend:
mas01mc@293 616 //
mas01mc@306 617 // O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU
mas01mc@306 618 // O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU
mas01mc@306 619 // O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU
mas01mc@293 620 //
mas01mc@306 621 // T1 - T1 hash token
mas01mc@306 622 // t1 - t1 hash key (t1 range 0..2^29-1)
mas01mc@306 623 // T2 - T2 token
mas01mc@293 624 // t2 - t2 hash key (range 1..2^32-6)
mas01mc@293 625 // p - point identifier (range 0..2^32-1)
mas01mc@306 626 // E - end hash table token
mas01mc@293 627 // {...} required arguments
mas01mc@293 628 // [...] optional arguments
mas01mc@293 629 // * - match zero or more occurences
mas01mc@293 630 // + - match one or more occurences
mas01mc@293 631 // {...}^L - repeat argument L times
mas01mc@293 632 //
mas01mc@293 633 // FORMAT2 Regular expression:
mas01mc@306 634 // { [T1 t1 [T2 t2 p+]+ ]* E }^L
mas01mc@293 635 //
mas01mc@292 636
mas01mc@292 637 // Serial header constructors
mas01mc@292 638 SerialHeader::SerialHeader(){;}
mas01mc@296 639 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc):
mas01mc@292 640 lshMagic(O2_SERIAL_MAGIC),
mas01mc@292 641 binWidth(W),
mas01mc@292 642 numTables(L),
mas01mc@292 643 numRows(N),
mas01mc@292 644 numCols(C),
mas01mc@292 645 elementSize(O2_SERIAL_ELEMENT_SIZE),
mas01mc@296 646 version(O2_SERIAL_VERSION),
mas01mc@296 647 size(0), // we are deprecating this value
mas01mc@292 648 flags(FMT),
mas01mc@292 649 dataDim(d),
mas01mc@292 650 numFuns(k),
mas01mc@292 651 radius(r),
mas01mc@296 652 maxp(p),
mas01mc@296 653 size_long((unsigned long long)L * align_up((unsigned long long)N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables
mas01mc@296 654 + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions
mas01mc@296 655 (unsigned long long)L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())),
mas01mc@296 656 pointCount(pc){
mas01mc@296 657
mas01mc@296 658 if(FMT==O2_SERIAL_FILEFORMAT2)
mas01mc@296 659 size_long = (unsigned long long)align_up(O2_SERIAL_HEADER_SIZE
mas01mc@296 660 + (unsigned long long)L*k*(sizeof(float)*d+2+sizeof(Uns32T)
mas01mc@296 661 +sizeof(float)) + (unsigned long long)pc*16UL,get_page_logn());
mas01mc@296 662 } // header
mas01mc@292 663
mas01mc@292 664 float* G::get_serial_hashfunction_base(char* db){
mas01mc@292 665 if(db&&lshHeader)
mas01mc@292 666 return (float*)(db+O2_SERIAL_HEADER_SIZE);
mas01mc@292 667 else return NULL;
mas01mc@292 668 }
mas01mc@292 669
mas01mc@292 670 SerialElementT* G::get_serial_hashtable_base(char* db){
mas01mc@292 671 if(db&&lshHeader)
mas01mc@292 672 return (SerialElementT*)(db+get_serial_hashtable_offset());
mas01mc@292 673 else
mas01mc@292 674 return NULL;
mas01mc@292 675 }
mas01mc@292 676
mas01mc@292 677 Uns32T G::get_serial_hashtable_offset(){
mas01mc@292 678 if(lshHeader)
mas01mc@292 679 return align_up(O2_SERIAL_HEADER_SIZE +
mas01mc@292 680 L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn());
mas01mc@292 681 else
mas01mc@292 682 return 0;
mas01mc@292 683 }
mas01mc@292 684
mas01mc@292 685 void G::serialize(char* filename, Uns32T serialFormat){
mas01mc@292 686 int dbfid;
mas01mc@292 687 char* db;
mas01mc@292 688 int dbIsNew=0;
mas01mc@474 689 FILE* dbFile = 0;
mas01mc@292 690 // Check requested serialFormat
mas01mc@292 691 if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2))
mas01mc@292 692 error("Unrecognized serial file format request: ", "serialize()");
mas01mc@296 693
mas01mc@292 694 // Test to see if file exists
mas01cr@370 695 if((dbfid = open (filename, O_RDONLY)) < 0) {
mas01mc@292 696 // If it doesn't, then create the file (CREATE)
mas01cr@370 697 if(errno == ENOENT) {
mas01mc@292 698 // Create the file
mas01mc@292 699 std::cout << "Creating new serialized LSH database:" << filename << "...";
mas01mc@292 700 std::cout.flush();
mas01mc@292 701 serial_create(filename, serialFormat);
mas01mc@292 702 dbIsNew=1;
mas01cr@370 703 } else {
mas01mc@292 704 // The file can't be opened
mas01mc@292 705 error("Can't open the file", filename, "open");
mas01cr@370 706 }
mas01cr@370 707 }
mas01mc@292 708
mas01mc@292 709 // Load the on-disk header into core
mas01mc@292 710 dbfid = serial_open(filename, 1); // open for write
mas01mc@292 711 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@292 712 serial_get_header(db); // read header
mas01mc@292 713 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 714
mas01mc@292 715 // Check compatibility of core and disk data structures
mas01mc@292 716 if( !serial_can_merge(serialFormat) )
mas01mc@292 717 error("Incompatible core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 718
mas01mc@292 719 // For new LSH databases write the hashfunctions
mas01mc@292 720 if(dbIsNew)
mas01mc@292 721 serialize_lsh_hashfunctions(dbfid);
mas01mc@292 722 // Write the hashtables in the requested format
mas01mc@292 723 if(serialFormat == O2_SERIAL_FILEFORMAT1)
mas01mc@292 724 serialize_lsh_hashtables_format1(dbfid, !dbIsNew);
mas01mc@336 725 else{
mas01mc@474 726 dbFile = fdopen(dbfid, "r+b");
mas01mc@336 727 if(!dbFile)
mas01mc@336 728 error("Cannot open LSH file for writing",filename);
mas01mc@336 729 serialize_lsh_hashtables_format2(dbFile, !dbIsNew);
mas01mc@336 730 fflush(dbFile);
mas01mc@336 731 }
mas01mc@292 732
mas01mc@336 733 if(!dbIsNew) {
mas01mc@292 734 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@292 735 //serial_get_header(db); // read header
mas01mc@293 736 cout << "maxp = " << H::maxp << endl;
mas01mc@293 737 lshHeader->maxp=H::maxp;
mas01mc@292 738 // Default to FILEFORMAT1
mas01mc@292 739 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2))
mas01mc@294 740 lshHeader->flags|=O2_SERIAL_FILEFORMAT1;
mas01mc@292 741 memcpy((char*)db, (char*)lshHeader, sizeof(SerialHeaderT));
mas01mc@292 742 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@336 743 }
mas01mc@336 744 serial_close(dbfid);
mas01mc@474 745 if(dbFile){
mas01mc@474 746 fclose(dbFile);
mas01mc@474 747 dbFile = 0;
mas01mc@474 748 }
mas01mc@292 749 }
mas01mc@292 750
mas01mc@292 751 // Test to see if core structure and requested format is
mas01mc@292 752 // compatible with currently opened database
mas01mc@292 753 int G::serial_can_merge(Uns32T format){
mas01mc@292 754 SerialHeaderT* that = lshHeader;
mas01mc@292 755 if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 756 || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 757 || !( this->w == that->binWidth &&
mas01mc@296 758 this->L == that->numTables &&
mas01mc@296 759 this->N == that->numRows &&
mas01mc@296 760 this->k == that->numFuns &&
mas01mc@296 761 this->d == that->dataDim &&
mas01mc@296 762 sizeof(SerialElementT) == that->elementSize &&
mas01mc@296 763 this->radius == that->radius)){
mas01mc@292 764 serial_print_header(format);
mas01mc@292 765 return 0;
mas01mc@292 766 }
mas01mc@292 767 else
mas01mc@292 768 return 1;
mas01mc@292 769 }
mas01mc@292 770
mas01mc@292 771 // Used as an error message for serial_can_merge()
mas01mc@292 772 void G::serial_print_header(Uns32T format){
mas01mc@292 773 std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl;
mas01mc@292 774 std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl;
mas01mc@292 775 std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl;
mas01mc@292 776 std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl;
mas01mc@292 777 std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl;
mas01mc@292 778 std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl;
mas01mc@292 779 std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl;
mas01mc@292 780 std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl;
mas01mc@292 781 }
mas01mc@292 782
mas01mc@292 783 int G::serialize_lsh_hashfunctions(int fid){
mas01mc@292 784 float* pf;
mas01mc@292 785 Uns32T *pu;
mas01mc@292 786 Uns32T x,y,z;
mas01mc@292 787
mas01mc@293 788 char* db = serial_mmap(fid, get_serial_hashtable_offset(), 1);// get database pointer
mas01mc@292 789 pf = get_serial_hashfunction_base(db);
mas01mc@292 790
mas01mc@292 791 // HASH FUNCTIONS
mas01mc@292 792 // Write the random projectors A[][][]
mas01mc@292 793 #ifdef USE_U_FUNCTIONS
mas01mc@292 794 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 795 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 796 #else
mas01mc@292 797 for( x = 0 ; x < H::L ; x++ )
mas01mc@292 798 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 799 #endif
mas01mc@292 800 for( z = 0 ; z < d ; z++ )
mas01mc@293 801 *pf++ = H::A[x][y][z];
mas01mc@292 802
mas01mc@292 803 // Write the random biases b[][]
mas01mc@292 804 #ifdef USE_U_FUNCTIONS
mas01mc@292 805 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 806 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 807 #else
mas01mc@292 808 for( x = 0 ; x < H::L ; x++ )
mas01mc@292 809 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 810 #endif
mas01mc@293 811 *pf++ = H::b[x][y];
mas01mc@292 812
mas01mc@292 813 pu = (Uns32T*)pf;
mas01mc@292 814
mas01mc@292 815 // Write the Z projectors r1[][]
mas01mc@292 816 for( x = 0 ; x < H::L ; x++)
mas01mc@292 817 for( y = 0 ; y < H::k ; y++)
mas01mc@293 818 *pu++ = H::r1[x][y];
mas01mc@292 819
mas01mc@292 820 // Write the Z projectors r2[][]
mas01mc@292 821 for( x = 0 ; x < H::L ; x++)
mas01mc@292 822 for( y = 0; y < H::k ; y++)
mas01mc@293 823 *pu++ = H::r2[x][y];
mas01mc@292 824
mas01mc@292 825 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 826 return 1;
mas01mc@292 827 }
mas01mc@292 828
mas01mc@292 829 int G::serialize_lsh_hashtables_format1(int fid, int merge){
mas01mc@292 830 SerialElementT *pe, *pt;
mas01mc@292 831 Uns32T x,y;
mas01mc@292 832
mas01mc@292 833 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) )
mas01mc@292 834 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 835
mas01mc@292 836 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 837 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount;
mas01mc@292 838 // Write the hash tables
mas01mc@292 839 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 840 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1...";
mas01mc@292 841 std::cout.flush();
mas01mc@292 842 // memory map a single hash table for sequential access
mas01mc@292 843 // Align each hash table to page boundary
mas01mc@292 844 char* dbtable = serial_mmap(fid, hashTableSize, 1,
mas01mc@292 845 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@324 846 #ifdef __CYGWIN__
mas01mc@324 847 // No madvise in CYGWIN
mas01mc@324 848 #else
mas01mc@292 849 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 850 error("could not advise hashtable memory","","madvise");
mas01mc@324 851 #endif
mas01mc@292 852 maxColCount=0;
mas01mc@292 853 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 854 meanColCount=0;
mas01mc@292 855 colCountN=0;
mas01mc@292 856 pt=(SerialElementT*)dbtable;
mas01mc@292 857 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 858 // Move disk pointer to beginning of row
mas01mc@292 859 pe=pt+y*lshHeader->numCols;
mas01mc@292 860
mas01mc@292 861 colCount=0;
mas01cr@370 862 if(bucket* bPtr = h[x][y]) {
mas01cr@370 863 if(merge) {
mas01mc@340 864 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 865 serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01cr@370 866 } else {
mas01mc@292 867 serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 868 #else
mas01cr@370 869 serial_merge_hashtable_row_format1(pe, bPtr, colCount);
mas01cr@370 870 } else {
mas01cr@370 871 serial_write_hashtable_row_format1(pe, bPtr, colCount);
mas01mc@292 872 #endif
mas01cr@370 873 }
mas01cr@370 874 }
mas01mc@292 875 if(colCount){
mas01mc@292 876 if(colCount<minColCount)
mas01mc@292 877 minColCount=colCount;
mas01mc@292 878 if(colCount>maxColCount)
mas01mc@292 879 maxColCount=colCount;
mas01mc@292 880 meanColCount+=colCount;
mas01mc@292 881 colCountN++;
mas01mc@292 882 }
mas01mc@292 883 }
mas01mc@292 884 if(colCountN)
mas01mc@292 885 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 886 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 887 << endl;
mas01mc@292 888 serial_munmap(dbtable, hashTableSize);
mas01mc@292 889 }
mas01mc@292 890
mas01mc@292 891 // We're done writing
mas01mc@292 892 return 1;
mas01mc@292 893 }
mas01mc@292 894
mas01mc@292 895 void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){
mas01mc@292 896 while(b && b->t2!=IFLAG){
mas01mc@292 897 SerialElementT*pe=pr; // reset disk pointer to beginning of row
mas01mc@344 898 serial_merge_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 899 b=b->next;
mas01mc@292 900 }
mas01mc@292 901 }
mas01mc@292 902
mas01mc@292 903 void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 904 while(sb){
mas01mc@292 905 if(colCount==lshHeader->numCols){
mas01mc@292 906 std::cout << "!point-chain full " << endl;
mas01mc@292 907 return;
mas01mc@292 908 }
mas01mc@292 909 Uns32T c=0;
mas01mc@292 910 // Merge collision chains
mas01mc@292 911 while(c<lshHeader->numCols){
mas01mc@292 912 if( (pe+c)->hashValue==IFLAG){
mas01mc@292 913 (pe+c)->hashValue=t2;
mas01mc@292 914 (pe+c)->pointID=sb->pointID;
mas01mc@292 915 colCount=c+1;
mas01mc@292 916 if(c+1<lshHeader->numCols)
mas01mc@292 917 (pe+c+1)->hashValue=IFLAG;
mas01mc@292 918 break;
mas01mc@292 919 }
mas01mc@292 920 c++;
mas01mc@292 921 }
mas01mc@292 922 sb=sb->snext;
mas01mc@292 923 }
mas01mc@292 924 return;
mas01mc@292 925 }
mas01mc@292 926
mas01mc@292 927 void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){
mas01mc@292 928 pe->hashValue=IFLAG;
mas01mc@292 929 while(b && b->t2!=IFLAG){
mas01mc@344 930 serial_write_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 931 b=b->next;
mas01mc@292 932 }
mas01mc@292 933 }
mas01mc@292 934
mas01mc@292 935 void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 936 while(sb){
mas01mc@292 937 if(colCount==lshHeader->numCols){
mas01mc@292 938 std::cout << "!point-chain full " << endl;
mas01mc@292 939 return;
mas01mc@292 940 }
mas01mc@292 941 pe->hashValue=t2;
mas01mc@292 942 pe->pointID=sb->pointID;
mas01mc@292 943 pe++;
mas01mc@292 944 colCount++;
mas01mc@292 945 sb=sb->snext;
mas01mc@292 946 }
mas01mc@292 947 pe->hashValue=IFLAG;
mas01mc@292 948 return;
mas01mc@292 949 }
mas01mc@292 950
mas01mc@336 951 int G::serialize_lsh_hashtables_format2(FILE* dbFile, int merge){
mas01mc@292 952 Uns32T x,y;
mas01mc@292 953
mas01mc@292 954 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
mas01mc@292 955 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 956
mas01mc@340 957 // We must pereform FORMAT2 merges in core FORMAT1 (dynamic list structure)
mas01mc@292 958 if(merge)
mas01mc@340 959 unserialize_lsh_hashtables_format2(dbFile, merge);
mas01mc@292 960
mas01mc@292 961 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
mas01mc@336 962 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 963 fclose(dbFile);
mas01mc@336 964 error("fSeek error in serialize_lsh_hashtables_format2");
mas01mc@336 965 }
mas01mc@292 966
mas01mc@292 967 // Write the hash tables
mas01mc@292 968 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 969 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2...";
mas01mc@292 970 std::cout.flush();
mas01mc@292 971 maxColCount=0;
mas01mc@292 972 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 973 meanColCount=0;
mas01mc@292 974 colCountN=0;
mas01mc@292 975 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 976 colCount=0;
mas01mc@292 977 if(bucket* bPtr = h[x][y]){
mas01mc@306 978 // Check for empty row (even though row was allocated)
mas01mc@340 979 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@306 980 if(bPtr->next->t2==IFLAG){
mas01mc@336 981 fclose(dbFile);
mas01mc@306 982 error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 983 }
mas01mc@306 984 #else
mas01mc@306 985 if(bPtr->t2==IFLAG){
mas01mc@336 986 fclose(dbFile);
mas01mc@306 987 error("b->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 988 }
mas01mc@306 989 #endif
mas01mc@306 990 t1 = O2_SERIAL_TOKEN_T1;
mas01mc@340 991 WRITE_UNS32(&t1, "[T1]");
mas01mc@306 992 t1 = y;
mas01mc@340 993 WRITE_UNS32(&t1, "[t1]");
mas01mc@340 994 #ifdef LSH_CORE_ARRAY
mas01mc@340 995 t1 = count_buckets_and_points_hashtable_row(bPtr);
mas01mc@340 996 WRITE_UNS32(&t1,"[count]"); // write numElements
mas01mc@340 997 #endif
mas01mc@340 998 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@336 999 serial_write_hashtable_row_format2(dbFile, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 1000 #else
mas01mc@340 1001 serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
mas01mc@292 1002 #endif
mas01mc@292 1003 }
mas01mc@292 1004 if(colCount){
mas01mc@292 1005 if(colCount<minColCount)
mas01mc@292 1006 minColCount=colCount;
mas01mc@292 1007 if(colCount>maxColCount)
mas01mc@292 1008 maxColCount=colCount;
mas01mc@292 1009 meanColCount+=colCount;
mas01mc@292 1010 colCountN++;
mas01mc@292 1011 }
mas01mc@292 1012 }
mas01mc@292 1013 // Write END of table marker
mas01mc@306 1014 t1 = O2_SERIAL_TOKEN_ENDTABLE;
mas01mc@340 1015 WRITE_UNS32(&t1,"[end]");
mas01mc@292 1016 if(colCountN)
mas01mc@292 1017 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 1018 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 1019 << endl;
mas01mc@340 1020 }
mas01mc@292 1021 // We're done writing
mas01mc@292 1022 return 1;
mas01mc@292 1023 }
mas01mc@292 1024
mas01mc@340 1025 Uns32T G::count_buckets_and_points_hashtable_row(bucket* bPtr){
mas01mc@340 1026 Uns32T total_count = 0;
mas01mc@340 1027 bucket* p = 0;
mas01mc@340 1028
mas01mc@340 1029 // count points
mas01mc@340 1030 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1031 total_count = bPtr->t2; // points already counted
mas01mc@340 1032 p = bPtr->next;
mas01mc@340 1033 #else
mas01mc@340 1034 total_count = count_points_hashtable_row(bPtr);
mas01mc@340 1035 p = bPtr;
mas01mc@340 1036 #endif
mas01mc@340 1037
mas01mc@340 1038 // count buckets
mas01mc@340 1039 do{
mas01mc@340 1040 total_count++;
mas01mc@340 1041 }while((p=p->next));
mas01mc@340 1042
mas01mc@340 1043 return total_count;
mas01mc@340 1044 }
mas01mc@340 1045
mas01mc@340 1046 Uns32T G::count_points_hashtable_row(bucket* bPtr){
mas01mc@340 1047 Uns32T point_count = 0;
mas01mc@340 1048 bucket* p = bPtr;
mas01mc@340 1049 sbucket* s = 0;
mas01mc@340 1050 while(p){
mas01mc@344 1051 s = p->snext.ptr;
mas01mc@340 1052 while(s){
mas01mc@340 1053 point_count++;
mas01mc@340 1054 s=s->snext;
mas01mc@340 1055 }
mas01mc@340 1056 p=p->next;
mas01mc@340 1057 }
mas01mc@340 1058 return point_count;
mas01mc@340 1059 }
mas01mc@340 1060
mas01mc@336 1061 void G::serial_write_hashtable_row_format2(FILE* dbFile, bucket* b, Uns32T& colCount){
mas01mc@292 1062 while(b && b->t2!=IFLAG){
mas01mc@344 1063 if(!b->snext.ptr){
mas01mc@336 1064 fclose(dbFile);
mas01mc@306 1065 error("Empty collision chain in serial_write_hashtable_row_format2()");
mas01mc@306 1066 }
mas01mc@306 1067 t2 = O2_SERIAL_TOKEN_T2;
mas01mc@336 1068 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1069 fclose(dbFile);
mas01mc@292 1070 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1071 }
mas01mc@292 1072 t2 = b->t2;
mas01mc@336 1073 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1074 fclose(dbFile);
mas01mc@292 1075 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1076 }
mas01mc@344 1077 serial_write_element_format2(dbFile, b->snext.ptr, colCount);
mas01mc@292 1078 b=b->next;
mas01mc@292 1079 }
mas01mc@292 1080 }
mas01mc@292 1081
mas01mc@336 1082 void G::serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount){
mas01mc@292 1083 while(sb){
mas01mc@336 1084 if(fwrite(&sb->pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1085 fclose(dbFile);
mas01mc@292 1086 error("Write error in serial_write_element_format2()");
mas01mc@292 1087 }
mas01mc@292 1088 colCount++;
mas01mc@292 1089 sb=sb->snext;
mas01mc@292 1090 }
mas01mc@292 1091 }
mas01mc@292 1092
mas01mc@292 1093
mas01mc@292 1094 int G::serial_create(char* filename, Uns32T FMT){
mas01mc@292 1095 return serial_create(filename, w, L, N, C, k, d, FMT);
mas01mc@292 1096 }
mas01mc@292 1097
mas01mc@292 1098
mas01mc@292 1099 int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols,
mas01mc@292 1100 Uns32T numFuns, Uns32T dim, Uns32T FMT){
mas01mc@292 1101
mas01mc@292 1102 if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS
mas01mc@292 1103 || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS
mas01mc@292 1104 || dim>O2_SERIAL_MAX_DIM){
mas01mc@292 1105 error("LSH parameters out of bounds for serialization");
mas01mc@292 1106 }
mas01mc@292 1107
mas01mc@292 1108 int dbfid;
mas01mc@292 1109 if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01mc@292 1110 error("Can't create serial file", filename, "open");
mas01mc@292 1111 get_lock(dbfid, 1);
mas01mc@292 1112
mas01mc@292 1113 // Make header first to get size of serialized database
mas01mc@296 1114 lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT, pointCount);
mas01mc@296 1115
mas01mc@296 1116 cout << "file size: <=" << lshHeader->get_size()/1024UL << "KB" << endl;
mas01mc@296 1117 if(lshHeader->get_size()>O2_SERIAL_MAXFILESIZE)
mas01mc@296 1118 error("Maximum size of LSH file exceded: > 4000MB");
mas01mc@296 1119
mas01mc@292 1120 // go to the location corresponding to the last byte
mas01mc@292 1121 if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1)
mas01mc@292 1122 error("lseek error in db file", "", "lseek");
mas01mc@292 1123
mas01mc@292 1124 // write a dummy byte at the last location
mas01mc@292 1125 if (write (dbfid, "", 1) != 1)
mas01mc@292 1126 error("write error", "", "write");
mas01mc@292 1127
mas01mc@293 1128 char* db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);
mas01mc@296 1129
mas01mc@292 1130 memcpy (db, lshHeader, O2_SERIAL_HEADER_SIZE);
mas01mc@296 1131
mas01mc@292 1132 serial_munmap(db, O2_SERIAL_HEADER_SIZE);
mas01mc@296 1133
mas01mc@292 1134 close(dbfid);
mas01mc@292 1135
mas01mc@292 1136 std::cout << "done initializing tables." << endl;
mas01mc@292 1137
mas01mc@292 1138 return 1;
mas01mc@292 1139 }
mas01mc@292 1140
mas01mc@292 1141 char* G::serial_mmap(int dbfid, Uns32T memSize, Uns32T forWrite, off_t offset){
mas01mc@293 1142 char* db;
mas01mc@292 1143 if(forWrite){
mas01mc@292 1144 if ((db = (char*) mmap(0, memSize, PROT_READ | PROT_WRITE,
mas01mc@292 1145 MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@292 1146 error("mmap error in request for writable serialized database", "", "mmap");
mas01mc@292 1147 }
mas01mc@292 1148 else if ((db = (char*) mmap(0, memSize, PROT_READ, MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@292 1149 error("mmap error in read-only serialized database", "", "mmap");
mas01mc@292 1150
mas01mc@292 1151 return db;
mas01mc@292 1152 }
mas01mc@292 1153
mas01mc@292 1154 SerialHeaderT* G::serial_get_header(char* db){
mas01mc@292 1155 lshHeader = new SerialHeaderT();
mas01mc@292 1156 memcpy((char*)lshHeader, db, sizeof(SerialHeaderT));
mas01mc@292 1157
mas01mc@292 1158 if(lshHeader->lshMagic!=O2_SERIAL_MAGIC)
mas01mc@292 1159 error("Not an LSH database file");
mas01mc@292 1160
mas01mc@292 1161 return lshHeader;
mas01mc@292 1162 }
mas01mc@292 1163
mas01mc@292 1164 void G::serial_munmap(char* db, Uns32T N){
mas01mc@292 1165 munmap(db, N);
mas01mc@292 1166 }
mas01mc@292 1167
mas01mc@292 1168 int G::serial_open(char* filename, int writeFlag){
mas01mc@292 1169 int dbfid;
mas01mc@292 1170 if(writeFlag){
mas01mc@292 1171 if ((dbfid = open (filename, O_RDWR)) < 0)
mas01mc@292 1172 error("Can't open serial file for read/write", filename, "open");
mas01mc@292 1173 get_lock(dbfid, writeFlag);
mas01mc@292 1174 }
mas01mc@292 1175 else{
mas01mc@292 1176 if ((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1177 error("Can't open serial file for read", filename, "open");
mas01mc@292 1178 get_lock(dbfid, 0);
mas01mc@292 1179 }
mas01mc@292 1180
mas01mc@292 1181 return dbfid;
mas01mc@292 1182 }
mas01mc@292 1183
mas01mc@292 1184 void G::serial_close(int dbfid){
mas01mc@292 1185
mas01mc@292 1186 release_lock(dbfid);
mas01mc@292 1187 close(dbfid);
mas01mc@292 1188 }
mas01mc@292 1189
mas01mc@292 1190 int G::unserialize_lsh_header(char* filename){
mas01mc@292 1191
mas01mc@292 1192 int dbfid;
mas01mc@292 1193 char* db;
mas01mc@292 1194 // Test to see if file exists
mas01mc@292 1195 if((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1196 error("Can't open the file", filename, "open");
mas01mc@292 1197 close(dbfid);
mas01mc@292 1198 dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1199 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1200 serial_get_header(db); // read header
mas01mc@292 1201 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 1202
mas01mc@292 1203 // Unserialize header parameters
mas01mc@292 1204 H::L = lshHeader->numTables;
mas01mc@292 1205 H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0);
mas01mc@292 1206 H::N = lshHeader->numRows;
mas01mc@292 1207 H::C = lshHeader->numCols;
mas01mc@292 1208 H::k = lshHeader->numFuns;
mas01mc@292 1209 H::d = lshHeader->dataDim;
mas01mc@293 1210 H::w = lshHeader->binWidth;
mas01mc@293 1211 H::radius = lshHeader->radius;
mas01mc@293 1212 H::maxp = lshHeader->maxp;
mas01mc@296 1213 H::pointCount = lshHeader->pointCount;
mas01mc@292 1214
mas01mc@292 1215 return dbfid;
mas01mc@292 1216 }
mas01mc@292 1217
mas01mc@292 1218 // unserialize the LSH parameters
mas01mc@292 1219 // we leave the LSH tree on disk as a flat file
mas01mc@292 1220 // it is this flat file that we search by memory mapping
mas01mc@292 1221 void G::unserialize_lsh_functions(int dbfid){
mas01mc@292 1222 Uns32T j, kk;
mas01mc@292 1223 float* pf;
mas01mc@292 1224 Uns32T* pu;
mas01mc@292 1225
mas01mc@292 1226 // Load the hash functions into core
mas01mc@292 1227 char* db = serial_mmap(dbfid, get_serial_hashtable_offset(), 0);// get database pointer again
mas01mc@292 1228
mas01mc@292 1229 pf = get_serial_hashfunction_base(db);
mas01mc@292 1230
mas01mc@292 1231 #ifdef USE_U_FUNCTIONS
mas01mc@292 1232 for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v)
mas01mc@292 1233 for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1234 #else
mas01mc@292 1235 for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v)
mas01mc@292 1236 for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1237 #endif
mas01mc@292 1238 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 1239 H::A[j][kk][i] = *pf++; // Normally distributed random vectors
mas01mc@292 1240 }
mas01mc@292 1241 }
mas01mc@292 1242 #ifdef USE_U_FUNCTIONS
mas01mc@292 1243 for( j = 0 ; j < H::m ; j++ ) // biases b
mas01mc@292 1244 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@292 1245 #else
mas01mc@292 1246 for( j = 0 ; j < H::L ; j++ ) // biases b
mas01mc@292 1247 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1248 #endif
mas01mc@293 1249 H::b[j][kk] = *pf++;
mas01mc@292 1250
mas01mc@292 1251 pu = (Uns32T*)pf;
mas01mc@292 1252 for( j = 0 ; j < H::L ; j++ ) // Z projectors r1
mas01mc@292 1253 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1254 H::r1[j][kk] = *pu++;
mas01mc@292 1255
mas01mc@292 1256 for( j = 0 ; j < H::L ; j++ ) // Z projectors r2
mas01mc@292 1257 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1258 H::r2[j][kk] = *pu++;
mas01mc@292 1259
mas01mc@293 1260 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 1261 }
mas01mc@292 1262
mas01mc@292 1263 void G::unserialize_lsh_hashtables_format1(int fid){
mas01mc@292 1264 SerialElementT *pe, *pt;
mas01mc@292 1265 Uns32T x,y;
mas01mc@292 1266 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1267 // Read the hash tables into core
mas01mc@292 1268 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 1269 // memory map a single hash table
mas01mc@292 1270 // Align each hash table to page boundary
mas01mc@292 1271 char* dbtable = serial_mmap(fid, hashTableSize, 0,
mas01mc@292 1272 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@324 1273 #ifdef __CYGWIN__
mas01mc@324 1274 // No madvise in CYGWIN
mas01mc@324 1275 #else
mas01mc@292 1276 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 1277 error("could not advise hashtable memory","","madvise");
mas01mc@324 1278 #endif
mas01mc@292 1279 pt=(SerialElementT*)dbtable;
mas01mc@292 1280 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 1281 // Move disk pointer to beginning of row
mas01mc@292 1282 pe=pt+y*lshHeader->numCols;
mas01mc@292 1283 unserialize_hashtable_row_format1(pe, h[x]+y);
mas01mc@340 1284 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1285 printf("S[%d,%d]", x, y);
mas01mc@292 1286 serial_bucket_dump(pe);
mas01mc@292 1287 printf("C[%d,%d]", x, y);
mas01mc@292 1288 dump_hashtable_row(h[x][y]);
mas01mc@292 1289 #endif
mas01mc@292 1290 }
mas01mc@292 1291 serial_munmap(dbtable, hashTableSize);
mas01mc@292 1292 }
mas01mc@292 1293 }
mas01mc@292 1294
mas01mc@292 1295 void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){
mas01mc@292 1296 Uns32T colCount = 0;
mas01mc@292 1297 while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){
mas01mc@292 1298 H::p = pe->pointID; // current point ID
mas01mc@292 1299 t2 = pe->hashValue;
mas01mc@292 1300 bucket_insert_point(b);
mas01mc@292 1301 pe++;
mas01mc@292 1302 colCount++;
mas01mc@292 1303 }
mas01mc@292 1304 }
mas01mc@292 1305
mas01mc@340 1306 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
mas01mc@292 1307 Uns32T x=0,y=0;
mas01mc@292 1308
mas01mc@292 1309 // Seek to hashtable base offset
mas01mc@336 1310 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 1311 fclose(dbFile);
mas01mc@336 1312 error("fSeek error in unserialize_lsh_hashtables_format2");
mas01mc@292 1313 }
mas01mc@292 1314
mas01mc@292 1315 // Read the hash tables into core (structure is given in header)
mas01mc@292 1316 while( x < H::L){
mas01mc@336 1317 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1318 fclose(dbFile);
mas01mc@292 1319 error("Read error","unserialize_lsh_hashtables_format2()");
mas01mc@292 1320 }
mas01mc@306 1321 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE)
mas01mc@292 1322 x++; // End of table
mas01mc@292 1323 else
mas01mc@292 1324 while(y < H::N){
mas01mc@306 1325 // Read a row and move file pointer to beginning of next row or _bittable
mas01mc@306 1326 if(!(H::t1==O2_SERIAL_TOKEN_T1)){
mas01mc@336 1327 fclose(dbFile);
mas01mc@306 1328 error("State matchine error T1","unserialize_lsh_hashtables_format2()");
mas01mc@292 1329 }
mas01mc@336 1330 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1331 fclose(dbFile);
mas01mc@306 1332 error("Read error: t1","unserialize_lsh_hashtables_format2()");
mas01mc@306 1333 }
mas01mc@306 1334 y = H::t1;
mas01mc@292 1335 if(y>=H::N){
mas01mc@336 1336 fclose(dbFile);
mas01mc@292 1337 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
mas01mc@292 1338 }
mas01mc@340 1339 Uns32T token = 0;
mas01mc@340 1340 #ifdef LSH_CORE_ARRAY
mas01mc@340 1341 Uns32T numElements;
mas01mc@340 1342 if(fread(&numElements, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1343 fclose(dbFile);
mas01mc@340 1344 error("Read error: numElements","unserialize_lsh_hashtables_format2()");
mas01mc@340 1345 }
mas01mc@292 1346
mas01mc@340 1347 // BACKWARD COMPATIBILITY: check to see if T2 or END token was read
mas01mc@340 1348 if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
mas01mc@340 1349 forMerge=true; // Force use of dynamic linked list core format
mas01mc@340 1350 token = numElements;
mas01mc@340 1351 }
mas01mc@340 1352
mas01mc@340 1353 if(forMerge)
mas01mc@340 1354 // Use linked list CORE format
mas01mc@340 1355 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
mas01mc@340 1356 else
mas01mc@340 1357 // Use ARRAY CORE format with numElements counter
mas01mc@340 1358 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
mas01mc@340 1359 #else
mas01mc@340 1360 token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
mas01mc@340 1361 #endif
mas01mc@340 1362
mas01mc@340 1363 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1364 printf("C[%d,%d]", x, y);
mas01mc@292 1365 dump_hashtable_row(h[x][y]);
mas01mc@292 1366 #endif
mas01mc@292 1367 // Check that token is valid
mas01mc@306 1368 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
mas01mc@336 1369 fclose(dbFile);
mas01mc@292 1370 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
mas01mc@292 1371 }
mas01mc@292 1372 // Check for end of table flag
mas01mc@306 1373 if(token==O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@292 1374 x++;
mas01mc@292 1375 break;
mas01mc@292 1376 }
mas01mc@292 1377 // Check for new row flag
mas01mc@306 1378 if(token==O2_SERIAL_TOKEN_T1)
mas01mc@292 1379 H::t1 = token;
mas01mc@292 1380 }
mas01mc@292 1381 }
mas01mc@306 1382 }
mas01mc@292 1383
mas01mc@340 1384 Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token){
mas01mc@292 1385 bool pointFound = false;
mas01mc@340 1386
mas01mc@340 1387 if(token)
mas01mc@340 1388 H::t2 = token;
mas01mc@340 1389 else if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1390 fclose(dbFile);
mas01mc@340 1391 error("Read error T2 token","unserialize_hashtable_row_format2");
mas01mc@340 1392 }
mas01mc@340 1393
mas01mc@306 1394 if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){
mas01mc@336 1395 fclose(dbFile);
mas01mc@292 1396 error("State machine error: expected E or T2");
mas01mc@292 1397 }
mas01mc@340 1398
mas01mc@306 1399 while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){
mas01mc@292 1400 pointFound=false;
mas01mc@292 1401 // Check for T2 token
mas01mc@306 1402 if(H::t2!=O2_SERIAL_TOKEN_T2){
mas01mc@336 1403 fclose(dbFile);
mas01mc@292 1404 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
mas01mc@306 1405 }
mas01mc@292 1406 // Read t2 value
mas01mc@336 1407 if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1408 fclose(dbFile);
mas01mc@292 1409 error("Read error t2","unserialize_hashtable_row_format2");
mas01mc@292 1410 }
mas01mc@336 1411 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1412 fclose(dbFile);
mas01mc@292 1413 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1414 }
mas01mc@306 1415 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@292 1416 pointFound=true;
mas01mc@292 1417 bucket_insert_point(b);
mas01mc@336 1418 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1419 fclose(dbFile);
mas01mc@292 1420 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1421 }
mas01mc@292 1422 }
mas01mc@292 1423 if(!pointFound)
mas01mc@292 1424 error("State machine error: point", "unserialize_hashtable_row_format2()");
mas01mc@306 1425 H::t2 = H::p; // Copy last found token to t2
mas01mc@292 1426 }
mas01mc@292 1427 return H::t2; // holds current token
mas01mc@292 1428 }
mas01mc@292 1429
mas01mc@340 1430 // Unserialize format2 hashtable row to a CORE_ARRAY pointed to
mas01mc@340 1431 // by the hashtable row pointer: rowPtr
mas01mc@340 1432 //
mas01mc@340 1433 // numElements is the total number of t2 buckets plus points
mas01mc@340 1434 // memory required is numElements+1+sizeof(hashtable ptr)
mas01mc@340 1435 //
mas01mc@340 1436 // numElements = numPoints + numBuckets
mas01mc@340 1437 //
mas01mc@340 1438 // During inserts (merges) new hashtable entries are appened at rowPtr+numPoints+numBuckets+1
mas01mc@340 1439 //
mas01mc@340 1440 // ASSUME: that LSH_LIST_HEAD_COUNTERS is set so that the first hashtable bucket is used to count
mas01mc@340 1441 // point and bucket entries
mas01mc@340 1442 //
mas01mc@340 1443 // We store the values of numPoints and numBuckets in separate fields of the first bucket
mas01mc@340 1444 // rowPtr->t2 // numPoints
mas01mc@340 1445 // (Uns32T)(rowPtr->snext) // numBuckets
mas01mc@340 1446 //
mas01mc@340 1447 // We cast the rowPtr->next pointer to (Uns32*) malloc(numElements*sizeof(Uns32T) + sizeof(bucket*))
mas01mc@340 1448 // To get to the fist bucket, we use
mas01mc@340 1449 //
mas01mc@340 1450
mas01mc@340 1451 #define READ_UNS32T(VAL,TOKENSTR) if(fread(VAL, sizeof(Uns32T), 1, dbFile) != 1){\
mas01mc@340 1452 fclose(dbFile);error("Read error unserialize_hashtable_format2",TOKENSTR);}
mas01mc@340 1453
mas01mc@340 1454 #define TEST_TOKEN(TEST, TESTSTR) if(TEST){fclose(dbFile);error("State machine error: ", TESTSTR);}
mas01mc@340 1455
mas01mc@340 1456 #define SKIP_BITS_LEFT_SHIFT_MSB (30)
mas01mc@340 1457
mas01mc@340 1458 #define SKIP_BITS_RIGHT_SHIFT_MSB (28)
mas01mc@340 1459 #define SKIP_BITS_RIGHT_SHIFT_LSB (30)
mas01mc@340 1460
mas01mc@340 1461 #define MAX_POINTS_IN_BUCKET_CORE_ARRAY (16)
mas01mc@340 1462 #define LSH_CORE_ARRAY_END_ROW_TOKEN (0xFFFFFFFD)
mas01mc@340 1463
mas01mc@340 1464 // Encode the skip bits. Zero if only one point, MAX 8 (plus first == 9)
mas01mc@340 1465 #define ENCODE_POINT_SKIP_BITS TEST_TOKEN(!numPointsThisBucket, "no points found");\
mas01mc@340 1466 if(numPointsThisBucket==1){\
mas01mc@340 1467 secondPtr=ap++;\
mas01mc@340 1468 *secondPtr=0;\
mas01mc@340 1469 numPoints++;\
mas01mc@340 1470 }\
mas01mc@340 1471 if(numPointsThisBucket>1){\
mas01mc@340 1472 *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
mas01mc@340 1473 *secondPtr |= ( ( (numPointsThisBucket-1) & 0xC) >> 2 ) << SKIP_BITS_LEFT_SHIFT_MSB;}
mas01mc@340 1474
mas01mc@340 1475 Uns32T G::unserialize_hashtable_row_to_array(FILE* dbFile, bucket** rowPP, Uns32T numElements){
mas01mc@340 1476 Uns32T numPointsThisBucket = 0;
mas01mc@340 1477 Uns32T numBuckets = 0;
mas01mc@340 1478 Uns32T numPoints = 0;
mas01mc@340 1479 Uns32T* firstPtr = 0;
mas01mc@340 1480 Uns32T* secondPtr = 0;
mas01mc@340 1481
mas01mc@340 1482 // Initialize new row
mas01mc@340 1483 if(!*rowPP){
mas01mc@340 1484 *rowPP = new bucket();
mas01mc@340 1485 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1486 (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@340 1487 (*rowPP)->next = 0;
mas01mc@340 1488 #endif
mas01mc@340 1489 }
mas01mc@340 1490 bucket* rowPtr = *rowPP;
mas01mc@340 1491
mas01mc@340 1492 READ_UNS32T(&(H::t2),"t2");
mas01mc@340 1493 TEST_TOKEN(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2), "expected E or T2");
mas01mc@340 1494 // Because we encode points in 16-point blocks, we sometimes allocate repeated t2 elements
mas01mc@340 1495 // So over-allocate by a factor of two and realloc later to actual numElements
mas01mc@340 1496 CR_ASSERT(rowPtr->next = (bucket*) malloc((2*numElements+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1497 Uns32T* ap = reinterpret_cast<Uns32T*>(rowPtr->next); // Cast pointer to Uns32T* for array format
mas01mc@340 1498 while( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1) ){
mas01mc@340 1499 numPointsThisBucket = 0;// reset bucket point counter
mas01mc@340 1500 secondPtr = 0; // reset second-point pointer
mas01mc@340 1501 TEST_TOKEN(H::t2!=O2_SERIAL_TOKEN_T2, "expected T2");
mas01mc@340 1502 READ_UNS32T(&(H::t2), "Read error t2");
mas01mc@340 1503 *ap++ = H::t2; // Insert t2 value into array
mas01mc@340 1504 numBuckets++;
mas01mc@340 1505 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1506 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@340 1507 if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
mas01mc@340 1508 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1509 *ap++ = H::t2; // Extra element
mas01mc@340 1510 numBuckets++; // record this as a new bucket
mas01mc@340 1511 numPointsThisBucket=0; // reset bucket point counter
mas01mc@340 1512 secondPtr = 0; // reset second-point pointer
mas01mc@340 1513 }
mas01mc@340 1514 if( ++numPointsThisBucket == 1 )
mas01mc@340 1515 firstPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1516 else if( numPointsThisBucket == 2 )
mas01mc@340 1517 secondPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1518 numPoints++;
mas01mc@340 1519 *ap++ = H::p;
mas01mc@340 1520 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1521 }
mas01mc@340 1522 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1523 H::t2 = H::p; // Copy last found token to t2
mas01mc@340 1524 }
mas01mc@340 1525 // Reallocate the row to its actual size
mas01mc@340 1526 CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1527 // Record the sizes at the head of the row
mas01mc@344 1528 rowPtr->snext.numBuckets = numBuckets;
mas01mc@340 1529 rowPtr->t2 = numPoints;
mas01mc@340 1530 // Place end of row marker
mas01mc@340 1531 *ap++ = LSH_CORE_ARRAY_END_ROW_TOKEN;
mas01mc@340 1532 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
mas01mc@340 1533 rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
mas01mc@340 1534 // Allocate a new dynamic list head at the end of the array
mas01mc@340 1535 bucket** listPtr = reinterpret_cast<bucket**> (ap);
mas01mc@340 1536 *listPtr = 0;
mas01mc@340 1537 // Return current token
mas01mc@340 1538 return H::t2; // return H::t2 which holds current token [E or T1]
mas01mc@340 1539 }
mas01mc@340 1540
mas01mc@340 1541
mas01mc@340 1542
mas01mc@340 1543 // *p is a pointer to the beginning of a hashtable row array
mas01mc@340 1544 // The array consists of t2 hash keys and one or more point identifiers p for each hash key
mas01mc@340 1545 // Retrieval is performed by generating a hash key query_t2 for query point q
mas01mc@340 1546 // We identify the row that t2 is stored in using a secondary hash t1, this row is the entry
mas01mc@340 1547 // point for retrieve_from_core_hashtable_array
mas01mc@340 1548 #define SKIP_BITS (0xC0000000)
mas01mc@340 1549 void G::retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos){
mas01mc@340 1550 Uns32T skip;
mas01mc@340 1551 Uns32T t2;
mas01mc@340 1552 Uns32T p1;
mas01mc@340 1553 Uns32T p2;
mas01mc@340 1554
mas01mc@340 1555 CR_ASSERT(p);
mas01mc@340 1556
mas01mc@340 1557 do{
mas01mc@340 1558 t2 = *p++;
mas01mc@340 1559 if( t2 > H::t2 )
mas01mc@340 1560 return;
mas01mc@340 1561 p1 = *p++;
mas01mc@340 1562 p2 = *p++;
mas01mc@340 1563 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@340 1564 if( t2 == H::t2 ){
mas01mc@340 1565 add_point_callback(calling_instance, p1 ^ (p1 & SKIP_BITS), qpos, radius);
mas01mc@340 1566 if(skip--){
mas01mc@340 1567 add_point_callback(calling_instance, p2 ^ (p2 & SKIP_BITS), qpos, radius);
mas01mc@340 1568 while(skip-- )
mas01mc@340 1569 add_point_callback(calling_instance, *p++, qpos, radius);
mas01mc@340 1570 }
mas01mc@340 1571 }
mas01mc@340 1572 else
mas01mc@340 1573 if(*p != LSH_CORE_ARRAY_END_ROW_TOKEN)
mas01mc@340 1574 p = p + skip;
mas01mc@340 1575 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@340 1576 }
mas01mc@340 1577
mas01mc@292 1578 void G::dump_hashtable_row(bucket* p){
mas01mc@292 1579 while(p && p->t2!=IFLAG){
mas01mc@344 1580 sbucket* sbp = p->snext.ptr;
mas01mc@292 1581 while(sbp){
mas01mc@292 1582 printf("(%0X,%u)", p->t2, sbp->pointID);
mas01mc@292 1583 fflush(stdout);
mas01mc@292 1584 sbp=sbp->snext;
mas01mc@292 1585 }
mas01mc@292 1586 p=p->next;
mas01mc@292 1587 }
mas01mc@292 1588 printf("\n");
mas01mc@292 1589 }
mas01mc@292 1590
mas01mc@292 1591
mas01mc@292 1592 // G::serial_retrieve_point( ... )
mas01mc@292 1593 // retrieves (pointID) from a serialized LSH database
mas01mc@292 1594 //
mas01mc@292 1595 // inputs:
mas01mc@292 1596 // filename - file name of serialized LSH database
mas01mc@292 1597 // vv - query point set
mas01mc@292 1598 //
mas01mc@292 1599 // outputs:
mas01mc@292 1600 // inserts retrieved points into add_point() callback method
mas01mc@292 1601 void G::serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller)
mas01mc@292 1602 {
mas01mc@292 1603 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1604 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1605 serial_get_header(dbheader); // read header
mas01mc@292 1606 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1607
mas01mc@292 1608 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1609 serial_close(dbfid);
mas01mc@292 1610 error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1611 }
mas01mc@292 1612
mas01mc@292 1613 // size of each hash table
mas01mc@292 1614 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1615 calling_instance = caller; // class instance variable used in ...bucket_chain_point()
mas01mc@292 1616 add_point_callback = add_point;
mas01mc@292 1617
mas01mc@292 1618 for(Uns32T j=0; j<L; j++){
mas01mc@292 1619 // memory map a single hash table for random access
mas01mc@292 1620 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1621 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1622 #ifdef __CYGWIN__
mas01mc@324 1623 // No madvise in CYGWIN
mas01mc@324 1624 #else
mas01mc@292 1625 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@292 1626 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1627 #endif
mas01mc@292 1628 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1629 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
mas01mc@293 1630 H::compute_hash_functions(vv[qpos]);
mas01mc@293 1631 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1632 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1633 }
mas01mc@292 1634 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@292 1635 }
mas01mc@292 1636 serial_close(dbfid);
mas01mc@292 1637 }
mas01mc@292 1638
mas01mc@292 1639 void G::serial_retrieve_point(char* filename, vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 1640 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1641 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1642 serial_get_header(dbheader); // read header
mas01mc@292 1643 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1644
mas01mc@292 1645 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1646 serial_close(dbfid);
mas01mc@292 1647 error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1648 }
mas01mc@292 1649
mas01mc@292 1650 // size of each hash table
mas01mc@292 1651 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1652 calling_instance = caller;
mas01mc@292 1653 add_point_callback = add_point;
mas01mc@293 1654 H::compute_hash_functions(v);
mas01mc@292 1655 for(Uns32T j=0; j<L; j++){
mas01mc@292 1656 // memory map a single hash table for random access
mas01mc@292 1657 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1658 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1659 #ifdef __CYGWIN__
mas01mc@324 1660 // No madvise in CYGWIN
mas01mc@324 1661 #else
mas01mc@292 1662 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@292 1663 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1664 #endif
mas01mc@292 1665 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@293 1666 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1667 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1668 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@292 1669 }
mas01mc@292 1670 serial_close(dbfid);
mas01mc@292 1671 }
mas01mc@292 1672
mas01mc@292 1673 void G::serial_dump_tables(char* filename){
mas01mc@292 1674 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1675 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1676 serial_get_header(dbheader); // read header
mas01mc@292 1677 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1678 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1679 for(Uns32T j=0; j<L; j++){
mas01mc@292 1680 // memory map a single hash table for random access
mas01mc@292 1681 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1682 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1683 #ifdef __CYGWIN__
mas01mc@324 1684 // No madvise in CYGWIN
mas01mc@324 1685 #else
mas01mc@292 1686 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 1687 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1688 #endif
mas01mc@292 1689 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1690 printf("*********** TABLE %d ***************\n", j);
mas01mc@292 1691 fflush(stdout);
mas01mc@292 1692 int count=0;
mas01mc@292 1693 do{
mas01mc@292 1694 printf("[%d,%d]", j, count++);
mas01mc@292 1695 fflush(stdout);
mas01mc@292 1696 serial_bucket_dump(pe);
mas01mc@292 1697 pe+=lshHeader->numCols;
mas01mc@292 1698 }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols);
mas01mc@292 1699 }
mas01mc@292 1700
mas01mc@292 1701 }
mas01mc@292 1702
mas01mc@292 1703 void G::serial_bucket_dump(SerialElementT* pe){
mas01mc@292 1704 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1705 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1706 printf("(%0X,%u)",pe->hashValue,pe->pointID);
mas01mc@292 1707 pe++;
mas01mc@292 1708 }
mas01mc@292 1709 printf("\n");
mas01mc@292 1710 fflush(stdout);
mas01mc@292 1711 }
mas01mc@292 1712
mas01mc@292 1713 void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){
mas01mc@292 1714 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1715 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1716 if(pe->hashValue==t2){ // new match
mas01mc@292 1717 add_point_callback(calling_instance, pe->pointID, qpos, radius);
mas01mc@292 1718 }
mas01mc@292 1719 pe++;
mas01mc@292 1720 }
mas01mc@292 1721 }
mas01mc@292 1722
mas01mc@292 1723 void G::bucket_chain_point(bucket* p, Uns32T qpos){
mas01mc@292 1724 if(!p || p->t2==IFLAG)
mas01mc@292 1725 return;
mas01mc@292 1726 if(p->t2==t2){ // match
mas01mc@344 1727 sbucket_chain_point(p->snext.ptr, qpos); // add to reporter
mas01mc@292 1728 }
mas01mc@292 1729 if(p->next){
mas01mc@292 1730 bucket_chain_point(p->next, qpos); // recurse
mas01mc@292 1731 }
mas01mc@292 1732 }
mas01mc@292 1733
mas01mc@292 1734 void G::sbucket_chain_point(sbucket* p, Uns32T qpos){
mas01mc@292 1735 add_point_callback(calling_instance, p->pointID, qpos, radius);
mas01mc@292 1736 if(p->snext){
mas01mc@292 1737 sbucket_chain_point(p->snext, qpos);
mas01mc@292 1738 }
mas01mc@292 1739 }
mas01mc@292 1740
mas01mc@292 1741 void G::get_lock(int fd, bool exclusive) {
mas01mc@292 1742 struct flock lock;
mas01mc@292 1743 int status;
mas01mc@292 1744 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01mc@292 1745 lock.l_whence = SEEK_SET;
mas01mc@292 1746 lock.l_start = 0;
mas01mc@292 1747 lock.l_len = 0; /* "the whole file" */
mas01mc@292 1748 retry:
mas01mc@292 1749 do {
mas01mc@292 1750 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1751 } while (status != 0 && errno == EINTR);
mas01mc@292 1752 if (status) {
mas01mc@292 1753 if (errno == EAGAIN) {
mas01mc@292 1754 sleep(1);
mas01mc@292 1755 goto retry;
mas01mc@292 1756 } else {
mas01mc@292 1757 error("fcntl lock error", "", "fcntl");
mas01mc@292 1758 }
mas01mc@292 1759 }
mas01mc@292 1760 }
mas01mc@292 1761
mas01mc@292 1762 void G::release_lock(int fd) {
mas01mc@292 1763 struct flock lock;
mas01mc@292 1764 int status;
mas01mc@292 1765
mas01mc@292 1766 lock.l_type = F_UNLCK;
mas01mc@292 1767 lock.l_whence = SEEK_SET;
mas01mc@292 1768 lock.l_start = 0;
mas01mc@292 1769 lock.l_len = 0;
mas01mc@292 1770
mas01mc@292 1771 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1772
mas01mc@292 1773 if (status)
mas01mc@292 1774 error("fcntl unlock error", "", "fcntl");
mas01mc@292 1775 }