annotate lshlib.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents 77f7bc99dfd6
children
rev   line source
mas01mc@292 1 #include "lshlib.h"
mas01mc@292 2
mas01cr@648 3 #define getpagesize() (64*1024)
mas01mc@292 4
mas01mc@764 5 Uns32T get_page_logn(){
mas01mc@764 6 int pagesz = (int)sysconf(_SC_PAGESIZE);
mas01mc@764 7 return (Uns32T)log2((double)pagesz);
mas01mc@292 8 }
mas01mc@292 9
mas01mc@764 10 unsigned align_up(unsigned x, unsigned w) { return (((x) + ((1<<w)-1)) & ~((1<<w)-1)); }
mas01mc@764 11
mas01mc@292 12 void H::error(const char* a, const char* b, const char *sysFunc) {
mas01mc@292 13 cerr << a << ": " << b << endl;
mas01mc@292 14 if (sysFunc) {
mas01mc@292 15 perror(sysFunc);
mas01mc@292 16 }
mas01mc@292 17 exit(1);
mas01mc@292 18 }
mas01mc@292 19
mas01mc@764 20 H::H():
mas01mc@764 21 multiProbePtr(new MultiProbe()),
mas01mc@764 22 boundaryDistances(0)
mas01mc@764 23 {
mas01mc@293 24 // Delay initialization of lsh functions until we know the parameters
mas01mc@293 25 }
mas01mc@293 26
mas01mc@293 27 H::H(Uns32T kk, Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float ww, float rr):
mas01mc@292 28 #ifdef USE_U_FUNCTIONS
mas01mc@292 29 use_u_functions(true),
mas01mc@292 30 #else
mas01mc@292 31 use_u_functions(false),
mas01mc@292 32 #endif
mas01mc@293 33 maxp(0),
mas01mc@292 34 bucketCount(0),
mas01mc@292 35 pointCount(0),
mas01mc@292 36 N(NN),
mas01mc@292 37 C(CC),
mas01mc@292 38 k(kk),
mas01mc@292 39 m(mm),
mas01mc@293 40 L((mm*(mm-1))/2),
mas01mc@293 41 d(dd),
mas01mc@293 42 w(ww),
mas01mc@764 43 radius(rr),
mas01mc@764 44 multiProbePtr(new MultiProbe()),
mas01mc@764 45 boundaryDistances(0)
mas01mc@292 46 {
mas01mc@292 47
mas01mc@292 48 if(m<2){
mas01mc@292 49 m=2;
mas01mc@292 50 L=1; // check value of L
mas01mc@292 51 cout << "warning: setting m=2, L=1" << endl;
mas01mc@292 52 }
mas01mc@292 53 if(use_u_functions && k%2){
mas01mc@292 54 k++; // make sure k is even
mas01mc@292 55 cout << "warning: setting k even" << endl;
mas01mc@292 56 }
mas01mc@293 57
mas01mc@293 58 // We have the necessary parameters, so construct hashfunction datastructures
mas01mc@293 59 initialize_lsh_functions();
mas01mc@292 60 }
mas01mc@292 61
mas01mc@293 62 void H::initialize_lsh_functions(){
mas01mc@292 63 H::P = UH_PRIME_DEFAULT;
mas01mc@292 64
mas01mc@292 65 /* FIXME: don't use time(); instead use /dev/random or similar */
mas01mc@292 66 /* FIXME: write out the seed somewhere, so that we can get
mas01mc@292 67 repeatability */
mas01mc@292 68 #ifdef MT19937
mas01mc@292 69 init_genrand(time(NULL));
mas01mc@292 70 #else
mas01mc@292 71 srand(time(NULL)); // seed random number generator
mas01mc@292 72 #endif
mas01mc@293 73 Uns32T i,j, kk;
mas01mc@293 74 #ifdef USE_U_FUNCTIONS
mas01mc@293 75 H::A = new float**[ H::m ]; // m x k x d random projectors
mas01mc@293 76 H::b = new float*[ H::m ]; // m x k random biases
mas01mc@293 77 #else
mas01mc@293 78 H::A = new float**[ H::L ]; // m x k x d random projectors
mas01mc@293 79 H::b = new float*[ H::L ]; // m x k random biases
mas01mc@293 80 #endif
mas01mc@293 81 H::g = new Uns32T*[ H::L ]; // L x k random projections
mas01mc@293 82 assert( H::g && H::A && H::b ); // failure
mas01mc@293 83 #ifdef USE_U_FUNCTIONS
mas01mc@293 84 // Use m \times u_i functions \in R^{(k/2) \times (d)}
mas01mc@293 85 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 86 for( j = 0; j < H::m ; j++ ){ // m functions u_i(v)
mas01mc@293 87 H::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients
mas01mc@293 88 H::b[j] = new float[ H::k/2 ]; // bias
mas01mc@293 89 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 90 for( kk = 0; kk < H::k/2 ; kk++ ){
mas01mc@293 91 H::A[j][kk] = new float[ H::d ];
mas01mc@293 92 assert( H::A[j][kk] ); // failure
mas01mc@293 93 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 94 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 95 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 96 }
mas01mc@293 97 }
mas01mc@293 98 #else
mas01mc@293 99 // Use m \times u_i functions \in R^{k \times (d)}
mas01mc@293 100 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 101 for( j = 0; j < H::L ; j++ ){ // m functions u_i(v)
mas01mc@293 102 H::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients
mas01mc@293 103 H::b[j] = new float[ H::k ]; // bias
mas01mc@293 104 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 105 for( kk = 0; kk < H::k ; kk++ ){
mas01mc@293 106 H::A[j][kk] = new float[ H::d ];
mas01mc@293 107 assert( H::A[j][kk] ); // failure
mas01mc@293 108 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 109 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 110 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 111 }
mas01mc@293 112 }
mas01mc@293 113 #endif
mas01mc@293 114
mas01mc@293 115 // Storage for LSH hash function output (Uns32T)
mas01mc@293 116 for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2)
mas01mc@293 117 H::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z
mas01mc@293 118 assert( H::g[j] );
mas01mc@292 119 }
mas01mc@292 120
mas01mc@293 121 // LSH Hash tables
mas01mc@293 122 H::h = new bucket**[ H::L ];
mas01mc@293 123 assert( H::h );
mas01mc@292 124 for( j = 0 ; j < H::L ; j++ ){
mas01mc@292 125 H::h[j] = new bucket*[ H::N ];
mas01mc@292 126 assert( H::h[j] );
mas01mc@292 127 for( i = 0 ; i < H::N ; i++)
mas01mc@292 128 H::h[j][i] = 0;
mas01mc@292 129 }
mas01mc@293 130
mas01mc@293 131 // Standard hash functions
mas01mc@293 132 H::r1 = new Uns32T*[ H::L ];
mas01mc@293 133 H::r2 = new Uns32T*[ H::L ];
mas01mc@293 134 assert( H::r1 && H::r2 ); // failure
mas01mc@293 135 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 136 H::r1[ j ] = new Uns32T[ H::k ];
mas01mc@293 137 H::r2[ j ] = new Uns32T[ H::k ];
mas01mc@293 138 assert( H::r1[j] && H::r2[j] ); // failure
mas01mc@293 139 for( i = 0; i<H::k; i++){
mas01mc@293 140 H::r1[j][i] = randr();
mas01mc@293 141 H::r2[j][i] = randr();
mas01mc@293 142 }
mas01mc@293 143 }
mas01mc@293 144
mas01mc@534 145 // Storage for whole or partial function evaluation depending on USE_U_FUNCTIONS
mas01mc@293 146 H::initialize_partial_functions();
mas01mc@764 147
mas01mc@764 148 // MultiProbe distance functions, there are 2*k per hashtable
mas01mc@764 149 H::boundaryDistances = new float*[ H::L ]; // L x 2k boundary distances
mas01mc@764 150 assert( H::boundaryDistances ); // failure
mas01mc@764 151 for( j = 0; j < H::L ; j++ ){ // 2*k functions x_i(q)
mas01mc@764 152 H::boundaryDistances[j] = new float[ 2*H::k ];
mas01mc@764 153 assert( H::boundaryDistances[j] ); // failure
mas01mc@764 154 for( kk = 0; kk < 2*H::k ; kk++ )
mas01mc@764 155 H::boundaryDistances[j][kk] = 0.0f; // initialize with zeros
mas01mc@764 156 }
mas01mc@293 157 }
mas01mc@293 158
mas01mc@293 159 void H::initialize_partial_functions(){
mas01mc@293 160
mas01mc@293 161 #ifdef USE_U_FUNCTIONS
mas01mc@293 162 H::uu = vector<vector<Uns32T> >(H::m);
mas01mc@293 163 for( Uns32T aa=0 ; aa < H::m ; aa++ )
mas01mc@293 164 H::uu[aa] = vector<Uns32T>( H::k/2 );
mas01mc@293 165 #endif
mas01mc@293 166 }
mas01mc@293 167
mas01mc@293 168
mas01mc@293 169 // Generate z ~ N(0,1)
mas01mc@293 170 float H::randn(){
mas01mc@293 171 // Box-Muller
mas01mc@293 172 float x1, x2;
mas01mc@293 173 do{
mas01mc@293 174 x1 = ranf();
mas01mc@293 175 } while (x1 == 0); // cannot take log of 0
mas01mc@293 176 x2 = ranf();
mas01mc@293 177 float z;
mas01mc@293 178 z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2);
mas01mc@293 179 return z;
mas01mc@293 180 }
mas01mc@293 181
mas01mc@293 182 float H::ranf(){
mas01mc@293 183 #ifdef MT19937
mas01mc@293 184 return (float) genrand_real2();
mas01mc@293 185 #else
mas01mc@293 186 return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) );
mas01mc@293 187 #endif
mas01mc@293 188 }
mas01mc@293 189
mas01mc@293 190 // range is 1..2^29
mas01mc@293 191 /* FIXME: that looks like an ... odd range. Still. */
mas01mc@293 192 Uns32T H::randr(){
mas01mc@293 193 #ifdef MT19937
mas01mc@293 194 return (Uns32T)((genrand_int32() >> 3) + 1);
mas01mc@293 195 #else
mas01mc@293 196 return (Uns32T) ((rand() >> 2) + 1);
mas01mc@293 197 #endif
mas01mc@292 198 }
mas01mc@292 199
mas01mc@292 200 // Destruct hash tables
mas01mc@292 201 H::~H(){
mas01mc@293 202 Uns32T i,j,kk;
mas01mc@340 203 bucket** pp;
mas01mc@293 204 #ifdef USE_U_FUNCTIONS
mas01mc@293 205 for( j = 0 ; j < H::m ; j++ ){
mas01mc@293 206 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@293 207 delete[] A[j][kk];
mas01mc@293 208 delete[] A[j];
mas01mc@293 209 }
mas01mc@293 210 delete[] A;
mas01mc@293 211 for( j = 0 ; j < H::m ; j++ )
mas01mc@293 212 delete[] b[j];
mas01mc@293 213 delete[] b;
mas01mc@293 214 #else
mas01mc@293 215 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 216 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@293 217 delete[] A[j][kk];
mas01mc@293 218 delete[] A[j];
mas01mc@293 219 }
mas01mc@293 220 delete[] A;
mas01mc@293 221 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 222 delete[] b[j];
mas01mc@293 223 delete[] b;
mas01mc@293 224 #endif
mas01mc@293 225
mas01mc@293 226 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 227 delete[] g[j];
mas01mc@293 228 delete[] g;
mas01mc@292 229 for( j=0 ; j < H::L ; j++ ){
mas01mc@292 230 delete[] H::r1[ j ];
mas01mc@292 231 delete[] H::r2[ j ];
mas01mc@340 232 for(i = 0; i< H::N ; i++){
mas01mc@340 233 pp = 0;
mas01mc@340 234 #ifdef LSH_CORE_ARRAY
mas01mc@340 235 if(H::h[ j ][ i ])
mas01mc@340 236 if(H::h[ j ][ i ]->t2 & LSH_CORE_ARRAY_BIT){
mas01mc@340 237 pp = get_pointer_to_bucket_linked_list(H::h[ j ][ i ]);
mas01mc@340 238 if(*pp){
mas01mc@344 239 (*pp)->snext.ptr=0; // Head of list uses snext as a non-pointer value
mas01mc@340 240 delete *pp; // Now the destructor can do its work properly
mas01mc@340 241 }
mas01mc@340 242 free(H::h[ j ][ i ]->next);
mas01mc@340 243 H::h[ j ][ i ]->next = 0; // Zero next pointer
mas01mc@344 244 H::h[ j ][ i ]->snext.ptr = 0; // Zero head-of-list pointer as above
mas01mc@340 245 }
mas01mc@340 246 #endif
mas01mc@292 247 delete H::h[ j ][ i ];
mas01mc@340 248 }
mas01mc@292 249 delete[] H::h[ j ];
mas01mc@292 250 }
mas01mc@474 251 delete[] H::r1;
mas01mc@474 252 delete[] H::r2;
mas01mc@474 253 delete[] H::h;
mas01mc@764 254
mas01mc@764 255 // MultiProbe cleanup
mas01mc@764 256 for( j = 0 ; j < H::L ; j++ )
mas01mc@764 257 delete[] H::boundaryDistances[j];
mas01mc@764 258 delete[] H::boundaryDistances;
mas01mc@764 259 delete multiProbePtr;
mas01mc@292 260 }
mas01mc@292 261
mas01mc@292 262
mas01mc@293 263 // Compute all hash functions for vector v
mas01mc@293 264 // #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d}
mas01mc@293 265 // to make L \times g_j functions \in Z^k
mas01mc@293 266 void H::compute_hash_functions(vector<float>& v){ // v \in R^d
mas01mc@293 267 float iw = 1. / H::w; // hash bucket width
mas01mc@293 268 Uns32T aa, kk;
mas01mc@293 269 if( v.size() != H::d )
mas01mc@293 270 error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality
mas01mc@293 271 double tmp = 0;
mas01mc@764 272 float *pA, *pb, *bd;
mas01mc@293 273 Uns32T *pg;
mas01mc@293 274 int dd;
mas01mc@293 275 vector<float>::iterator vi;
mas01mc@293 276 vector<Uns32T>::iterator ui;
mas01mc@293 277
mas01mc@293 278 #ifdef USE_U_FUNCTIONS
mas01mc@293 279 Uns32T bb;
mas01mc@293 280 // Store m dot products to expand
mas01mc@293 281 for( aa=0; aa < H::m ; aa++ ){
mas01mc@293 282 ui = H::uu[aa].begin();
mas01mc@293 283 for( kk = 0 ; kk < H::k/2 ; kk++ ){
mas01mc@293 284 pb = *( H::b + aa ) + kk;
mas01mc@293 285 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 286 dd = H::d;
mas01mc@293 287 tmp = 0.;
mas01mc@293 288 vi = v.begin();
mas01mc@293 289 while( dd-- )
mas01mc@293 290 tmp += *pA++ * *vi++; // project
mas01mc@293 291 tmp += *pb; // translate
mas01mc@293 292 tmp *= iw; // scale
mas01mc@293 293 *ui++ = (Uns32T) floor(tmp); // floor
mas01mc@293 294 }
mas01mc@293 295 }
mas01mc@293 296 // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d}
mas01mc@293 297 Uns32T j;
mas01mc@293 298 for( aa=0, j=0 ; aa < H::m-1 ; aa++ )
mas01mc@293 299 for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){
mas01mc@293 300 pg= *( H::g + j ); // L \times functions g_j(v) \in Z^k
mas01mc@293 301 // u_1 \in Z^{(k/2) \times d}
mas01mc@293 302 ui = H::uu[aa].begin();
mas01mc@293 303 kk=H::k/2;
mas01mc@293 304 while( kk-- )
mas01mc@293 305 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z
mas01mc@293 306 // u_2 \in Z^{(k/2) \times d}
mas01mc@293 307 ui = H::uu[bb].begin();
mas01mc@293 308 kk=H::k/2;
mas01mc@293 309 while( kk--)
mas01mc@293 310 *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z
mas01mc@293 311 }
mas01mc@293 312 #else
mas01mc@293 313 for( aa=0; aa < H::L ; aa++ ){
mas01mc@514 314 pg= *( H::g + aa ); // L \times functions g_j(v) \in Z^k
mas01mc@764 315 bd= *( H::boundaryDistances + aa);
mas01mc@514 316 for( kk = 0 ; kk != H::k ; kk++ ){
mas01mc@293 317 pb = *( H::b + aa ) + kk;
mas01mc@293 318 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 319 dd = H::d;
mas01mc@293 320 tmp = 0.;
mas01mc@293 321 vi = v.begin();
mas01mc@293 322 while( dd-- )
mas01mc@293 323 tmp += *pA++ * *vi++; // project
mas01mc@293 324 tmp += *pb; // translate
mas01mc@764 325 tmp *= iw; // scale
mas01mc@764 326 tmp = floor(tmp); // handle negative values
mas01mc@764 327 while(tmp<0) // wrap around 0 to N
mas01mc@764 328 tmp += H::N;
mas01mc@764 329 *pg = (Uns32T) tmp; // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z
mas01mc@764 330 *bd = (tmp - *pg++);//*w; // boundary distance -1
mas01mc@764 331 *(bd+1) = (1.0f - *bd); //*w; // boundary distance +1
mas01mc@764 332 bd+=2;
mas01mc@293 333 }
mas01mc@293 334 }
mas01mc@293 335 #endif
mas01mc@293 336 }
mas01mc@293 337
mas01mc@292 338 // make hash value \in Z
mas01mc@293 339 void H::generate_hash_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
mas01mc@293 340 H::t1 = computeProductModDefaultPrime( g, r1, H::k ) % H::N;
mas01mc@293 341 H::t2 = computeProductModDefaultPrime( g, r2, H::k );
mas01mc@292 342 }
mas01mc@292 343
mas01mc@764 344 // make hash value by purturbating the given hash functions
mas01mc@764 345 // according the the boundary distances of the current query
mas01mc@764 346 void H::generate_multiprobe_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
mas01mc@764 347 assert(!multiProbePtr->empty()); // Test this for now, until all is stable
mas01mc@764 348 Uns32T* mpg = new Uns32T[H::k]; // temporary array storage
mas01mc@764 349
mas01mc@764 350 // Copy the hash bucket identifiers
mas01mc@764 351 Uns32T* mpgPtr = mpg;
mas01mc@764 352 Uns32T kk = H::k;
mas01mc@764 353 while(kk--)
mas01mc@764 354 *mpgPtr++ = *g++;
mas01mc@764 355
mas01mc@764 356 // Retrieve the next purturbation set
mas01mc@764 357 perturbation_set ps = multiProbePtr->getNextPerturbationSet();
mas01mc@764 358 perturbation_set::iterator it = ps.begin();
mas01mc@764 359
mas01mc@764 360 // Perturbate the hash functions g
mas01mc@764 361 while( it != ps.end() ){
mas01mc@764 362 *(mpg + multiProbePtr->getIndex(it)) += multiProbePtr->getBoundary(it);
mas01mc@764 363 it++;
mas01mc@764 364 }
mas01mc@764 365
mas01mc@764 366 H::t1 = computeProductModDefaultPrime( mpg, r1, H::k ) % H::N;
mas01mc@764 367 H::t2 = computeProductModDefaultPrime( mpg, r2, H::k );
mas01mc@764 368
mas01mc@764 369 delete[] mpg; // free up temporary storage
mas01mc@764 370 }
mas01mc@764 371
mas01mc@764 372
mas01mc@292 373 #define CR_ASSERT(b){if(!(b)){fprintf(stderr, "ASSERT failed on line %d, file %s.\n", __LINE__, __FILE__); exit(1);}}
mas01mc@292 374
mas01mc@292 375 // Computes (a.b) mod UH_PRIME_DEFAULT
mas01mc@293 376 inline Uns32T H::computeProductModDefaultPrime(Uns32T *a, Uns32T *b, IntT size){
mas01mc@292 377 LongUns64T h = 0;
mas01mc@292 378
mas01mc@292 379 for(IntT i = 0; i < size; i++){
mas01mc@292 380 h = h + (LongUns64T)a[i] * (LongUns64T)b[i];
mas01mc@292 381 h = (h & TWO_TO_32_MINUS_1) + 5 * (h >> 32);
mas01mc@292 382 if (h >= UH_PRIME_DEFAULT) {
mas01mc@292 383 h = h - UH_PRIME_DEFAULT;
mas01mc@292 384 }
mas01mc@292 385 CR_ASSERT(h < UH_PRIME_DEFAULT);
mas01mc@292 386 }
mas01mc@292 387 return h;
mas01mc@292 388 }
mas01mc@292 389
mas01mc@292 390 Uns32T H::bucket_insert_point(bucket **pp){
mas01mc@296 391 collisionCount = 0;
mas01mc@340 392 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 393 if(!*pp){
mas01mc@292 394 *pp = new bucket();
mas01mc@292 395 (*pp)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@292 396 (*pp)->next = new bucket();
mas01mc@292 397 }
mas01mc@340 398 // The list head holds point collision count
mas01mc@340 399 if( (*pp)->t2 & LSH_CORE_ARRAY_BIT )
mas01mc@340 400 bucket_insert_point(get_pointer_to_bucket_linked_list(*pp)); // recurse
mas01mc@340 401 else{
mas01mc@340 402 collisionCount = (*pp)->t2;
mas01mc@340 403 if(collisionCount < H::C){ // Block if row is full
mas01mc@340 404 (*pp)->t2++; // Increment collision counter (numPoints in row)
mas01mc@340 405 pointCount++;
mas01mc@340 406 collisionCount++;
mas01mc@340 407 // Locate the bucket linked list
mas01mc@340 408 __bucket_insert_point( (*pp)->next );
mas01mc@340 409 }
mas01mc@292 410 }
mas01mc@340 411 #else // NOT USING LSH_LIST_HEAD_COUNTERS
mas01mc@340 412 if(!*pp)
mas01mc@340 413 *pp = new bucket();
mas01mc@296 414 pointCount++;
mas01mc@296 415 __bucket_insert_point(*pp); // No collision count storage
mas01mc@292 416 #endif
mas01mc@292 417 return collisionCount;
mas01mc@292 418 }
mas01mc@292 419
mas01mc@340 420 // insert points into hashtable row collision chain
mas01mc@292 421 void H::__bucket_insert_point(bucket* p){
mas01mc@292 422 if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
mas01mc@292 423 p->t2 = H::t2;
mas01mc@292 424 bucketCount++; // Record start of new point-locale collision chain
mas01mc@344 425 p->snext.ptr = new sbucket();
mas01mc@344 426 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 427 return;
mas01mc@292 428 }
mas01mc@292 429
mas01mc@292 430 if(p->t2 == H::t2){
mas01mc@344 431 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 432 return;
mas01mc@292 433 }
mas01mc@764 434
mas01mc@764 435 // insert bucket before current bucket
mas01mc@764 436 if(H::t2 < p->t2){
mas01mc@764 437 bucket* tmp = new bucket();
mas01mc@764 438 // copy current bucket contents into new bucket
mas01mc@764 439 tmp->next = p->next;
mas01mc@764 440 tmp->t2 = p->t2;
mas01mc@764 441 tmp->snext.ptr = p->snext.ptr;
mas01mc@764 442 p->next = tmp;
mas01mc@764 443 p->t2 = IFLAG;
mas01mc@764 444 p->snext.ptr=0;
mas01mc@764 445 __bucket_insert_point(p);
mas01mc@764 446 return;
mas01mc@764 447 }
mas01mc@292 448
mas01mc@764 449 if(p->next)
mas01mc@764 450 __bucket_insert_point(p->next);
mas01mc@340 451 else {
mas01mc@292 452 p->next = new bucket();
mas01mc@292 453 __bucket_insert_point(p->next);
mas01mc@292 454 }
mas01mc@292 455 }
mas01mc@292 456
mas01mc@340 457 // insert points into point-locale collision chain
mas01mc@292 458 void H::__sbucket_insert_point(sbucket* p){
mas01mc@292 459 if(p->pointID==IFLAG){
mas01mc@292 460 p->pointID = H::p;
mas01mc@292 461 return;
mas01mc@292 462 }
mas01mc@340 463
mas01mc@292 464 // Search for pointID
mas01mc@340 465 if(p->snext){
mas01mc@292 466 __sbucket_insert_point(p->snext);
mas01mc@292 467 }
mas01mc@292 468 else{
mas01mc@340 469 // Make new point collision bucket at end of list
mas01mc@340 470 p->snext = new sbucket();
mas01mc@340 471 __sbucket_insert_point(p->snext);
mas01mc@292 472 }
mas01mc@292 473 }
mas01mc@292 474
mas01mc@293 475 inline bucket** H::get_bucket(int j){
mas01mc@292 476 return *(h+j);
mas01mc@292 477 }
mas01mc@292 478
mas01mc@340 479 // Find the linked-list pointer at the end of the CORE_ARRAY
mas01mc@340 480 bucket** H::get_pointer_to_bucket_linked_list(bucket* rowPtr){
mas01mc@344 481 Uns32T numBuckets = rowPtr->snext.numBuckets; // Cast pointer to unsigned int
mas01mc@343 482 Uns32T numPoints = rowPtr->t2 & 0x7FFFFFFF; // Value is stored in low 31 bits of t2 field
mas01mc@343 483 bucket** listPtr = reinterpret_cast<bucket**> (reinterpret_cast<unsigned int*>(rowPtr->next)+numPoints+numBuckets+1);
mas01mc@343 484 return listPtr;
mas01mc@340 485 }
mas01mc@340 486
mas01mc@293 487 // Interface to Locality Sensitive Hashing G
mas01mc@293 488 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
mas01mc@293 489 H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
mas01mc@308 490 indexName(0),
mas01mc@293 491 lshHeader(0),
mas01mc@292 492 calling_instance(0),
mas01mc@293 493 add_point_callback(0)
mas01mc@292 494 {
mas01mc@293 495
mas01mc@292 496 }
mas01mc@292 497
mas01mc@292 498 // Serialize from file LSH constructor
mas01mc@292 499 // Read parameters from database file
mas01mc@292 500 // Load the hash functions, close the database
mas01mc@292 501 // Optionally load the LSH tables into head-allocated lists in core
mas01mc@292 502 G::G(char* filename, bool lshInCoreFlag):
mas01mc@293 503 H(), // default base-class constructor call delays data-structure initialization
mas01mc@309 504 indexName(0),
mas01mc@293 505 lshHeader(0),
mas01mc@292 506 calling_instance(0),
mas01mc@292 507 add_point_callback(0)
mas01mc@292 508 {
mas01mc@474 509 FILE* dbFile = 0;
mas01mc@292 510 int dbfid = unserialize_lsh_header(filename);
mas01mc@764 511
mas01mc@309 512 indexName = new char[O2_INDEX_MAXSTR];
mas01mc@309 513 strncpy(indexName, filename, O2_INDEX_MAXSTR); // COPY THE CONTENTS TO THE NEW POINTER
mas01mc@293 514 H::initialize_lsh_functions(); // Base-class data-structure initialization
mas01mc@293 515 unserialize_lsh_functions(dbfid); // populate with on-disk hashfunction values
mas01mc@292 516
mas01mc@292 517 // Format1 only needs unserializing if specifically requested
mas01mc@292 518 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){
mas01mc@292 519 unserialize_lsh_hashtables_format1(dbfid);
mas01mc@292 520 }
mas01mc@292 521
mas01mc@292 522 // Format2 always needs unserializing
mas01mc@292 523 if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){
mas01mc@474 524 dbFile = fdopen(dbfid, "rb");
mas01mc@336 525 if(!dbFile)
mas01mc@336 526 error("Cannot open LSH file for reading", filename);
mas01mc@336 527 unserialize_lsh_hashtables_format2(dbFile);
mas01mc@292 528 }
mas01mc@336 529 serial_close(dbfid);
mas01mc@474 530 if(dbFile){
mas01mc@474 531 fclose(dbFile);
mas01mc@474 532 dbFile = 0;
mas01mc@474 533 }
mas01mc@336 534 }
mas01mc@292 535
mas01mc@292 536 G::~G(){
mas01mc@292 537 delete lshHeader;
mas01mc@474 538 delete[] indexName;
mas01mc@292 539 }
mas01mc@292 540
mas01mc@292 541 // single point insertion; inserted values are hash value and pointID
mas01mc@292 542 Uns32T G::insert_point(vector<float>& v, Uns32T pp){
mas01mc@292 543 Uns32T collisionCount = 0;
mas01mc@292 544 H::p = pp;
mas01mc@299 545 if(H::maxp && pp<=H::maxp)
mas01mc@296 546 error("points must be indexed in strict ascending order", "LSH::insert_point(vector<float>&, Uns32T pointID)");
mas01mc@296 547 H::maxp=pp; // Store highest pointID in database
mas01mc@293 548 H::compute_hash_functions( v );
mas01mc@292 549 for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion
mas01mc@293 550 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@292 551 collisionCount += bucket_insert_point( *(h + j) + t1 );
mas01mc@292 552 }
mas01mc@292 553 return collisionCount;
mas01mc@292 554 }
mas01mc@292 555
mas01mc@292 556
mas01mc@292 557 // batch insert for a point set
mas01mc@292 558 // inserted values are vector hash value and pointID starting at basePointID
mas01mc@292 559 void G::insert_point_set(vector<vector<float> >& vv, Uns32T basePointID){
mas01mc@292 560 for(Uns32T point=0; point<vv.size(); point++)
mas01mc@292 561 insert_point(vv[point], basePointID+point);
mas01mc@292 562 }
mas01mc@292 563
mas01mc@292 564 // point retrieval routine
mas01mc@292 565 void G::retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@764 566 // assert(LSH_MULTI_PROBE_COUNT);
mas01mc@292 567 calling_instance = caller;
mas01mc@292 568 add_point_callback = add_point;
mas01mc@293 569 H::compute_hash_functions( v );
mas01mc@292 570 for(Uns32T j = 0 ; j < H::L ; j++ ){
mas01mc@764 571 // MultiProbe loop
mas01mc@764 572 multiProbePtr->generatePerturbationSets( *( H::boundaryDistances + j ) , 2*H::k, (unsigned)LSH_MULTI_PROBE_COUNT);
mas01mc@764 573 for(Uns32T multiProbeIdx = 0 ; multiProbeIdx < multiProbePtr->size()+1 ; multiProbeIdx++ ){
mas01mc@764 574 if(!multiProbeIdx)
mas01mc@764 575 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@764 576 else
mas01mc@764 577 H::generate_multiprobe_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@764 578 if( bucket* bPtr = *(get_bucket(j) + get_t1()) ) {
mas01mc@340 579 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@764 580 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@764 581 retrieve_from_core_hashtable_array((Uns32T*)(bPtr->next), qpos);
mas01mc@764 582 } else {
mas01mc@764 583 bucket_chain_point( bPtr->next, qpos);
mas01mc@764 584 }
mas01mc@764 585 #else
mas01mc@764 586 bucket_chain_point( bPtr , qpos);
mas01mc@764 587 #endif
mas01cr@370 588 }
mas01cr@370 589 }
mas01mc@292 590 }
mas01mc@292 591 }
mas01mc@292 592
mas01mc@292 593 void G::retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 594 for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ )
mas01mc@292 595 retrieve_point(vv[qpos], qpos, add_point, caller);
mas01mc@292 596 }
mas01mc@292 597
mas01mc@292 598 // export lsh tables to table structure on disk
mas01mc@292 599 //
mas01mc@292 600 // LSH TABLE STRUCTURE
mas01mc@292 601 // ---header 64 bytes ---
mas01mc@292 602 // [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0]
mas01mc@292 603 //
mas01mc@292 604 // ---random projections L x k x d float ---
mas01mc@292 605 // A[0][0][0] A[0][0][1] ... A[0][0][d-1]
mas01mc@292 606 // A[0][1][0] A[0][1][1] ... A[1][1][d-1]
mas01mc@292 607 // ...
mas01mc@292 608 // A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1]
mas01mc@292 609 // ...
mas01mc@292 610 // ...
mas01mc@292 611 // A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1]
mas01mc@292 612 // A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1]
mas01mc@292 613 // ...
mas01mc@292 614 // A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1]
mas01mc@292 615 //
mas01mc@292 616 // ---bias L x k float ---
mas01mc@292 617 // b[0][0] b[0][1] ... b[0][k-1]
mas01mc@292 618 // b[1][0] b[1][1] ... b[1][k-1]
mas01mc@292 619 // ...
mas01mc@292 620 // b[L-1][0] b[L-1][1] ... b[L-1][k-1]
mas01mc@292 621 //
mas01mc@292 622 // ---random r1 L x k float ---
mas01mc@292 623 // r1[0][0] r1[0][1] ... r1[0][k-1]
mas01mc@292 624 // r1[1][0] r1[1][1] ... r1[1][k-1]
mas01mc@292 625 // ...
mas01mc@292 626 // r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1]
mas01mc@292 627 //
mas01mc@292 628 // ---random r2 L x k float ---
mas01mc@292 629 // r2[0][0] r2[0][1] ... r2[0][k-1]
mas01mc@292 630 // r2[1][0] r2[1][1] ... r2[1][k-1]
mas01mc@292 631 // ...
mas01mc@292 632 // r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1]
mas01mc@292 633 //
mas01mc@293 634 // ******* HASHTABLES FORMAT1 (optimized for LSH_ON_DISK retrieval) *******
mas01mc@292 635 // ---hash table 0: N x C x 8 ---
mas01mc@292 636 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 637 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 638 // ...
mas01mc@292 639 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 640 //
mas01mc@292 641 // ---hash table 1: N x C x 8 ---
mas01mc@292 642 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 643 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 644 // ...
mas01mc@292 645 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 646 //
mas01mc@292 647 // ...
mas01mc@292 648 //
mas01mc@292 649 // ---hash table L-1: N x C x 8 ---
mas01mc@292 650 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 651 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 652 // ...
mas01mc@292 653 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 654 //
mas01mc@293 655 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) *******
mas01mc@293 656 //
mas01mc@293 657 // State machine controlled by regular expression.
mas01mc@293 658 // legend:
mas01mc@293 659 //
mas01mc@306 660 // O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU
mas01mc@306 661 // O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU
mas01mc@306 662 // O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU
mas01mc@293 663 //
mas01mc@306 664 // T1 - T1 hash token
mas01mc@306 665 // t1 - t1 hash key (t1 range 0..2^29-1)
mas01mc@764 666 // %buckets+points% numElements in row for ARRAY encoding
mas01mc@306 667 // T2 - T2 token
mas01mc@293 668 // t2 - t2 hash key (range 1..2^32-6)
mas01mc@293 669 // p - point identifier (range 0..2^32-1)
mas01mc@306 670 // E - end hash table token
mas01mc@293 671 // {...} required arguments
mas01mc@293 672 // [...] optional arguments
mas01mc@293 673 // * - match zero or more occurences
mas01mc@293 674 // + - match one or more occurences
mas01mc@293 675 // {...}^L - repeat argument L times
mas01mc@293 676 //
mas01mc@293 677 // FORMAT2 Regular expression:
mas01mc@764 678 // { [T1 t1 %buckets+points% [T2 t2 p+]+ ]* E }^L
mas01mc@293 679 //
mas01mc@292 680
mas01mc@292 681 // Serial header constructors
mas01mc@292 682 SerialHeader::SerialHeader(){;}
mas01mc@296 683 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc):
mas01mc@292 684 lshMagic(O2_SERIAL_MAGIC),
mas01mc@292 685 binWidth(W),
mas01mc@292 686 numTables(L),
mas01mc@292 687 numRows(N),
mas01mc@292 688 numCols(C),
mas01mc@292 689 elementSize(O2_SERIAL_ELEMENT_SIZE),
mas01mc@296 690 version(O2_SERIAL_VERSION),
mas01mc@296 691 size(0), // we are deprecating this value
mas01mc@292 692 flags(FMT),
mas01mc@292 693 dataDim(d),
mas01mc@292 694 numFuns(k),
mas01mc@292 695 radius(r),
mas01mc@296 696 maxp(p),
mas01mc@296 697 size_long((unsigned long long)L * align_up((unsigned long long)N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables
mas01mc@296 698 + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions
mas01mc@296 699 (unsigned long long)L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())),
mas01mc@296 700 pointCount(pc){
mas01mc@296 701
mas01mc@296 702 if(FMT==O2_SERIAL_FILEFORMAT2)
mas01mc@296 703 size_long = (unsigned long long)align_up(O2_SERIAL_HEADER_SIZE
mas01mc@296 704 + (unsigned long long)L*k*(sizeof(float)*d+2+sizeof(Uns32T)
mas01mc@296 705 +sizeof(float)) + (unsigned long long)pc*16UL,get_page_logn());
mas01mc@296 706 } // header
mas01mc@292 707
mas01mc@292 708 float* G::get_serial_hashfunction_base(char* db){
mas01mc@292 709 if(db&&lshHeader)
mas01mc@292 710 return (float*)(db+O2_SERIAL_HEADER_SIZE);
mas01mc@292 711 else return NULL;
mas01mc@292 712 }
mas01mc@292 713
mas01mc@292 714 SerialElementT* G::get_serial_hashtable_base(char* db){
mas01mc@292 715 if(db&&lshHeader)
mas01mc@292 716 return (SerialElementT*)(db+get_serial_hashtable_offset());
mas01mc@292 717 else
mas01mc@292 718 return NULL;
mas01mc@292 719 }
mas01mc@292 720
mas01mc@292 721 Uns32T G::get_serial_hashtable_offset(){
mas01mc@292 722 if(lshHeader)
mas01mc@292 723 return align_up(O2_SERIAL_HEADER_SIZE +
mas01mc@292 724 L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn());
mas01mc@292 725 else
mas01mc@292 726 return 0;
mas01mc@292 727 }
mas01mc@292 728
mas01mc@292 729 void G::serialize(char* filename, Uns32T serialFormat){
mas01mc@292 730 int dbfid;
mas01mc@764 731 char* db;
mas01mc@292 732 int dbIsNew=0;
mas01mc@474 733 FILE* dbFile = 0;
mas01mc@292 734 // Check requested serialFormat
mas01mc@292 735 if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2))
mas01mc@292 736 error("Unrecognized serial file format request: ", "serialize()");
mas01mc@296 737
mas01mc@292 738 // Test to see if file exists
mas01cr@370 739 if((dbfid = open (filename, O_RDONLY)) < 0) {
mas01mc@292 740 // If it doesn't, then create the file (CREATE)
mas01cr@370 741 if(errno == ENOENT) {
mas01mc@292 742 // Create the file
mas01mc@292 743 std::cout << "Creating new serialized LSH database:" << filename << "...";
mas01mc@292 744 std::cout.flush();
mas01mc@292 745 serial_create(filename, serialFormat);
mas01mc@292 746 dbIsNew=1;
mas01cr@370 747 } else {
mas01mc@292 748 // The file can't be opened
mas01mc@292 749 error("Can't open the file", filename, "open");
mas01cr@370 750 }
mas01cr@370 751 }
mas01mc@292 752
mas01mc@292 753 // Load the on-disk header into core
mas01mc@292 754 dbfid = serial_open(filename, 1); // open for write
mas01mc@764 755 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@764 756 serial_get_header(db); // read header
mas01mc@764 757 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 758
mas01mc@292 759 // Check compatibility of core and disk data structures
mas01mc@292 760 if( !serial_can_merge(serialFormat) )
mas01mc@292 761 error("Incompatible core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 762
mas01mc@292 763 // For new LSH databases write the hashfunctions
mas01mc@292 764 if(dbIsNew)
mas01mc@292 765 serialize_lsh_hashfunctions(dbfid);
mas01mc@292 766 // Write the hashtables in the requested format
mas01mc@292 767 if(serialFormat == O2_SERIAL_FILEFORMAT1)
mas01mc@292 768 serialize_lsh_hashtables_format1(dbfid, !dbIsNew);
mas01mc@336 769 else{
mas01mc@474 770 dbFile = fdopen(dbfid, "r+b");
mas01mc@336 771 if(!dbFile)
mas01mc@336 772 error("Cannot open LSH file for writing",filename);
mas01mc@336 773 serialize_lsh_hashtables_format2(dbFile, !dbIsNew);
mas01mc@336 774 fflush(dbFile);
mas01mc@336 775 }
mas01mc@292 776
mas01mc@336 777 if(!dbIsNew) {
mas01mc@764 778 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@764 779 //serial_get_header(db); // read header
mas01mc@293 780 cout << "maxp = " << H::maxp << endl;
mas01mc@293 781 lshHeader->maxp=H::maxp;
mas01mc@292 782 // Default to FILEFORMAT1
mas01mc@292 783 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2))
mas01mc@294 784 lshHeader->flags|=O2_SERIAL_FILEFORMAT1;
mas01mc@764 785 memcpy((char*)db, (char*)lshHeader, sizeof(SerialHeaderT));
mas01mc@764 786 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@336 787 }
mas01mc@336 788 serial_close(dbfid);
mas01mc@474 789 if(dbFile){
mas01mc@474 790 fclose(dbFile);
mas01mc@474 791 dbFile = 0;
mas01mc@474 792 }
mas01mc@292 793 }
mas01mc@292 794
mas01mc@292 795 // Test to see if core structure and requested format is
mas01mc@292 796 // compatible with currently opened database
mas01mc@292 797 int G::serial_can_merge(Uns32T format){
mas01mc@292 798 SerialHeaderT* that = lshHeader;
mas01mc@292 799 if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 800 || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 801 || !( this->w == that->binWidth &&
mas01mc@296 802 this->L == that->numTables &&
mas01mc@296 803 this->N == that->numRows &&
mas01mc@296 804 this->k == that->numFuns &&
mas01mc@296 805 this->d == that->dataDim &&
mas01mc@296 806 sizeof(SerialElementT) == that->elementSize &&
mas01mc@296 807 this->radius == that->radius)){
mas01mc@292 808 serial_print_header(format);
mas01mc@292 809 return 0;
mas01mc@292 810 }
mas01mc@292 811 else
mas01mc@292 812 return 1;
mas01mc@292 813 }
mas01mc@292 814
mas01mc@292 815 // Used as an error message for serial_can_merge()
mas01mc@292 816 void G::serial_print_header(Uns32T format){
mas01mc@292 817 std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl;
mas01mc@292 818 std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl;
mas01mc@292 819 std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl;
mas01mc@292 820 std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl;
mas01mc@292 821 std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl;
mas01mc@292 822 std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl;
mas01mc@292 823 std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl;
mas01mc@292 824 std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl;
mas01mc@292 825 }
mas01mc@292 826
mas01mc@292 827 int G::serialize_lsh_hashfunctions(int fid){
mas01mc@292 828 float* pf;
mas01mc@292 829 Uns32T *pu;
mas01mc@292 830 Uns32T x,y,z;
mas01mc@292 831
mas01mc@764 832 char* db = serial_mmap(fid, get_serial_hashtable_offset(), 1);// get database pointer
mas01mc@764 833 pf = get_serial_hashfunction_base(db);
mas01mc@764 834
mas01mc@292 835 // HASH FUNCTIONS
mas01mc@292 836 // Write the random projectors A[][][]
mas01mc@292 837 #ifdef USE_U_FUNCTIONS
mas01mc@292 838 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 839 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 840 #else
mas01cr@648 841 for( x = 0 ; x < H::L ; x++ )
mas01cr@648 842 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 843 #endif
mas01cr@648 844 for( z = 0 ; z < d ; z++ )
mas01cr@648 845 *pf++ = H::A[x][y][z];
mas01mc@292 846
mas01mc@292 847 // Write the random biases b[][]
mas01mc@292 848 #ifdef USE_U_FUNCTIONS
mas01mc@292 849 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 850 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 851 #else
mas01cr@648 852 for( x = 0 ; x < H::L ; x++ )
mas01cr@648 853 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 854 #endif
mas01cr@648 855 *pf++ = H::b[x][y];
mas01mc@292 856
mas01mc@292 857 pu = (Uns32T*)pf;
mas01mc@292 858
mas01mc@292 859 // Write the Z projectors r1[][]
mas01mc@292 860 for( x = 0 ; x < H::L ; x++)
mas01mc@292 861 for( y = 0 ; y < H::k ; y++)
mas01mc@293 862 *pu++ = H::r1[x][y];
mas01mc@292 863
mas01mc@292 864 // Write the Z projectors r2[][]
mas01mc@292 865 for( x = 0 ; x < H::L ; x++)
mas01mc@292 866 for( y = 0; y < H::k ; y++)
mas01mc@293 867 *pu++ = H::r2[x][y];
mas01cr@648 868
mas01mc@764 869 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 870 return 1;
mas01mc@292 871 }
mas01mc@292 872
mas01mc@292 873 int G::serialize_lsh_hashtables_format1(int fid, int merge){
mas01mc@292 874 SerialElementT *pe, *pt;
mas01mc@292 875 Uns32T x,y;
mas01mc@292 876
mas01mc@292 877 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) )
mas01mc@292 878 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 879
mas01mc@764 880 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 881 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount;
mas01mc@292 882 // Write the hash tables
mas01mc@292 883 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 884 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1...";
mas01mc@292 885 std::cout.flush();
mas01mc@764 886 // memory map a single hash table for sequential access
mas01mc@764 887 // Align each hash table to page boundary
mas01mc@764 888 char* dbtable = serial_mmap(fid, hashTableSize, 1,
mas01mc@764 889 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@764 890 #ifdef __CYGWIN__
mas01mc@764 891 // No madvise in CYGWIN
mas01mc@764 892 #else
mas01mc@764 893 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@764 894 error("could not advise hashtable memory","","madvise");
mas01mc@764 895 #endif
mas01mc@292 896 maxColCount=0;
mas01mc@292 897 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 898 meanColCount=0;
mas01mc@292 899 colCountN=0;
mas01mc@764 900 pt=(SerialElementT*)dbtable;
mas01mc@292 901 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 902 // Move disk pointer to beginning of row
mas01mc@292 903 pe=pt+y*lshHeader->numCols;
mas01mc@292 904
mas01mc@292 905 colCount=0;
mas01cr@370 906 if(bucket* bPtr = h[x][y]) {
mas01cr@370 907 if(merge) {
mas01mc@340 908 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 909 serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01cr@370 910 } else {
mas01mc@292 911 serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 912 #else
mas01cr@370 913 serial_merge_hashtable_row_format1(pe, bPtr, colCount);
mas01cr@370 914 } else {
mas01cr@370 915 serial_write_hashtable_row_format1(pe, bPtr, colCount);
mas01mc@292 916 #endif
mas01cr@370 917 }
mas01cr@370 918 }
mas01mc@292 919 if(colCount){
mas01mc@292 920 if(colCount<minColCount)
mas01mc@292 921 minColCount=colCount;
mas01mc@292 922 if(colCount>maxColCount)
mas01mc@292 923 maxColCount=colCount;
mas01mc@292 924 meanColCount+=colCount;
mas01mc@292 925 colCountN++;
mas01mc@292 926 }
mas01mc@292 927 }
mas01mc@292 928 if(colCountN)
mas01mc@292 929 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 930 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 931 << endl;
mas01mc@764 932 serial_munmap(dbtable, hashTableSize);
mas01mc@292 933 }
mas01mc@292 934
mas01mc@292 935 // We're done writing
mas01mc@292 936 return 1;
mas01mc@292 937 }
mas01mc@292 938
mas01mc@292 939 void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){
mas01mc@292 940 while(b && b->t2!=IFLAG){
mas01mc@292 941 SerialElementT*pe=pr; // reset disk pointer to beginning of row
mas01mc@344 942 serial_merge_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 943 b=b->next;
mas01mc@292 944 }
mas01mc@292 945 }
mas01mc@292 946
mas01mc@292 947 void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 948 while(sb){
mas01mc@292 949 if(colCount==lshHeader->numCols){
mas01mc@292 950 std::cout << "!point-chain full " << endl;
mas01mc@292 951 return;
mas01mc@292 952 }
mas01mc@292 953 Uns32T c=0;
mas01mc@292 954 // Merge collision chains
mas01mc@292 955 while(c<lshHeader->numCols){
mas01mc@292 956 if( (pe+c)->hashValue==IFLAG){
mas01mc@292 957 (pe+c)->hashValue=t2;
mas01mc@292 958 (pe+c)->pointID=sb->pointID;
mas01mc@292 959 colCount=c+1;
mas01mc@292 960 if(c+1<lshHeader->numCols)
mas01mc@292 961 (pe+c+1)->hashValue=IFLAG;
mas01mc@292 962 break;
mas01mc@292 963 }
mas01mc@292 964 c++;
mas01mc@292 965 }
mas01mc@292 966 sb=sb->snext;
mas01mc@292 967 }
mas01mc@292 968 return;
mas01mc@292 969 }
mas01mc@292 970
mas01mc@292 971 void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){
mas01mc@292 972 pe->hashValue=IFLAG;
mas01mc@292 973 while(b && b->t2!=IFLAG){
mas01mc@344 974 serial_write_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 975 b=b->next;
mas01mc@292 976 }
mas01mc@292 977 }
mas01mc@292 978
mas01mc@292 979 void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 980 while(sb){
mas01mc@292 981 if(colCount==lshHeader->numCols){
mas01mc@292 982 std::cout << "!point-chain full " << endl;
mas01mc@292 983 return;
mas01mc@292 984 }
mas01mc@292 985 pe->hashValue=t2;
mas01mc@292 986 pe->pointID=sb->pointID;
mas01mc@292 987 pe++;
mas01mc@292 988 colCount++;
mas01mc@292 989 sb=sb->snext;
mas01mc@292 990 }
mas01mc@292 991 pe->hashValue=IFLAG;
mas01mc@292 992 return;
mas01mc@292 993 }
mas01mc@292 994
mas01mc@336 995 int G::serialize_lsh_hashtables_format2(FILE* dbFile, int merge){
mas01mc@292 996 Uns32T x,y;
mas01mc@292 997
mas01mc@292 998 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
mas01mc@292 999 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 1000
mas01mc@340 1001 // We must pereform FORMAT2 merges in core FORMAT1 (dynamic list structure)
mas01mc@292 1002 if(merge)
mas01mc@340 1003 unserialize_lsh_hashtables_format2(dbFile, merge);
mas01mc@292 1004
mas01mc@292 1005 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
mas01mc@336 1006 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 1007 fclose(dbFile);
mas01mc@336 1008 error("fSeek error in serialize_lsh_hashtables_format2");
mas01mc@336 1009 }
mas01mc@292 1010
mas01mc@292 1011 // Write the hash tables
mas01mc@292 1012 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 1013 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2...";
mas01mc@292 1014 std::cout.flush();
mas01mc@292 1015 maxColCount=0;
mas01mc@292 1016 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 1017 meanColCount=0;
mas01mc@292 1018 colCountN=0;
mas01mc@764 1019 H::tablesPointCount = 0;
mas01mc@292 1020 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 1021 colCount=0;
mas01mc@292 1022 if(bucket* bPtr = h[x][y]){
mas01mc@306 1023 // Check for empty row (even though row was allocated)
mas01mc@340 1024 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@306 1025 if(bPtr->next->t2==IFLAG){
mas01mc@336 1026 fclose(dbFile);
mas01mc@306 1027 error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 1028 }
mas01mc@306 1029 #else
mas01mc@306 1030 if(bPtr->t2==IFLAG){
mas01mc@336 1031 fclose(dbFile);
mas01mc@306 1032 error("b->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 1033 }
mas01mc@306 1034 #endif
mas01mc@306 1035 t1 = O2_SERIAL_TOKEN_T1;
mas01mc@340 1036 WRITE_UNS32(&t1, "[T1]");
mas01mc@306 1037 t1 = y;
mas01mc@340 1038 WRITE_UNS32(&t1, "[t1]");
mas01mc@340 1039 #ifdef LSH_CORE_ARRAY
mas01mc@340 1040 t1 = count_buckets_and_points_hashtable_row(bPtr);
mas01mc@340 1041 WRITE_UNS32(&t1,"[count]"); // write numElements
mas01mc@340 1042 #endif
mas01mc@340 1043 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@336 1044 serial_write_hashtable_row_format2(dbFile, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 1045 #else
mas01mc@340 1046 serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
mas01mc@292 1047 #endif
mas01mc@292 1048 }
mas01mc@292 1049 if(colCount){
mas01mc@292 1050 if(colCount<minColCount)
mas01mc@292 1051 minColCount=colCount;
mas01mc@292 1052 if(colCount>maxColCount)
mas01mc@292 1053 maxColCount=colCount;
mas01mc@292 1054 meanColCount+=colCount;
mas01mc@292 1055 colCountN++;
mas01mc@292 1056 }
mas01mc@764 1057 H::tablesPointCount+=colCount;
mas01mc@292 1058 }
mas01mc@292 1059 // Write END of table marker
mas01mc@306 1060 t1 = O2_SERIAL_TOKEN_ENDTABLE;
mas01mc@340 1061 WRITE_UNS32(&t1,"[end]");
mas01mc@292 1062 if(colCountN)
mas01mc@764 1063 std::cout << "#points: " << H::tablesPointCount << " #rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 1064 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 1065 << endl;
mas01mc@340 1066 }
mas01mc@292 1067 // We're done writing
mas01mc@292 1068 return 1;
mas01mc@292 1069 }
mas01mc@292 1070
mas01mc@340 1071 Uns32T G::count_buckets_and_points_hashtable_row(bucket* bPtr){
mas01mc@340 1072 Uns32T total_count = 0;
mas01mc@340 1073 bucket* p = 0;
mas01mc@340 1074
mas01mc@340 1075 // count points
mas01mc@340 1076 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1077 total_count = bPtr->t2; // points already counted
mas01mc@340 1078 p = bPtr->next;
mas01mc@340 1079 #else
mas01mc@340 1080 total_count = count_points_hashtable_row(bPtr);
mas01mc@340 1081 p = bPtr;
mas01mc@340 1082 #endif
mas01mc@340 1083
mas01mc@340 1084 // count buckets
mas01mc@340 1085 do{
mas01mc@340 1086 total_count++;
mas01mc@340 1087 }while((p=p->next));
mas01mc@340 1088
mas01mc@340 1089 return total_count;
mas01mc@340 1090 }
mas01mc@340 1091
mas01mc@340 1092 Uns32T G::count_points_hashtable_row(bucket* bPtr){
mas01mc@340 1093 Uns32T point_count = 0;
mas01mc@340 1094 bucket* p = bPtr;
mas01mc@340 1095 sbucket* s = 0;
mas01mc@340 1096 while(p){
mas01mc@344 1097 s = p->snext.ptr;
mas01mc@340 1098 while(s){
mas01mc@340 1099 point_count++;
mas01mc@340 1100 s=s->snext;
mas01mc@340 1101 }
mas01mc@340 1102 p=p->next;
mas01mc@340 1103 }
mas01mc@340 1104 return point_count;
mas01mc@340 1105 }
mas01mc@340 1106
mas01mc@336 1107 void G::serial_write_hashtable_row_format2(FILE* dbFile, bucket* b, Uns32T& colCount){
mas01mc@764 1108 #ifdef _LSH_DEBUG_
mas01mc@764 1109 Uns32T last_t2 = 0;
mas01mc@764 1110 #endif
mas01mc@292 1111 while(b && b->t2!=IFLAG){
mas01mc@344 1112 if(!b->snext.ptr){
mas01mc@336 1113 fclose(dbFile);
mas01mc@306 1114 error("Empty collision chain in serial_write_hashtable_row_format2()");
mas01mc@306 1115 }
mas01mc@306 1116 t2 = O2_SERIAL_TOKEN_T2;
mas01mc@764 1117
mas01mc@336 1118 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1119 fclose(dbFile);
mas01mc@292 1120 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1121 }
mas01mc@292 1122 t2 = b->t2;
mas01mc@764 1123 #ifdef _LSH_DEBUG_
mas01mc@764 1124 if(t2 < last_t2){
mas01mc@764 1125 fclose(dbFile);
mas01mc@764 1126 error("t2<last_t2 in serial_write_hashtable_row_format2()");
mas01mc@764 1127 }
mas01mc@764 1128 last_t2 = t2;
mas01mc@764 1129 #endif
mas01mc@764 1130
mas01mc@336 1131 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1132 fclose(dbFile);
mas01mc@292 1133 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1134 }
mas01mc@344 1135 serial_write_element_format2(dbFile, b->snext.ptr, colCount);
mas01mc@292 1136 b=b->next;
mas01mc@292 1137 }
mas01mc@292 1138 }
mas01mc@292 1139
mas01mc@336 1140 void G::serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount){
mas01mc@292 1141 while(sb){
mas01mc@336 1142 if(fwrite(&sb->pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1143 fclose(dbFile);
mas01mc@292 1144 error("Write error in serial_write_element_format2()");
mas01mc@292 1145 }
mas01mc@292 1146 colCount++;
mas01mc@292 1147 sb=sb->snext;
mas01mc@292 1148 }
mas01mc@292 1149 }
mas01mc@292 1150
mas01mc@292 1151
mas01mc@292 1152 int G::serial_create(char* filename, Uns32T FMT){
mas01mc@292 1153 return serial_create(filename, w, L, N, C, k, d, FMT);
mas01mc@292 1154 }
mas01mc@292 1155
mas01mc@292 1156
mas01mc@292 1157 int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols,
mas01mc@292 1158 Uns32T numFuns, Uns32T dim, Uns32T FMT){
mas01mc@292 1159
mas01mc@292 1160 if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS
mas01mc@292 1161 || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS
mas01mc@292 1162 || dim>O2_SERIAL_MAX_DIM){
mas01mc@292 1163 error("LSH parameters out of bounds for serialization");
mas01mc@292 1164 }
mas01mc@292 1165
mas01mc@292 1166 int dbfid;
mas01cr@648 1167 if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL,
mas01cr@654 1168 #if defined(WIN32)
mas01cr@648 1169 _S_IREAD|_S_IWRITE
mas01cr@648 1170 #else
mas01cr@648 1171 S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH
mas01cr@648 1172 #endif
mas01cr@648 1173 )) < 0)
mas01cr@648 1174
mas01mc@292 1175 error("Can't create serial file", filename, "open");
mas01mc@292 1176 get_lock(dbfid, 1);
mas01mc@292 1177
mas01mc@292 1178 // Make header first to get size of serialized database
mas01mc@296 1179 lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT, pointCount);
mas01mc@296 1180
mas01mc@296 1181 cout << "file size: <=" << lshHeader->get_size()/1024UL << "KB" << endl;
mas01mc@296 1182 if(lshHeader->get_size()>O2_SERIAL_MAXFILESIZE)
mas01mc@296 1183 error("Maximum size of LSH file exceded: > 4000MB");
mas01mc@296 1184
mas01mc@292 1185 // go to the location corresponding to the last byte
mas01mc@292 1186 if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1)
mas01mc@292 1187 error("lseek error in db file", "", "lseek");
mas01mc@292 1188
mas01mc@292 1189 // write a dummy byte at the last location
mas01mc@292 1190 if (write (dbfid, "", 1) != 1)
mas01mc@292 1191 error("write error", "", "write");
mas01mc@764 1192
mas01mc@764 1193 char* db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);
mas01mc@764 1194
mas01mc@764 1195 memcpy (db, lshHeader, O2_SERIAL_HEADER_SIZE);
mas01mc@764 1196
mas01mc@764 1197 serial_munmap(db, O2_SERIAL_HEADER_SIZE);
mas01mc@764 1198
mas01cr@648 1199
mas01mc@292 1200 close(dbfid);
mas01mc@292 1201
mas01mc@292 1202 std::cout << "done initializing tables." << endl;
mas01mc@292 1203
mas01mc@292 1204 return 1;
mas01mc@292 1205 }
mas01mc@292 1206
mas01mc@764 1207 char* G::serial_mmap(int dbfid, Uns32T memSize, Uns32T forWrite, off_t offset){
mas01mc@764 1208 char* db;
mas01mc@764 1209 if(forWrite){
mas01mc@764 1210 if ((db = (char*) mmap(0, memSize, PROT_READ | PROT_WRITE,
mas01mc@764 1211 MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@764 1212 error("mmap error in request for writable serialized database", "", "mmap");
mas01mc@764 1213 }
mas01mc@764 1214 else if ((db = (char*) mmap(0, memSize, PROT_READ, MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@764 1215 error("mmap error in read-only serialized database", "", "mmap");
mas01mc@764 1216
mas01mc@764 1217 return db;
mas01mc@764 1218 }
mas01mc@764 1219
mas01mc@764 1220 SerialHeaderT* G::serial_get_header(char* db){
mas01mc@292 1221 lshHeader = new SerialHeaderT();
mas01mc@764 1222 memcpy((char*)lshHeader, db, sizeof(SerialHeaderT));
mas01mc@292 1223
mas01mc@292 1224 if(lshHeader->lshMagic!=O2_SERIAL_MAGIC)
mas01mc@292 1225 error("Not an LSH database file");
mas01mc@764 1226
mas01mc@292 1227 return lshHeader;
mas01mc@292 1228 }
mas01mc@292 1229
mas01mc@764 1230 void G::serial_munmap(char* db, Uns32T N){
mas01mc@764 1231 munmap(db, N);
mas01mc@292 1232 }
mas01mc@292 1233
mas01mc@292 1234 int G::serial_open(char* filename, int writeFlag){
mas01mc@292 1235 int dbfid;
mas01mc@292 1236 if(writeFlag){
mas01mc@292 1237 if ((dbfid = open (filename, O_RDWR)) < 0)
mas01mc@292 1238 error("Can't open serial file for read/write", filename, "open");
mas01mc@292 1239 get_lock(dbfid, writeFlag);
mas01mc@292 1240 }
mas01mc@292 1241 else{
mas01mc@292 1242 if ((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1243 error("Can't open serial file for read", filename, "open");
mas01mc@292 1244 get_lock(dbfid, 0);
mas01mc@292 1245 }
mas01mc@292 1246
mas01mc@292 1247 return dbfid;
mas01mc@292 1248 }
mas01mc@292 1249
mas01mc@292 1250 void G::serial_close(int dbfid){
mas01mc@292 1251
mas01mc@292 1252 release_lock(dbfid);
mas01mc@292 1253 close(dbfid);
mas01mc@292 1254 }
mas01mc@292 1255
mas01mc@292 1256 int G::unserialize_lsh_header(char* filename){
mas01mc@292 1257 int dbfid;
mas01mc@764 1258 char* db;
mas01mc@292 1259 // Test to see if file exists
mas01mc@292 1260 if((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1261 error("Can't open the file", filename, "open");
mas01mc@292 1262 close(dbfid);
mas01mc@292 1263 dbfid = serial_open(filename, 0); // open for read
mas01mc@764 1264 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@764 1265 serial_get_header(db); // read header
mas01mc@764 1266 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 1267
mas01mc@292 1268 // Unserialize header parameters
mas01mc@292 1269 H::L = lshHeader->numTables;
mas01mc@292 1270 H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0);
mas01mc@292 1271 H::N = lshHeader->numRows;
mas01mc@292 1272 H::C = lshHeader->numCols;
mas01mc@292 1273 H::k = lshHeader->numFuns;
mas01mc@292 1274 H::d = lshHeader->dataDim;
mas01mc@293 1275 H::w = lshHeader->binWidth;
mas01mc@293 1276 H::radius = lshHeader->radius;
mas01mc@293 1277 H::maxp = lshHeader->maxp;
mas01mc@296 1278 H::pointCount = lshHeader->pointCount;
mas01mc@292 1279
mas01mc@292 1280 return dbfid;
mas01mc@292 1281 }
mas01mc@292 1282
mas01mc@292 1283 // unserialize the LSH parameters
mas01mc@292 1284 // we leave the LSH tree on disk as a flat file
mas01mc@292 1285 // it is this flat file that we search by memory mapping
mas01mc@292 1286 void G::unserialize_lsh_functions(int dbfid){
mas01mc@292 1287 Uns32T j, kk;
mas01mc@292 1288 float* pf;
mas01mc@292 1289 Uns32T* pu;
mas01mc@292 1290
mas01mc@292 1291 // Load the hash functions into core
mas01mc@764 1292 char* db = serial_mmap(dbfid, get_serial_hashtable_offset(), 0);// get database pointer again
mas01mc@764 1293
mas01mc@764 1294 pf = get_serial_hashfunction_base(db);
mas01mc@292 1295
mas01mc@292 1296 #ifdef USE_U_FUNCTIONS
mas01mc@292 1297 for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v)
mas01mc@292 1298 for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1299 #else
mas01mc@764 1300 for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v)
mas01mc@764 1301 for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1302 #endif
mas01mc@764 1303 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@764 1304 H::A[j][kk][i] = *pf++; // Normally distributed random vectors
mas01mc@764 1305 }
mas01mc@764 1306 }
mas01mc@292 1307 #ifdef USE_U_FUNCTIONS
mas01mc@764 1308 for( j = 0 ; j < H::m ; j++ ) // biases b
mas01mc@764 1309 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@292 1310 #else
mas01mc@764 1311 for( j = 0 ; j < H::L ; j++ ) // biases b
mas01mc@764 1312 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1313 #endif
mas01mc@764 1314 H::b[j][kk] = *pf++;
mas01mc@292 1315
mas01mc@764 1316 pu = (Uns32T*)pf;
mas01mc@764 1317 for( j = 0 ; j < H::L ; j++ ) // Z projectors r1
mas01mc@764 1318 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@764 1319 H::r1[j][kk] = *pu++;
mas01mc@764 1320
mas01mc@764 1321 for( j = 0 ; j < H::L ; j++ ) // Z projectors r2
mas01mc@764 1322 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@764 1323 H::r2[j][kk] = *pu++;
mas01mc@292 1324
mas01mc@764 1325 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 1326 }
mas01mc@292 1327
mas01mc@292 1328 void G::unserialize_lsh_hashtables_format1(int fid){
mas01mc@292 1329 SerialElementT *pe, *pt;
mas01mc@292 1330 Uns32T x,y;
mas01mc@292 1331 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1332 // Read the hash tables into core
mas01mc@292 1333 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 1334 // memory map a single hash table
mas01mc@292 1335 // Align each hash table to page boundary
mas01mc@764 1336 char* dbtable = serial_mmap(fid, hashTableSize, 0,
mas01mc@764 1337 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@764 1338 #ifdef __CYGWIN__
mas01mc@764 1339 // No madvise in CYGWIN
mas01mc@764 1340 #else
mas01mc@764 1341 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@764 1342 error("could not advise hashtable memory","","madvise");
mas01mc@764 1343 #endif
mas01mc@764 1344 pt=(SerialElementT*)dbtable;
mas01mc@292 1345 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 1346 // Move disk pointer to beginning of row
mas01mc@292 1347 pe=pt+y*lshHeader->numCols;
mas01mc@292 1348 unserialize_hashtable_row_format1(pe, h[x]+y);
mas01mc@340 1349 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1350 printf("S[%d,%d]", x, y);
mas01mc@292 1351 serial_bucket_dump(pe);
mas01mc@292 1352 printf("C[%d,%d]", x, y);
mas01mc@292 1353 dump_hashtable_row(h[x][y]);
mas01mc@292 1354 #endif
mas01mc@292 1355 }
mas01mc@764 1356 serial_munmap(dbtable, hashTableSize);
mas01mc@292 1357 }
mas01mc@292 1358 }
mas01mc@292 1359
mas01mc@292 1360 void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){
mas01mc@292 1361 Uns32T colCount = 0;
mas01mc@292 1362 while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){
mas01mc@292 1363 H::p = pe->pointID; // current point ID
mas01mc@292 1364 t2 = pe->hashValue;
mas01mc@292 1365 bucket_insert_point(b);
mas01mc@292 1366 pe++;
mas01mc@292 1367 colCount++;
mas01mc@292 1368 }
mas01mc@292 1369 }
mas01mc@292 1370
mas01mc@340 1371 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
mas01mc@292 1372 Uns32T x=0,y=0;
mas01mc@764 1373 #ifdef _LSH_DEBUG_
mas01mc@764 1374 cout << "Loading hashtables..." << endl;
mas01mc@764 1375 cout << "header pointCount = " << pointCount << endl;
mas01mc@764 1376 cout << "forMerge = " << forMerge << endl;
mas01mc@764 1377 Uns32T sumTablesPointCount = 0;
mas01mc@764 1378 #endif
mas01mc@292 1379 // Seek to hashtable base offset
mas01mc@336 1380 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 1381 fclose(dbFile);
mas01mc@336 1382 error("fSeek error in unserialize_lsh_hashtables_format2");
mas01mc@292 1383 }
mas01mc@292 1384
mas01mc@292 1385 // Read the hash tables into core (structure is given in header)
mas01mc@292 1386 while( x < H::L){
mas01mc@764 1387 tablesPointCount=0;
mas01mc@336 1388 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1389 fclose(dbFile);
mas01mc@292 1390 error("Read error","unserialize_lsh_hashtables_format2()");
mas01mc@292 1391 }
mas01mc@306 1392 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE)
mas01mc@292 1393 x++; // End of table
mas01mc@292 1394 else
mas01mc@292 1395 while(y < H::N){
mas01mc@306 1396 // Read a row and move file pointer to beginning of next row or _bittable
mas01mc@306 1397 if(!(H::t1==O2_SERIAL_TOKEN_T1)){
mas01mc@336 1398 fclose(dbFile);
mas01mc@306 1399 error("State matchine error T1","unserialize_lsh_hashtables_format2()");
mas01mc@292 1400 }
mas01mc@336 1401 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1402 fclose(dbFile);
mas01mc@306 1403 error("Read error: t1","unserialize_lsh_hashtables_format2()");
mas01mc@306 1404 }
mas01mc@306 1405 y = H::t1;
mas01mc@292 1406 if(y>=H::N){
mas01mc@336 1407 fclose(dbFile);
mas01mc@292 1408 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
mas01mc@292 1409 }
mas01mc@340 1410 Uns32T token = 0;
mas01mc@340 1411 #ifdef LSH_CORE_ARRAY
mas01mc@340 1412 Uns32T numElements;
mas01mc@340 1413 if(fread(&numElements, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1414 fclose(dbFile);
mas01mc@340 1415 error("Read error: numElements","unserialize_lsh_hashtables_format2()");
mas01mc@340 1416 }
mas01mc@764 1417
mas01mc@764 1418 /*
mas01mc@764 1419 #ifdef _LSH_DEBUG_
mas01mc@764 1420 cout << "[" << x << "," << y << "] numElements(disk) = " << numElements;
mas01mc@764 1421 #endif
mas01mc@764 1422 */
mas01mc@340 1423 // BACKWARD COMPATIBILITY: check to see if T2 or END token was read
mas01mc@340 1424 if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
mas01mc@340 1425 forMerge=true; // Force use of dynamic linked list core format
mas01mc@340 1426 token = numElements;
mas01mc@340 1427 }
mas01mc@340 1428
mas01mc@764 1429 if(forMerge) // FOR INDEXING use dynamic linked list data structure
mas01mc@340 1430 // Use linked list CORE format
mas01mc@340 1431 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
mas01mc@764 1432 else // FOR QUERY use static array data structure
mas01mc@340 1433 // Use ARRAY CORE format with numElements counter
mas01mc@340 1434 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
mas01mc@340 1435 #else
mas01mc@764 1436 token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
mas01mc@513 1437 #endif
mas01mc@292 1438 // Check that token is valid
mas01mc@306 1439 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
mas01mc@336 1440 fclose(dbFile);
mas01mc@292 1441 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
mas01mc@292 1442 }
mas01mc@292 1443 // Check for end of table flag
mas01mc@306 1444 if(token==O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@292 1445 x++;
mas01mc@292 1446 break;
mas01mc@292 1447 }
mas01mc@292 1448 // Check for new row flag
mas01mc@306 1449 if(token==O2_SERIAL_TOKEN_T1)
mas01mc@292 1450 H::t1 = token;
mas01mc@292 1451 }
mas01mc@764 1452 #ifdef _LSH_DEBUG_
mas01mc@764 1453 cout << "[T " << x-1 << "] pointCount = " << tablesPointCount << endl;
mas01mc@764 1454 sumTablesPointCount+=tablesPointCount;
mas01mc@764 1455 #endif
mas01mc@292 1456 }
mas01mc@764 1457 #ifdef _LSH_DEBUG_
mas01mc@764 1458 cout << "TOTAL pointCount = " << sumTablesPointCount << endl;
mas01mc@764 1459 #endif
mas01mc@513 1460 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@513 1461 dump_hashtables();
mas01mc@513 1462 #endif
mas01mc@306 1463 }
mas01mc@292 1464
mas01mc@340 1465 Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token){
mas01mc@292 1466 bool pointFound = false;
mas01mc@340 1467
mas01mc@340 1468 if(token)
mas01mc@340 1469 H::t2 = token;
mas01mc@340 1470 else if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1471 fclose(dbFile);
mas01mc@340 1472 error("Read error T2 token","unserialize_hashtable_row_format2");
mas01mc@340 1473 }
mas01mc@340 1474
mas01mc@306 1475 if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){
mas01mc@336 1476 fclose(dbFile);
mas01mc@292 1477 error("State machine error: expected E or T2");
mas01mc@292 1478 }
mas01mc@340 1479
mas01mc@306 1480 while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){
mas01mc@292 1481 pointFound=false;
mas01mc@292 1482 // Check for T2 token
mas01mc@306 1483 if(H::t2!=O2_SERIAL_TOKEN_T2){
mas01mc@336 1484 fclose(dbFile);
mas01mc@292 1485 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
mas01mc@306 1486 }
mas01mc@292 1487 // Read t2 value
mas01mc@336 1488 if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1489 fclose(dbFile);
mas01mc@292 1490 error("Read error t2","unserialize_hashtable_row_format2");
mas01mc@292 1491 }
mas01mc@336 1492 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1493 fclose(dbFile);
mas01mc@292 1494 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1495 }
mas01mc@306 1496 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@292 1497 pointFound=true;
mas01mc@292 1498 bucket_insert_point(b);
mas01mc@764 1499 tablesPointCount++;
mas01mc@336 1500 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1501 fclose(dbFile);
mas01mc@292 1502 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1503 }
mas01mc@292 1504 }
mas01mc@292 1505 if(!pointFound)
mas01mc@292 1506 error("State machine error: point", "unserialize_hashtable_row_format2()");
mas01mc@306 1507 H::t2 = H::p; // Copy last found token to t2
mas01mc@292 1508 }
mas01mc@292 1509 return H::t2; // holds current token
mas01mc@292 1510 }
mas01mc@292 1511
mas01mc@340 1512 // Unserialize format2 hashtable row to a CORE_ARRAY pointed to
mas01mc@340 1513 // by the hashtable row pointer: rowPtr
mas01mc@340 1514 //
mas01mc@340 1515 // numElements is the total number of t2 buckets plus points
mas01mc@340 1516 // memory required is numElements+1+sizeof(hashtable ptr)
mas01mc@340 1517 //
mas01mc@340 1518 // numElements = numPoints + numBuckets
mas01mc@340 1519 //
mas01mc@340 1520 // During inserts (merges) new hashtable entries are appened at rowPtr+numPoints+numBuckets+1
mas01mc@340 1521 //
mas01mc@340 1522 // ASSUME: that LSH_LIST_HEAD_COUNTERS is set so that the first hashtable bucket is used to count
mas01mc@340 1523 // point and bucket entries
mas01mc@340 1524 //
mas01mc@340 1525 // We store the values of numPoints and numBuckets in separate fields of the first bucket
mas01mc@340 1526 // rowPtr->t2 // numPoints
mas01mc@764 1527 // (rowPtr->snext.numBuckets) // numBuckets
mas01mc@340 1528 //
mas01mc@340 1529 // We cast the rowPtr->next pointer to (Uns32*) malloc(numElements*sizeof(Uns32T) + sizeof(bucket*))
mas01mc@340 1530 // To get to the fist bucket, we use
mas01mc@340 1531 //
mas01mc@340 1532
mas01mc@340 1533 #define READ_UNS32T(VAL,TOKENSTR) if(fread(VAL, sizeof(Uns32T), 1, dbFile) != 1){\
mas01mc@340 1534 fclose(dbFile);error("Read error unserialize_hashtable_format2",TOKENSTR);}
mas01mc@340 1535
mas01mc@340 1536 #define TEST_TOKEN(TEST, TESTSTR) if(TEST){fclose(dbFile);error("State machine error: ", TESTSTR);}
mas01mc@340 1537
mas01mc@340 1538 #define SKIP_BITS_LEFT_SHIFT_MSB (30)
mas01mc@340 1539
mas01mc@340 1540 #define SKIP_BITS_RIGHT_SHIFT_MSB (28)
mas01mc@340 1541 #define SKIP_BITS_RIGHT_SHIFT_LSB (30)
mas01mc@340 1542
mas01mc@340 1543 #define MAX_POINTS_IN_BUCKET_CORE_ARRAY (16)
mas01mc@340 1544 #define LSH_CORE_ARRAY_END_ROW_TOKEN (0xFFFFFFFD)
mas01mc@340 1545
mas01mc@340 1546 // Encode the skip bits. Zero if only one point, MAX 8 (plus first == 9)
mas01mc@340 1547 #define ENCODE_POINT_SKIP_BITS TEST_TOKEN(!numPointsThisBucket, "no points found");\
mas01mc@340 1548 if(numPointsThisBucket==1){\
mas01mc@340 1549 secondPtr=ap++;\
mas01mc@340 1550 *secondPtr=0;\
mas01mc@340 1551 numPoints++;\
mas01mc@764 1552 numSingletons++;\
mas01mc@340 1553 }\
mas01mc@340 1554 if(numPointsThisBucket>1){\
mas01mc@340 1555 *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
mas01mc@340 1556 *secondPtr |= ( ( (numPointsThisBucket-1) & 0xC) >> 2 ) << SKIP_BITS_LEFT_SHIFT_MSB;}
mas01mc@340 1557
mas01mc@340 1558 Uns32T G::unserialize_hashtable_row_to_array(FILE* dbFile, bucket** rowPP, Uns32T numElements){
mas01mc@340 1559 Uns32T numPointsThisBucket = 0;
mas01mc@340 1560 Uns32T numBuckets = 0;
mas01mc@340 1561 Uns32T numPoints = 0;
mas01mc@340 1562 Uns32T* firstPtr = 0;
mas01mc@340 1563 Uns32T* secondPtr = 0;
mas01mc@764 1564 Uns32T numSingletons = 0; // Count single point puckets because we encode them with 2 points (for skip)
mas01mc@340 1565
mas01mc@340 1566 // Initialize new row
mas01mc@340 1567 if(!*rowPP){
mas01mc@340 1568 *rowPP = new bucket();
mas01mc@340 1569 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1570 (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@340 1571 (*rowPP)->next = 0;
mas01mc@340 1572 #endif
mas01mc@340 1573 }
mas01mc@340 1574 bucket* rowPtr = *rowPP;
mas01mc@764 1575 Uns32T last_t2 = 0;
mas01mc@340 1576 READ_UNS32T(&(H::t2),"t2");
mas01mc@340 1577 TEST_TOKEN(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2), "expected E or T2");
mas01mc@340 1578 // Because we encode points in 16-point blocks, we sometimes allocate repeated t2 elements
mas01mc@340 1579 // So over-allocate by a factor of two and realloc later to actual numElements
mas01mc@340 1580 CR_ASSERT(rowPtr->next = (bucket*) malloc((2*numElements+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1581 Uns32T* ap = reinterpret_cast<Uns32T*>(rowPtr->next); // Cast pointer to Uns32T* for array format
mas01mc@340 1582 while( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1) ){
mas01mc@340 1583 numPointsThisBucket = 0;// reset bucket point counter
mas01mc@340 1584 secondPtr = 0; // reset second-point pointer
mas01mc@340 1585 TEST_TOKEN(H::t2!=O2_SERIAL_TOKEN_T2, "expected T2");
mas01mc@340 1586 READ_UNS32T(&(H::t2), "Read error t2");
mas01mc@764 1587 if(H::t2<last_t2)
mas01mc@764 1588 cout << "last_t2=" << last_t2 << ", t2=" << H::t2 << endl;
mas01mc@764 1589 TEST_TOKEN(H::t2<last_t2, "t2 tokens not in ascending order");
mas01mc@764 1590 last_t2 = H::t2;
mas01mc@764 1591 /*
mas01mc@764 1592 #ifdef _LSH_DEBUG_
mas01mc@764 1593 cout << "+" << H::t2 << "+";
mas01mc@764 1594 #endif
mas01mc@764 1595 */
mas01mc@340 1596 *ap++ = H::t2; // Insert t2 value into array
mas01mc@340 1597 numBuckets++;
mas01mc@340 1598 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1599 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@340 1600 if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
mas01mc@340 1601 ENCODE_POINT_SKIP_BITS;
mas01mc@764 1602 /*
mas01mc@764 1603 #ifdef _LSH_DEBUG_
mas01mc@764 1604 cout << "*" << H::t2 << "*";
mas01mc@764 1605 #endif
mas01mc@764 1606 */
mas01mc@340 1607 *ap++ = H::t2; // Extra element
mas01mc@340 1608 numBuckets++; // record this as a new bucket
mas01mc@340 1609 numPointsThisBucket=0; // reset bucket point counter
mas01mc@340 1610 secondPtr = 0; // reset second-point pointer
mas01mc@340 1611 }
mas01mc@340 1612 if( ++numPointsThisBucket == 1 )
mas01mc@340 1613 firstPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1614 else if( numPointsThisBucket == 2 )
mas01mc@340 1615 secondPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1616 numPoints++;
mas01mc@764 1617 /*
mas01mc@764 1618 #ifdef _LSH_DEBUG_
mas01mc@764 1619 cout << "(" << H::p << ":" << numPoints << ")";
mas01mc@764 1620 #endif
mas01mc@764 1621 */
mas01mc@764 1622
mas01mc@340 1623 *ap++ = H::p;
mas01mc@340 1624 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1625 }
mas01mc@340 1626 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1627 H::t2 = H::p; // Copy last found token to t2
mas01mc@340 1628 }
mas01mc@340 1629 // Reallocate the row to its actual size
mas01mc@340 1630 CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1631 // Record the sizes at the head of the row
mas01mc@344 1632 rowPtr->snext.numBuckets = numBuckets;
mas01mc@340 1633 rowPtr->t2 = numPoints;
mas01mc@340 1634 // Place end of row marker
mas01mc@340 1635 *ap++ = LSH_CORE_ARRAY_END_ROW_TOKEN;
mas01mc@340 1636 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
mas01mc@340 1637 rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
mas01mc@340 1638 // Allocate a new dynamic list head at the end of the array
mas01mc@340 1639 bucket** listPtr = reinterpret_cast<bucket**> (ap);
mas01mc@340 1640 *listPtr = 0;
mas01mc@764 1641 /*
mas01mc@764 1642 #ifdef _LSH_DEBUG_
mas01mc@764 1643 cout << " numBuckets=" << numBuckets << " numPoints=" << numPoints - numSingletons << " numElements(array) " << numBuckets+numPoints - numSingletons << " " << endl;
mas01mc@764 1644 #endif
mas01mc@764 1645 */
mas01mc@764 1646 H::tablesPointCount += numPoints - numSingletons;
mas01mc@340 1647 // Return current token
mas01mc@340 1648 return H::t2; // return H::t2 which holds current token [E or T1]
mas01mc@340 1649 }
mas01mc@340 1650
mas01mc@340 1651
mas01mc@340 1652
mas01mc@340 1653 // *p is a pointer to the beginning of a hashtable row array
mas01mc@340 1654 // The array consists of t2 hash keys and one or more point identifiers p for each hash key
mas01mc@340 1655 // Retrieval is performed by generating a hash key query_t2 for query point q
mas01mc@340 1656 // We identify the row that t2 is stored in using a secondary hash t1, this row is the entry
mas01mc@340 1657 // point for retrieve_from_core_hashtable_array
mas01mc@764 1658 #define SKIP_BITS (0xC0000000U)
mas01mc@340 1659 void G::retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos){
mas01mc@340 1660 Uns32T skip;
mas01mc@340 1661 Uns32T t2;
mas01mc@340 1662 Uns32T p1;
mas01mc@340 1663 Uns32T p2;
mas01mc@340 1664
mas01mc@340 1665 CR_ASSERT(p);
mas01mc@340 1666
mas01mc@340 1667 do{
mas01mc@340 1668 t2 = *p++;
mas01mc@340 1669 if( t2 > H::t2 )
mas01mc@340 1670 return;
mas01mc@340 1671 p1 = *p++;
mas01mc@340 1672 p2 = *p++;
mas01mc@340 1673 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@340 1674 if( t2 == H::t2 ){
mas01mc@764 1675 add_point_callback(calling_instance, p1 & ~SKIP_BITS, qpos, radius);
mas01mc@340 1676 if(skip--){
mas01mc@764 1677 add_point_callback(calling_instance, p2 & ~SKIP_BITS, qpos, radius);
mas01mc@340 1678 while(skip-- )
mas01mc@340 1679 add_point_callback(calling_instance, *p++, qpos, radius);
mas01mc@340 1680 }
mas01mc@340 1681 }
mas01mc@340 1682 else
mas01mc@340 1683 if(*p != LSH_CORE_ARRAY_END_ROW_TOKEN)
mas01mc@340 1684 p = p + skip;
mas01mc@340 1685 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@340 1686 }
mas01mc@513 1687
mas01mc@513 1688 void G::dump_hashtables(){
mas01mc@513 1689 for(Uns32T x = 0; x < H::L ; x++)
mas01mc@513 1690 for(Uns32T y = 0; y < H::N ; y++){
mas01mc@513 1691 bucket* bPtr = h[x][y];
mas01mc@513 1692 if(bPtr){
mas01mc@513 1693 printf("C[%d,%d]", x, y);
mas01mc@513 1694 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@513 1695 printf("[numBuckets=%d]",bPtr->snext.numBuckets);
mas01mc@513 1696 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@513 1697 dump_core_hashtable_array((Uns32T*)(bPtr->next));
mas01mc@513 1698 }
mas01mc@513 1699 else {
mas01mc@513 1700 dump_hashtable_row(bPtr->next);
mas01mc@513 1701 }
mas01mc@513 1702 #else
mas01mc@513 1703 dump_hashtable_row(bPtr);
mas01mc@513 1704 #endif
mas01mc@513 1705 printf("\n");
mas01mc@513 1706 fflush(stdout);
mas01mc@513 1707 }
mas01mc@513 1708 }
mas01mc@513 1709 }
mas01mc@513 1710
mas01mc@764 1711 void G::dump_core_row(Uns32T n){
mas01mc@764 1712 if(!(n<H::N)){
mas01mc@764 1713 printf("ROW OUT OF RANGE:%d (MAX:%d)\n", n, H::N-1);
mas01mc@764 1714 return;
mas01mc@764 1715 }
mas01mc@764 1716 for(Uns32T j = 0 ; j < H::L ; j++ ){
mas01mc@764 1717 bucket* bPtr = h[j][n];
mas01mc@764 1718 if(bPtr){
mas01mc@764 1719 printf("C[%d,%d]", j, n);
mas01mc@764 1720 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@764 1721 printf("[numBuckets=%d]",bPtr->snext.numBuckets);
mas01mc@764 1722 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@764 1723 dump_core_hashtable_array((Uns32T*)(bPtr->next));
mas01mc@764 1724 }
mas01mc@764 1725 else {
mas01mc@764 1726 dump_hashtable_row(bPtr->next);
mas01mc@764 1727 }
mas01mc@764 1728 #else
mas01mc@764 1729 dump_hashtable_row(bPtr);
mas01mc@764 1730 #endif
mas01mc@764 1731 printf("\n");
mas01mc@764 1732 }
mas01mc@764 1733 }
mas01mc@764 1734 }
mas01mc@764 1735
mas01mc@764 1736 void G::dump_disk_row(char* filename, Uns32T n){
mas01mc@764 1737 int dbfid = unserialize_lsh_header(filename);
mas01mc@764 1738 if(dbfid<0){
mas01mc@764 1739 cerr << "Could not read header from " << filename << endl;
mas01mc@764 1740 return;
mas01mc@764 1741 }
mas01mc@764 1742 FILE* dbFile = 0;
mas01mc@764 1743 dbFile = fdopen(dbfid, "rb");
mas01mc@764 1744 if(!dbFile){
mas01mc@764 1745 cerr << "Could not create FILE pointer from file:" << filename << " with fid:" << dbfid << endl;
mas01mc@764 1746 close(dbfid);
mas01mc@764 1747 return;
mas01mc@764 1748 }
mas01mc@764 1749
mas01mc@764 1750 Uns32T x=0,y=0;
mas01mc@764 1751
mas01mc@764 1752 // Seek to hashtable base offset
mas01mc@764 1753 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@764 1754 fclose(dbFile);
mas01mc@764 1755 error("fSeek error in unserialize_lsh_hashtables_format2");
mas01mc@764 1756 }
mas01mc@764 1757 Uns32T token = 0;
mas01mc@764 1758 Uns32T pointID;
mas01mc@764 1759
mas01mc@764 1760 // Read the hash tables into core (structure is given in header)
mas01mc@764 1761 while( x < H::L){
mas01mc@764 1762 y=0;
mas01mc@764 1763 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1764 fclose(dbFile);
mas01mc@764 1765 error("Read error T1","unserialize_lsh_hashtables_format2()");
mas01mc@764 1766 }
mas01mc@764 1767 while(token != O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@764 1768 if(token == O2_SERIAL_TOKEN_T1){
mas01mc@764 1769 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1770 fclose(dbFile);
mas01mc@764 1771 error("Read error t1","unserialize_lsh_hashtables_format2()");
mas01mc@764 1772 }
mas01mc@764 1773 y=token;
mas01mc@764 1774 if(y==n){
mas01mc@764 1775 printf("D[%d,%d]", x, y);
mas01mc@764 1776 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1777 fclose(dbFile);
mas01mc@764 1778 error("Read error 2","unserialize_lsh_hashtables_format2()");
mas01mc@764 1779 }
mas01mc@764 1780 printf("[numElements=%d]", token);
mas01mc@764 1781 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1782 fclose(dbFile);
mas01mc@764 1783 error("Read error 3","unserialize_lsh_hashtables_format2()");
mas01mc@764 1784 }
mas01mc@764 1785 while(!(token==O2_SERIAL_TOKEN_ENDTABLE || token==O2_SERIAL_TOKEN_T1)){
mas01mc@764 1786 // Check for T2 token
mas01mc@764 1787 if(token!=O2_SERIAL_TOKEN_T2){
mas01mc@764 1788 printf("t2=%d",token);
mas01mc@764 1789 fclose(dbFile);
mas01mc@764 1790 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
mas01mc@764 1791 }
mas01mc@764 1792 // Read t2 value
mas01mc@764 1793 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1794 fclose(dbFile);
mas01mc@764 1795 error("Read error t2","unserialize_hashtable_row_format2");
mas01mc@764 1796 }
mas01mc@764 1797 if(fread(&pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1798 fclose(dbFile);
mas01mc@764 1799 error("Read error pointID","unserialize_hashtable_row_format2");
mas01mc@764 1800 }
mas01mc@764 1801 while(!(pointID==O2_SERIAL_TOKEN_ENDTABLE || pointID==O2_SERIAL_TOKEN_T1 || pointID==O2_SERIAL_TOKEN_T2 )){
mas01mc@764 1802 printf("(%0X,%u)", token, pointID);
mas01mc@764 1803 if(fread(&pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1804 fclose(dbFile);
mas01mc@764 1805 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@764 1806 }
mas01mc@764 1807 }
mas01mc@764 1808 token = pointID; // Copy last found token
mas01mc@764 1809 }
mas01mc@764 1810 printf("\n");
mas01mc@764 1811 }
mas01mc@764 1812 else{ // gobble up rest of row
mas01mc@764 1813 while(!(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE)){
mas01mc@764 1814 if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@764 1815 fclose(dbFile);
mas01mc@764 1816 error("Read error 4","unserialize_lsh_hashtables_format2()");
mas01mc@764 1817 }
mas01mc@764 1818 }
mas01mc@764 1819 }
mas01mc@764 1820 }
mas01mc@764 1821 }
mas01mc@764 1822 if(token==O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@764 1823 x++;
mas01mc@764 1824 }
mas01mc@764 1825 }
mas01mc@764 1826 close(dbfid);
mas01mc@764 1827 }
mas01mc@764 1828
mas01mc@764 1829
mas01mc@513 1830 void G::dump_core_hashtable_array(Uns32T* p){
mas01mc@513 1831 Uns32T skip;
mas01mc@513 1832 Uns32T t2;
mas01mc@513 1833 Uns32T p1;
mas01mc@513 1834 Uns32T p2;
mas01mc@513 1835 CR_ASSERT(p);
mas01mc@513 1836 do{
mas01mc@513 1837 t2 = *p++;
mas01mc@513 1838 p1 = *p++;
mas01mc@513 1839 p2 = *p++;
mas01mc@513 1840 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@764 1841 printf("(%0X, %u)", t2, p1 & ~SKIP_BITS);
mas01mc@513 1842 if(skip--){
mas01mc@764 1843 printf("(%0X, %u)", t2, p2 & ~SKIP_BITS);
mas01mc@513 1844 while(skip-- )
mas01mc@764 1845 printf("(%0X, %u)", t2, *p++);
mas01mc@513 1846 }
mas01mc@513 1847 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@513 1848 }
mas01mc@340 1849
mas01mc@292 1850 void G::dump_hashtable_row(bucket* p){
mas01mc@292 1851 while(p && p->t2!=IFLAG){
mas01mc@344 1852 sbucket* sbp = p->snext.ptr;
mas01mc@292 1853 while(sbp){
mas01mc@292 1854 printf("(%0X,%u)", p->t2, sbp->pointID);
mas01mc@292 1855 fflush(stdout);
mas01mc@292 1856 sbp=sbp->snext;
mas01mc@292 1857 }
mas01mc@292 1858 p=p->next;
mas01mc@292 1859 }
mas01mc@292 1860 printf("\n");
mas01mc@292 1861 }
mas01mc@292 1862
mas01mc@292 1863
mas01mc@292 1864 // G::serial_retrieve_point( ... )
mas01mc@292 1865 // retrieves (pointID) from a serialized LSH database
mas01mc@292 1866 //
mas01mc@292 1867 // inputs:
mas01mc@292 1868 // filename - file name of serialized LSH database
mas01mc@292 1869 // vv - query point set
mas01mc@292 1870 //
mas01mc@292 1871 // outputs:
mas01mc@292 1872 // inserts retrieved points into add_point() callback method
mas01mc@292 1873 void G::serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller)
mas01mc@292 1874 {
mas01mc@292 1875 int dbfid = serial_open(filename, 0); // open for read
mas01mc@764 1876 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@764 1877 serial_get_header(dbheader); // read header
mas01mc@764 1878 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1879
mas01mc@292 1880 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1881 serial_close(dbfid);
mas01mc@292 1882 error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1883 }
mas01mc@292 1884
mas01mc@292 1885 // size of each hash table
mas01mc@292 1886 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1887 calling_instance = caller; // class instance variable used in ...bucket_chain_point()
mas01mc@292 1888 add_point_callback = add_point;
mas01mc@292 1889
mas01mc@292 1890 for(Uns32T j=0; j<L; j++){
mas01mc@764 1891 // memory map a single hash table for random access
mas01mc@764 1892 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@764 1893 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@764 1894 #ifdef __CYGWIN__
mas01mc@764 1895 // No madvise in CYGWIN
mas01mc@764 1896 #else
mas01mc@764 1897 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@764 1898 error("could not advise local hashtable memory","","madvise");
mas01mc@764 1899 #endif
mas01mc@764 1900 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1901 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
mas01mc@293 1902 H::compute_hash_functions(vv[qpos]);
mas01mc@293 1903 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1904 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1905 }
mas01mc@764 1906 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@292 1907 }
mas01mc@292 1908 serial_close(dbfid);
mas01mc@292 1909 }
mas01mc@292 1910
mas01mc@292 1911 void G::serial_retrieve_point(char* filename, vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 1912 int dbfid = serial_open(filename, 0); // open for read
mas01mc@764 1913 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@764 1914 serial_get_header(dbheader); // read header
mas01mc@764 1915 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1916
mas01mc@292 1917 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1918 serial_close(dbfid);
mas01mc@292 1919 error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1920 }
mas01mc@292 1921
mas01mc@292 1922 // size of each hash table
mas01mc@292 1923 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1924 calling_instance = caller;
mas01mc@292 1925 add_point_callback = add_point;
mas01mc@293 1926 H::compute_hash_functions(v);
mas01mc@292 1927 for(Uns32T j=0; j<L; j++){
mas01mc@764 1928 // memory map a single hash table for random access
mas01mc@764 1929 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@764 1930 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@764 1931 #ifdef __CYGWIN__
mas01mc@764 1932 // No madvise in CYGWIN
mas01mc@764 1933 #else
mas01mc@764 1934 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@764 1935 error("could not advise local hashtable memory","","madvise");
mas01mc@764 1936 #endif
mas01mc@764 1937 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@293 1938 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1939 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@764 1940 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@764 1941 }
mas01mc@292 1942 serial_close(dbfid);
mas01mc@292 1943 }
mas01mc@292 1944
mas01mc@292 1945 void G::serial_dump_tables(char* filename){
mas01mc@292 1946 int dbfid = serial_open(filename, 0); // open for read
mas01mc@764 1947 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@764 1948 serial_get_header(dbheader); // read header
mas01mc@764 1949 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1950 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1951 for(Uns32T j=0; j<L; j++){
mas01mc@764 1952 // memory map a single hash table for random access
mas01mc@764 1953 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@764 1954 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@764 1955 #ifdef __CYGWIN__
mas01mc@764 1956 // No madvise in CYGWIN
mas01mc@764 1957 #else
mas01mc@764 1958 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@764 1959 error("could not advise local hashtable memory","","madvise");
mas01mc@764 1960 #endif
mas01mc@764 1961 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1962 printf("*********** TABLE %d ***************\n", j);
mas01mc@292 1963 fflush(stdout);
mas01mc@292 1964 int count=0;
mas01mc@292 1965 do{
mas01mc@292 1966 printf("[%d,%d]", j, count++);
mas01mc@292 1967 fflush(stdout);
mas01mc@292 1968 serial_bucket_dump(pe);
mas01mc@292 1969 pe+=lshHeader->numCols;
mas01mc@292 1970 }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols);
mas01mc@292 1971 }
mas01mc@292 1972 }
mas01mc@292 1973
mas01mc@292 1974 void G::serial_bucket_dump(SerialElementT* pe){
mas01mc@292 1975 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1976 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1977 printf("(%0X,%u)",pe->hashValue,pe->pointID);
mas01mc@292 1978 pe++;
mas01mc@292 1979 }
mas01mc@292 1980 printf("\n");
mas01mc@292 1981 fflush(stdout);
mas01mc@292 1982 }
mas01mc@292 1983
mas01mc@292 1984 void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){
mas01mc@292 1985 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1986 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1987 if(pe->hashValue==t2){ // new match
mas01mc@292 1988 add_point_callback(calling_instance, pe->pointID, qpos, radius);
mas01mc@292 1989 }
mas01mc@292 1990 pe++;
mas01mc@292 1991 }
mas01mc@292 1992 }
mas01mc@292 1993
mas01mc@292 1994 void G::bucket_chain_point(bucket* p, Uns32T qpos){
mas01mc@292 1995 if(!p || p->t2==IFLAG)
mas01mc@292 1996 return;
mas01mc@292 1997 if(p->t2==t2){ // match
mas01mc@344 1998 sbucket_chain_point(p->snext.ptr, qpos); // add to reporter
mas01mc@292 1999 }
mas01mc@292 2000 if(p->next){
mas01mc@292 2001 bucket_chain_point(p->next, qpos); // recurse
mas01mc@292 2002 }
mas01mc@292 2003 }
mas01mc@292 2004
mas01mc@292 2005 void G::sbucket_chain_point(sbucket* p, Uns32T qpos){
mas01mc@292 2006 add_point_callback(calling_instance, p->pointID, qpos, radius);
mas01mc@292 2007 if(p->snext){
mas01mc@292 2008 sbucket_chain_point(p->snext, qpos);
mas01mc@292 2009 }
mas01mc@292 2010 }
mas01mc@292 2011
mas01mc@292 2012 void G::get_lock(int fd, bool exclusive) {
mas01cr@648 2013 #ifndef WIN32
mas01mc@292 2014 struct flock lock;
mas01mc@292 2015 int status;
mas01mc@292 2016 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01mc@292 2017 lock.l_whence = SEEK_SET;
mas01mc@292 2018 lock.l_start = 0;
mas01mc@292 2019 lock.l_len = 0; /* "the whole file" */
mas01mc@292 2020 retry:
mas01mc@292 2021 do {
mas01mc@292 2022 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 2023 } while (status != 0 && errno == EINTR);
mas01mc@292 2024 if (status) {
mas01mc@292 2025 if (errno == EAGAIN) {
mas01mc@292 2026 sleep(1);
mas01mc@292 2027 goto retry;
mas01mc@292 2028 } else {
mas01mc@292 2029 error("fcntl lock error", "", "fcntl");
mas01mc@292 2030 }
mas01mc@292 2031 }
mas01cr@648 2032 #else
mas01cr@648 2033 int status;
mas01cr@648 2034
mas01cr@648 2035 retry:
mas01cr@648 2036 status = _locking(fd, _LK_NBLCK, getpagesize());
mas01cr@648 2037 if(status) {
mas01cr@648 2038 Sleep(1000);
mas01cr@648 2039 goto retry;
mas01cr@648 2040 }
mas01cr@648 2041 #endif
mas01mc@292 2042 }
mas01mc@292 2043
mas01mc@292 2044 void G::release_lock(int fd) {
mas01cr@648 2045 #ifndef WIN32
mas01mc@292 2046 struct flock lock;
mas01mc@292 2047 int status;
mas01mc@292 2048
mas01mc@292 2049 lock.l_type = F_UNLCK;
mas01mc@292 2050 lock.l_whence = SEEK_SET;
mas01mc@292 2051 lock.l_start = 0;
mas01mc@292 2052 lock.l_len = 0;
mas01mc@292 2053
mas01mc@292 2054 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 2055
mas01mc@292 2056 if (status)
mas01mc@292 2057 error("fcntl unlock error", "", "fcntl");
mas01cr@648 2058 #else
mas01cr@648 2059 _locking(fd, _LK_UNLCK, getpagesize());
mas01cr@648 2060 #endif
mas01mc@292 2061 }