annotate lshlib.cpp @ 369:6564be3109c5 gcc-4.3-cleanups

gcc-4.3 warning cleanups for lshlib.cpp (I do not believe that any of these changes contain significant copyrightable "intellectual property". However, to the extent that they do, the changes are hereby released into the Public Domain, and may be therefore be used by anyone for any purpose without need for consideration of any kind.)
author mas01cr
date Wed, 12 Nov 2008 15:23:32 +0000
parents 223a5994a962
children f9d86b1db21c cead91ecf9a2
rev   line source
mas01mc@292 1 #include "lshlib.h"
mas01mc@292 2
mas01mc@340 3 //#define LSH_DUMP_CORE_TABLES
mas01mc@292 4 //#define USE_U_FUNCTIONS
mas01mc@340 5
mas01mc@340 6 // Use backward-compatible CORE ARRAY lsh index
mas01mc@340 7 #define LSH_CORE_ARRAY // Set to use arrays for hashtables rather than linked-lists
mas01mc@340 8 #define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads
mas01mc@340 9
mas01mc@340 10 // Critical path logic
mas01mc@340 11 #if defined LSH_CORE_ARRAY && !defined LSH_LIST_HEAD_COUNTERS
mas01mc@340 12 #define LSH_LIST_HEAD_COUNTERS
mas01mc@340 13 #endif
mas01mc@340 14
mas01mc@340 15 #define LSH_CORE_ARRAY_BIT (0x80000000) // LSH_CORE_ARRAY test bit for list head
mas01mc@340 16
mas01mc@292 17
mas01mc@292 18 void err(char*s){cout << s << endl;exit(2);}
mas01mc@292 19
mas01mc@292 20 Uns32T get_page_logn(){
mas01mc@292 21 int pagesz = (int)sysconf(_SC_PAGESIZE);
mas01mc@292 22 return (Uns32T)log2((double)pagesz);
mas01mc@292 23 }
mas01mc@292 24
mas01cr@369 25 unsigned align_up(unsigned x, unsigned w) { return (((x) + ((1<<w)-1)) & ~((1<<w)-1)); }
mas01mc@292 26
mas01mc@292 27 void H::error(const char* a, const char* b, const char *sysFunc) {
mas01mc@292 28 cerr << a << ": " << b << endl;
mas01mc@292 29 if (sysFunc) {
mas01mc@292 30 perror(sysFunc);
mas01mc@292 31 }
mas01mc@292 32 exit(1);
mas01mc@292 33 }
mas01mc@292 34
mas01mc@293 35 H::H(){
mas01mc@293 36 // Delay initialization of lsh functions until we know the parameters
mas01mc@293 37 }
mas01mc@293 38
mas01mc@293 39 H::H(Uns32T kk, Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float ww, float rr):
mas01mc@292 40 #ifdef USE_U_FUNCTIONS
mas01mc@292 41 use_u_functions(true),
mas01mc@292 42 #else
mas01mc@292 43 use_u_functions(false),
mas01mc@292 44 #endif
mas01mc@293 45 maxp(0),
mas01mc@292 46 bucketCount(0),
mas01mc@292 47 pointCount(0),
mas01mc@292 48 N(NN),
mas01mc@292 49 C(CC),
mas01mc@292 50 k(kk),
mas01mc@292 51 m(mm),
mas01mc@293 52 L((mm*(mm-1))/2),
mas01mc@293 53 d(dd),
mas01mc@293 54 w(ww),
mas01mc@293 55 radius(rr)
mas01mc@292 56 {
mas01mc@292 57
mas01mc@292 58 if(m<2){
mas01mc@292 59 m=2;
mas01mc@292 60 L=1; // check value of L
mas01mc@292 61 cout << "warning: setting m=2, L=1" << endl;
mas01mc@292 62 }
mas01mc@292 63 if(use_u_functions && k%2){
mas01mc@292 64 k++; // make sure k is even
mas01mc@292 65 cout << "warning: setting k even" << endl;
mas01mc@292 66 }
mas01mc@293 67
mas01mc@293 68 // We have the necessary parameters, so construct hashfunction datastructures
mas01mc@293 69 initialize_lsh_functions();
mas01mc@292 70 }
mas01mc@292 71
mas01mc@293 72 void H::initialize_lsh_functions(){
mas01mc@292 73 H::P = UH_PRIME_DEFAULT;
mas01mc@292 74
mas01mc@292 75 /* FIXME: don't use time(); instead use /dev/random or similar */
mas01mc@292 76 /* FIXME: write out the seed somewhere, so that we can get
mas01mc@292 77 repeatability */
mas01mc@292 78 #ifdef MT19937
mas01mc@292 79 init_genrand(time(NULL));
mas01mc@292 80 #else
mas01mc@292 81 srand(time(NULL)); // seed random number generator
mas01mc@292 82 #endif
mas01mc@293 83 Uns32T i,j, kk;
mas01mc@293 84 #ifdef USE_U_FUNCTIONS
mas01mc@293 85 H::A = new float**[ H::m ]; // m x k x d random projectors
mas01mc@293 86 H::b = new float*[ H::m ]; // m x k random biases
mas01mc@293 87 #else
mas01mc@293 88 H::A = new float**[ H::L ]; // m x k x d random projectors
mas01mc@293 89 H::b = new float*[ H::L ]; // m x k random biases
mas01mc@293 90 #endif
mas01mc@293 91 H::g = new Uns32T*[ H::L ]; // L x k random projections
mas01mc@293 92 assert( H::g && H::A && H::b ); // failure
mas01mc@293 93 #ifdef USE_U_FUNCTIONS
mas01mc@293 94 // Use m \times u_i functions \in R^{(k/2) \times (d)}
mas01mc@293 95 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 96 for( j = 0; j < H::m ; j++ ){ // m functions u_i(v)
mas01mc@293 97 H::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients
mas01mc@293 98 H::b[j] = new float[ H::k/2 ]; // bias
mas01mc@293 99 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 100 for( kk = 0; kk < H::k/2 ; kk++ ){
mas01mc@293 101 H::A[j][kk] = new float[ H::d ];
mas01mc@293 102 assert( H::A[j][kk] ); // failure
mas01mc@293 103 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 104 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 105 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 106 }
mas01mc@293 107 }
mas01mc@293 108 #else
mas01mc@293 109 // Use m \times u_i functions \in R^{k \times (d)}
mas01mc@293 110 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
mas01mc@293 111 for( j = 0; j < H::L ; j++ ){ // m functions u_i(v)
mas01mc@293 112 H::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients
mas01mc@293 113 H::b[j] = new float[ H::k ]; // bias
mas01mc@293 114 assert( H::A[j] && H::b[j] ); // failure
mas01mc@293 115 for( kk = 0; kk < H::k ; kk++ ){
mas01mc@293 116 H::A[j][kk] = new float[ H::d ];
mas01mc@293 117 assert( H::A[j][kk] ); // failure
mas01mc@293 118 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 119 H::A[j][kk][i] = H::randn(); // Normal
mas01mc@293 120 H::b[j][kk] = H::ranf()*H::w; // Uniform
mas01mc@293 121 }
mas01mc@293 122 }
mas01mc@293 123 #endif
mas01mc@293 124
mas01mc@293 125 // Storage for LSH hash function output (Uns32T)
mas01mc@293 126 for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2)
mas01mc@293 127 H::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z
mas01mc@293 128 assert( H::g[j] );
mas01mc@292 129 }
mas01mc@292 130
mas01mc@293 131 // LSH Hash tables
mas01mc@293 132 H::h = new bucket**[ H::L ];
mas01mc@293 133 assert( H::h );
mas01mc@292 134 for( j = 0 ; j < H::L ; j++ ){
mas01mc@292 135 H::h[j] = new bucket*[ H::N ];
mas01mc@292 136 assert( H::h[j] );
mas01mc@292 137 for( i = 0 ; i < H::N ; i++)
mas01mc@292 138 H::h[j][i] = 0;
mas01mc@292 139 }
mas01mc@293 140
mas01mc@293 141 // Standard hash functions
mas01mc@293 142 H::r1 = new Uns32T*[ H::L ];
mas01mc@293 143 H::r2 = new Uns32T*[ H::L ];
mas01mc@293 144 assert( H::r1 && H::r2 ); // failure
mas01mc@293 145 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 146 H::r1[ j ] = new Uns32T[ H::k ];
mas01mc@293 147 H::r2[ j ] = new Uns32T[ H::k ];
mas01mc@293 148 assert( H::r1[j] && H::r2[j] ); // failure
mas01mc@293 149 for( i = 0; i<H::k; i++){
mas01mc@293 150 H::r1[j][i] = randr();
mas01mc@293 151 H::r2[j][i] = randr();
mas01mc@293 152 }
mas01mc@293 153 }
mas01mc@293 154
mas01mc@293 155 // Storage for whole or partial function evaluation depdenting on USE_U_FUNCTIONS
mas01mc@293 156 H::initialize_partial_functions();
mas01mc@293 157 }
mas01mc@293 158
mas01mc@293 159 void H::initialize_partial_functions(){
mas01mc@293 160
mas01mc@293 161 #ifdef USE_U_FUNCTIONS
mas01mc@293 162 H::uu = vector<vector<Uns32T> >(H::m);
mas01mc@293 163 for( Uns32T aa=0 ; aa < H::m ; aa++ )
mas01mc@293 164 H::uu[aa] = vector<Uns32T>( H::k/2 );
mas01mc@293 165 #else
mas01mc@293 166 H::uu = vector<vector<Uns32T> >(H::L);
mas01mc@293 167 for( Uns32T aa=0 ; aa < H::L ; aa++ )
mas01mc@293 168 H::uu[aa] = vector<Uns32T>( H::k );
mas01mc@293 169 #endif
mas01mc@293 170 }
mas01mc@293 171
mas01mc@293 172
mas01mc@293 173 // Generate z ~ N(0,1)
mas01mc@293 174 float H::randn(){
mas01mc@293 175 // Box-Muller
mas01mc@293 176 float x1, x2;
mas01mc@293 177 do{
mas01mc@293 178 x1 = ranf();
mas01mc@293 179 } while (x1 == 0); // cannot take log of 0
mas01mc@293 180 x2 = ranf();
mas01mc@293 181 float z;
mas01mc@293 182 z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2);
mas01mc@293 183 return z;
mas01mc@293 184 }
mas01mc@293 185
mas01mc@293 186 float H::ranf(){
mas01mc@293 187 #ifdef MT19937
mas01mc@293 188 return (float) genrand_real2();
mas01mc@293 189 #else
mas01mc@293 190 return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) );
mas01mc@293 191 #endif
mas01mc@293 192 }
mas01mc@293 193
mas01mc@293 194 // range is 1..2^29
mas01mc@293 195 /* FIXME: that looks like an ... odd range. Still. */
mas01mc@293 196 Uns32T H::randr(){
mas01mc@293 197 #ifdef MT19937
mas01mc@293 198 return (Uns32T)((genrand_int32() >> 3) + 1);
mas01mc@293 199 #else
mas01mc@293 200 return (Uns32T) ((rand() >> 2) + 1);
mas01mc@293 201 #endif
mas01mc@292 202 }
mas01mc@292 203
mas01mc@292 204 // Destruct hash tables
mas01mc@292 205 H::~H(){
mas01mc@293 206 Uns32T i,j,kk;
mas01mc@340 207 bucket** pp;
mas01mc@293 208 #ifdef USE_U_FUNCTIONS
mas01mc@293 209 for( j = 0 ; j < H::m ; j++ ){
mas01mc@293 210 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@293 211 delete[] A[j][kk];
mas01mc@293 212 delete[] A[j];
mas01mc@293 213 }
mas01mc@293 214 delete[] A;
mas01mc@293 215 for( j = 0 ; j < H::m ; j++ )
mas01mc@293 216 delete[] b[j];
mas01mc@293 217 delete[] b;
mas01mc@293 218 #else
mas01mc@293 219 for( j = 0 ; j < H::L ; j++ ){
mas01mc@293 220 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@293 221 delete[] A[j][kk];
mas01mc@293 222 delete[] A[j];
mas01mc@293 223 }
mas01mc@293 224 delete[] A;
mas01mc@293 225 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 226 delete[] b[j];
mas01mc@293 227 delete[] b;
mas01mc@293 228 #endif
mas01mc@293 229
mas01mc@293 230 for( j = 0 ; j < H::L ; j++ )
mas01mc@293 231 delete[] g[j];
mas01mc@293 232 delete[] g;
mas01mc@292 233 for( j=0 ; j < H::L ; j++ ){
mas01mc@292 234 delete[] H::r1[ j ];
mas01mc@292 235 delete[] H::r2[ j ];
mas01mc@340 236 for(i = 0; i< H::N ; i++){
mas01mc@340 237 pp = 0;
mas01mc@340 238 #ifdef LSH_CORE_ARRAY
mas01mc@340 239 if(H::h[ j ][ i ])
mas01mc@340 240 if(H::h[ j ][ i ]->t2 & LSH_CORE_ARRAY_BIT){
mas01mc@340 241 pp = get_pointer_to_bucket_linked_list(H::h[ j ][ i ]);
mas01mc@340 242 if(*pp){
mas01mc@344 243 (*pp)->snext.ptr=0; // Head of list uses snext as a non-pointer value
mas01mc@340 244 delete *pp; // Now the destructor can do its work properly
mas01mc@340 245 }
mas01mc@340 246 free(H::h[ j ][ i ]->next);
mas01mc@340 247 H::h[ j ][ i ]->next = 0; // Zero next pointer
mas01mc@344 248 H::h[ j ][ i ]->snext.ptr = 0; // Zero head-of-list pointer as above
mas01mc@340 249 }
mas01mc@340 250 #endif
mas01mc@292 251 delete H::h[ j ][ i ];
mas01mc@340 252 }
mas01mc@292 253 delete[] H::h[ j ];
mas01mc@292 254 }
mas01mc@292 255 delete[] H::r1;
mas01mc@292 256 delete[] H::r2;
mas01mc@292 257 delete[] H::h;
mas01mc@292 258 }
mas01mc@292 259
mas01mc@292 260
mas01mc@293 261 // Compute all hash functions for vector v
mas01mc@293 262 // #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d}
mas01mc@293 263 // to make L \times g_j functions \in Z^k
mas01mc@293 264 void H::compute_hash_functions(vector<float>& v){ // v \in R^d
mas01mc@293 265 float iw = 1. / H::w; // hash bucket width
mas01mc@293 266 Uns32T aa, kk;
mas01mc@293 267 if( v.size() != H::d )
mas01mc@293 268 error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality
mas01mc@293 269 double tmp = 0;
mas01mc@293 270 float *pA, *pb;
mas01mc@293 271 Uns32T *pg;
mas01mc@293 272 int dd;
mas01mc@293 273 vector<float>::iterator vi;
mas01mc@293 274 vector<Uns32T>::iterator ui;
mas01mc@293 275
mas01mc@293 276 #ifdef USE_U_FUNCTIONS
mas01mc@293 277 Uns32T bb;
mas01mc@293 278 // Store m dot products to expand
mas01mc@293 279 for( aa=0; aa < H::m ; aa++ ){
mas01mc@293 280 ui = H::uu[aa].begin();
mas01mc@293 281 for( kk = 0 ; kk < H::k/2 ; kk++ ){
mas01mc@293 282 pb = *( H::b + aa ) + kk;
mas01mc@293 283 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 284 dd = H::d;
mas01mc@293 285 tmp = 0.;
mas01mc@293 286 vi = v.begin();
mas01mc@293 287 while( dd-- )
mas01mc@293 288 tmp += *pA++ * *vi++; // project
mas01mc@293 289 tmp += *pb; // translate
mas01mc@293 290 tmp *= iw; // scale
mas01mc@293 291 *ui++ = (Uns32T) floor(tmp); // floor
mas01mc@293 292 }
mas01mc@293 293 }
mas01mc@293 294 // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d}
mas01mc@293 295 Uns32T j;
mas01mc@293 296 for( aa=0, j=0 ; aa < H::m-1 ; aa++ )
mas01mc@293 297 for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){
mas01mc@293 298 pg= *( H::g + j ); // L \times functions g_j(v) \in Z^k
mas01mc@293 299 // u_1 \in Z^{(k/2) \times d}
mas01mc@293 300 ui = H::uu[aa].begin();
mas01mc@293 301 kk=H::k/2;
mas01mc@293 302 while( kk-- )
mas01mc@293 303 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z
mas01mc@293 304 // u_2 \in Z^{(k/2) \times d}
mas01mc@293 305 ui = H::uu[bb].begin();
mas01mc@293 306 kk=H::k/2;
mas01mc@293 307 while( kk--)
mas01mc@293 308 *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z
mas01mc@293 309 }
mas01mc@293 310 #else
mas01mc@293 311 for( aa=0; aa < H::L ; aa++ ){
mas01mc@293 312 ui = H::uu[aa].begin();
mas01mc@293 313 for( kk = 0 ; kk < H::k ; kk++ ){
mas01mc@293 314 pb = *( H::b + aa ) + kk;
mas01mc@293 315 pA = * ( * ( H::A + aa ) + kk );
mas01mc@293 316 dd = H::d;
mas01mc@293 317 tmp = 0.;
mas01mc@293 318 vi = v.begin();
mas01mc@293 319 while( dd-- )
mas01mc@293 320 tmp += *pA++ * *vi++; // project
mas01mc@293 321 tmp += *pb; // translate
mas01mc@293 322 tmp *= iw; // scale
mas01mc@293 323 *ui++ = (Uns32T) (floor(tmp)); // floor
mas01mc@293 324 }
mas01mc@293 325 }
mas01mc@293 326 // Compute hash functions
mas01mc@293 327 for( aa=0 ; aa < H::L ; aa++ ){
mas01mc@293 328 pg= *( H::g + aa ); // L \times functions g_j(v) \in Z^k
mas01mc@293 329 // u_1 \in Z^{k \times d}
mas01mc@293 330 ui = H::uu[aa].begin();
mas01mc@293 331 kk=H::k;
mas01mc@293 332 while( kk-- )
mas01mc@293 333 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z
mas01mc@293 334 }
mas01mc@293 335 #endif
mas01mc@293 336 }
mas01mc@293 337
mas01mc@292 338 // make hash value \in Z
mas01mc@293 339 void H::generate_hash_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
mas01mc@293 340 H::t1 = computeProductModDefaultPrime( g, r1, H::k ) % H::N;
mas01mc@293 341 H::t2 = computeProductModDefaultPrime( g, r2, H::k );
mas01mc@292 342 }
mas01mc@292 343
mas01mc@292 344 #define CR_ASSERT(b){if(!(b)){fprintf(stderr, "ASSERT failed on line %d, file %s.\n", __LINE__, __FILE__); exit(1);}}
mas01mc@292 345
mas01mc@292 346 // Computes (a.b) mod UH_PRIME_DEFAULT
mas01mc@293 347 inline Uns32T H::computeProductModDefaultPrime(Uns32T *a, Uns32T *b, IntT size){
mas01mc@292 348 LongUns64T h = 0;
mas01mc@292 349
mas01mc@292 350 for(IntT i = 0; i < size; i++){
mas01mc@292 351 h = h + (LongUns64T)a[i] * (LongUns64T)b[i];
mas01mc@292 352 h = (h & TWO_TO_32_MINUS_1) + 5 * (h >> 32);
mas01mc@292 353 if (h >= UH_PRIME_DEFAULT) {
mas01mc@292 354 h = h - UH_PRIME_DEFAULT;
mas01mc@292 355 }
mas01mc@292 356 CR_ASSERT(h < UH_PRIME_DEFAULT);
mas01mc@292 357 }
mas01mc@292 358 return h;
mas01mc@292 359 }
mas01mc@292 360
mas01mc@292 361 Uns32T H::bucket_insert_point(bucket **pp){
mas01mc@296 362 collisionCount = 0;
mas01mc@340 363 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 364 if(!*pp){
mas01mc@292 365 *pp = new bucket();
mas01mc@292 366 (*pp)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@292 367 (*pp)->next = new bucket();
mas01mc@292 368 }
mas01mc@340 369 // The list head holds point collision count
mas01mc@340 370 if( (*pp)->t2 & LSH_CORE_ARRAY_BIT )
mas01mc@340 371 bucket_insert_point(get_pointer_to_bucket_linked_list(*pp)); // recurse
mas01mc@340 372 else{
mas01mc@340 373 collisionCount = (*pp)->t2;
mas01mc@340 374 if(collisionCount < H::C){ // Block if row is full
mas01mc@340 375 (*pp)->t2++; // Increment collision counter (numPoints in row)
mas01mc@340 376 pointCount++;
mas01mc@340 377 collisionCount++;
mas01mc@340 378 // Locate the bucket linked list
mas01mc@340 379 __bucket_insert_point( (*pp)->next );
mas01mc@340 380 }
mas01mc@292 381 }
mas01mc@340 382 #else // NOT USING LSH_LIST_HEAD_COUNTERS
mas01mc@340 383 if(!*pp)
mas01mc@340 384 *pp = new bucket();
mas01mc@296 385 pointCount++;
mas01mc@296 386 __bucket_insert_point(*pp); // No collision count storage
mas01mc@292 387 #endif
mas01mc@292 388 return collisionCount;
mas01mc@292 389 }
mas01mc@292 390
mas01mc@340 391 // insert points into hashtable row collision chain
mas01mc@292 392 void H::__bucket_insert_point(bucket* p){
mas01mc@292 393 if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
mas01mc@292 394 p->t2 = H::t2;
mas01mc@292 395 bucketCount++; // Record start of new point-locale collision chain
mas01mc@344 396 p->snext.ptr = new sbucket();
mas01mc@344 397 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 398 return;
mas01mc@292 399 }
mas01mc@292 400
mas01mc@292 401 if(p->t2 == H::t2){
mas01mc@344 402 __sbucket_insert_point(p->snext.ptr);
mas01mc@292 403 return;
mas01mc@292 404 }
mas01mc@292 405
mas01mc@292 406 if(p->next){
mas01mc@340 407 // Construct list in t2 order
mas01mc@340 408 if(H::t2 < p->next->t2){
mas01mc@340 409 bucket* tmp = new bucket();
mas01mc@340 410 tmp->next = p->next;
mas01mc@340 411 p->next = tmp;
mas01mc@340 412 __bucket_insert_point(tmp);
mas01mc@340 413 }
mas01mc@340 414 else
mas01mc@340 415 __bucket_insert_point(p->next);
mas01mc@292 416 }
mas01mc@340 417 else {
mas01mc@292 418 p->next = new bucket();
mas01mc@292 419 __bucket_insert_point(p->next);
mas01mc@292 420 }
mas01mc@292 421 }
mas01mc@292 422
mas01mc@340 423 // insert points into point-locale collision chain
mas01mc@292 424 void H::__sbucket_insert_point(sbucket* p){
mas01mc@292 425 if(p->pointID==IFLAG){
mas01mc@292 426 p->pointID = H::p;
mas01mc@292 427 return;
mas01mc@292 428 }
mas01mc@340 429
mas01mc@292 430 // Search for pointID
mas01mc@340 431 if(p->snext){
mas01mc@292 432 __sbucket_insert_point(p->snext);
mas01mc@292 433 }
mas01mc@292 434 else{
mas01mc@340 435 // Make new point collision bucket at end of list
mas01mc@340 436 p->snext = new sbucket();
mas01mc@340 437 __sbucket_insert_point(p->snext);
mas01mc@292 438 }
mas01mc@292 439 }
mas01mc@292 440
mas01mc@293 441 inline bucket** H::get_bucket(int j){
mas01mc@292 442 return *(h+j);
mas01mc@292 443 }
mas01mc@292 444
mas01mc@340 445 // Find the linked-list pointer at the end of the CORE_ARRAY
mas01mc@340 446 bucket** H::get_pointer_to_bucket_linked_list(bucket* rowPtr){
mas01mc@344 447 Uns32T numBuckets = rowPtr->snext.numBuckets; // Cast pointer to unsigned int
mas01mc@343 448 Uns32T numPoints = rowPtr->t2 & 0x7FFFFFFF; // Value is stored in low 31 bits of t2 field
mas01mc@343 449 bucket** listPtr = reinterpret_cast<bucket**> (reinterpret_cast<unsigned int*>(rowPtr->next)+numPoints+numBuckets+1);
mas01mc@343 450 return listPtr;
mas01mc@340 451 }
mas01mc@340 452
mas01mc@293 453 // Interface to Locality Sensitive Hashing G
mas01mc@293 454 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
mas01mc@293 455 H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
mas01mc@308 456 indexName(0),
mas01mc@293 457 lshHeader(0),
mas01mc@292 458 calling_instance(0),
mas01mc@293 459 add_point_callback(0)
mas01mc@292 460 {
mas01mc@293 461
mas01mc@292 462 }
mas01mc@292 463
mas01mc@292 464 // Serialize from file LSH constructor
mas01mc@292 465 // Read parameters from database file
mas01mc@292 466 // Load the hash functions, close the database
mas01mc@292 467 // Optionally load the LSH tables into head-allocated lists in core
mas01mc@292 468 G::G(char* filename, bool lshInCoreFlag):
mas01mc@293 469 H(), // default base-class constructor call delays data-structure initialization
mas01mc@309 470 indexName(0),
mas01mc@293 471 lshHeader(0),
mas01mc@292 472 calling_instance(0),
mas01mc@292 473 add_point_callback(0)
mas01mc@292 474 {
mas01mc@292 475 int dbfid = unserialize_lsh_header(filename);
mas01mc@309 476 indexName = new char[O2_INDEX_MAXSTR];
mas01mc@309 477 strncpy(indexName, filename, O2_INDEX_MAXSTR); // COPY THE CONTENTS TO THE NEW POINTER
mas01mc@293 478 H::initialize_lsh_functions(); // Base-class data-structure initialization
mas01mc@293 479 unserialize_lsh_functions(dbfid); // populate with on-disk hashfunction values
mas01mc@292 480
mas01mc@292 481 // Format1 only needs unserializing if specifically requested
mas01mc@292 482 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){
mas01mc@292 483 unserialize_lsh_hashtables_format1(dbfid);
mas01mc@292 484 }
mas01mc@292 485
mas01mc@292 486 // Format2 always needs unserializing
mas01mc@292 487 if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){
mas01mc@336 488 FILE* dbFile = fdopen(dbfid, "rb");
mas01mc@336 489 if(!dbFile)
mas01mc@336 490 error("Cannot open LSH file for reading", filename);
mas01mc@336 491 unserialize_lsh_hashtables_format2(dbFile);
mas01mc@292 492 }
mas01mc@336 493 serial_close(dbfid);
mas01mc@336 494 }
mas01mc@292 495
mas01mc@292 496 G::~G(){
mas01mc@292 497 delete lshHeader;
mas01mc@292 498 }
mas01mc@292 499
mas01mc@292 500 // single point insertion; inserted values are hash value and pointID
mas01mc@292 501 Uns32T G::insert_point(vector<float>& v, Uns32T pp){
mas01mc@292 502 Uns32T collisionCount = 0;
mas01mc@292 503 H::p = pp;
mas01mc@299 504 if(H::maxp && pp<=H::maxp)
mas01mc@296 505 error("points must be indexed in strict ascending order", "LSH::insert_point(vector<float>&, Uns32T pointID)");
mas01mc@296 506 H::maxp=pp; // Store highest pointID in database
mas01mc@293 507 H::compute_hash_functions( v );
mas01mc@292 508 for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion
mas01mc@293 509 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01mc@292 510 collisionCount += bucket_insert_point( *(h + j) + t1 );
mas01mc@292 511 }
mas01mc@292 512 return collisionCount;
mas01mc@292 513 }
mas01mc@292 514
mas01mc@292 515
mas01mc@292 516 // batch insert for a point set
mas01mc@292 517 // inserted values are vector hash value and pointID starting at basePointID
mas01mc@292 518 void G::insert_point_set(vector<vector<float> >& vv, Uns32T basePointID){
mas01mc@292 519 for(Uns32T point=0; point<vv.size(); point++)
mas01mc@292 520 insert_point(vv[point], basePointID+point);
mas01mc@292 521 }
mas01mc@292 522
mas01mc@292 523 // point retrieval routine
mas01mc@292 524 void G::retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 525 calling_instance = caller;
mas01mc@292 526 add_point_callback = add_point;
mas01mc@293 527 H::compute_hash_functions( v );
mas01mc@292 528 for(Uns32T j = 0 ; j < H::L ; j++ ){
mas01mc@293 529 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
mas01cr@369 530 if( bucket* bPtr = *(get_bucket(j) + get_t1()) ) {
mas01mc@340 531 #ifdef LSH_LIST_HEAD_COUNTERS
mas01cr@369 532 if(bPtr->t2&LSH_CORE_ARRAY_BIT) {
mas01mc@340 533 retrieve_from_core_hashtable_array((Uns32T*)(bPtr->next), qpos);
mas01cr@369 534 } else {
mas01mc@340 535 bucket_chain_point( bPtr->next, qpos);
mas01cr@369 536 }
mas01mc@292 537 #else
mas01cr@369 538 bucket_chain_point( bPtr , qpos);
mas01mc@292 539 #endif
mas01cr@369 540 }
mas01mc@292 541 }
mas01mc@292 542 }
mas01mc@292 543
mas01mc@292 544 void G::retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 545 for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ )
mas01mc@292 546 retrieve_point(vv[qpos], qpos, add_point, caller);
mas01mc@292 547 }
mas01mc@292 548
mas01mc@292 549 // export lsh tables to table structure on disk
mas01mc@292 550 //
mas01mc@292 551 // LSH TABLE STRUCTURE
mas01mc@292 552 // ---header 64 bytes ---
mas01mc@292 553 // [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0]
mas01mc@292 554 //
mas01mc@292 555 // ---random projections L x k x d float ---
mas01mc@292 556 // A[0][0][0] A[0][0][1] ... A[0][0][d-1]
mas01mc@292 557 // A[0][1][0] A[0][1][1] ... A[1][1][d-1]
mas01mc@292 558 // ...
mas01mc@292 559 // A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1]
mas01mc@292 560 // ...
mas01mc@292 561 // ...
mas01mc@292 562 // A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1]
mas01mc@292 563 // A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1]
mas01mc@292 564 // ...
mas01mc@292 565 // A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1]
mas01mc@292 566 //
mas01mc@292 567 // ---bias L x k float ---
mas01mc@292 568 // b[0][0] b[0][1] ... b[0][k-1]
mas01mc@292 569 // b[1][0] b[1][1] ... b[1][k-1]
mas01mc@292 570 // ...
mas01mc@292 571 // b[L-1][0] b[L-1][1] ... b[L-1][k-1]
mas01mc@292 572 //
mas01mc@292 573 // ---random r1 L x k float ---
mas01mc@292 574 // r1[0][0] r1[0][1] ... r1[0][k-1]
mas01mc@292 575 // r1[1][0] r1[1][1] ... r1[1][k-1]
mas01mc@292 576 // ...
mas01mc@292 577 // r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1]
mas01mc@292 578 //
mas01mc@292 579 // ---random r2 L x k float ---
mas01mc@292 580 // r2[0][0] r2[0][1] ... r2[0][k-1]
mas01mc@292 581 // r2[1][0] r2[1][1] ... r2[1][k-1]
mas01mc@292 582 // ...
mas01mc@292 583 // r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1]
mas01mc@292 584 //
mas01mc@293 585 // ******* HASHTABLES FORMAT1 (optimized for LSH_ON_DISK retrieval) *******
mas01mc@292 586 // ---hash table 0: N x C x 8 ---
mas01mc@292 587 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 588 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 589 // ...
mas01mc@292 590 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 591 //
mas01mc@292 592 // ---hash table 1: N x C x 8 ---
mas01mc@292 593 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 594 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 595 // ...
mas01mc@292 596 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 597 //
mas01mc@292 598 // ...
mas01mc@292 599 //
mas01mc@292 600 // ---hash table L-1: N x C x 8 ---
mas01mc@292 601 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 602 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 603 // ...
mas01mc@292 604 // [t2 pointID][t2 pointID]...[t2 pointID]
mas01mc@292 605 //
mas01mc@293 606 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) *******
mas01mc@293 607 //
mas01mc@293 608 // State machine controlled by regular expression.
mas01mc@293 609 // legend:
mas01mc@293 610 //
mas01mc@306 611 // O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU
mas01mc@306 612 // O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU
mas01mc@306 613 // O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU
mas01mc@293 614 //
mas01mc@306 615 // T1 - T1 hash token
mas01mc@306 616 // t1 - t1 hash key (t1 range 0..2^29-1)
mas01mc@306 617 // T2 - T2 token
mas01mc@293 618 // t2 - t2 hash key (range 1..2^32-6)
mas01mc@293 619 // p - point identifier (range 0..2^32-1)
mas01mc@306 620 // E - end hash table token
mas01mc@293 621 // {...} required arguments
mas01mc@293 622 // [...] optional arguments
mas01mc@293 623 // * - match zero or more occurences
mas01mc@293 624 // + - match one or more occurences
mas01mc@293 625 // {...}^L - repeat argument L times
mas01mc@293 626 //
mas01mc@293 627 // FORMAT2 Regular expression:
mas01mc@306 628 // { [T1 t1 [T2 t2 p+]+ ]* E }^L
mas01mc@293 629 //
mas01mc@292 630
mas01mc@292 631 // Serial header constructors
mas01mc@292 632 SerialHeader::SerialHeader(){;}
mas01mc@296 633 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc):
mas01mc@292 634 lshMagic(O2_SERIAL_MAGIC),
mas01mc@292 635 binWidth(W),
mas01mc@292 636 numTables(L),
mas01mc@292 637 numRows(N),
mas01mc@292 638 numCols(C),
mas01mc@292 639 elementSize(O2_SERIAL_ELEMENT_SIZE),
mas01mc@296 640 version(O2_SERIAL_VERSION),
mas01mc@296 641 size(0), // we are deprecating this value
mas01mc@292 642 flags(FMT),
mas01mc@292 643 dataDim(d),
mas01mc@292 644 numFuns(k),
mas01mc@292 645 radius(r),
mas01mc@296 646 maxp(p),
mas01mc@296 647 size_long((unsigned long long)L * align_up((unsigned long long)N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables
mas01mc@296 648 + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions
mas01mc@296 649 (unsigned long long)L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())),
mas01mc@296 650 pointCount(pc){
mas01mc@296 651
mas01mc@296 652 if(FMT==O2_SERIAL_FILEFORMAT2)
mas01mc@296 653 size_long = (unsigned long long)align_up(O2_SERIAL_HEADER_SIZE
mas01mc@296 654 + (unsigned long long)L*k*(sizeof(float)*d+2+sizeof(Uns32T)
mas01mc@296 655 +sizeof(float)) + (unsigned long long)pc*16UL,get_page_logn());
mas01mc@296 656 } // header
mas01mc@292 657
mas01mc@292 658 float* G::get_serial_hashfunction_base(char* db){
mas01mc@292 659 if(db&&lshHeader)
mas01mc@292 660 return (float*)(db+O2_SERIAL_HEADER_SIZE);
mas01mc@292 661 else return NULL;
mas01mc@292 662 }
mas01mc@292 663
mas01mc@292 664 SerialElementT* G::get_serial_hashtable_base(char* db){
mas01mc@292 665 if(db&&lshHeader)
mas01mc@292 666 return (SerialElementT*)(db+get_serial_hashtable_offset());
mas01mc@292 667 else
mas01mc@292 668 return NULL;
mas01mc@292 669 }
mas01mc@292 670
mas01mc@292 671 Uns32T G::get_serial_hashtable_offset(){
mas01mc@292 672 if(lshHeader)
mas01mc@292 673 return align_up(O2_SERIAL_HEADER_SIZE +
mas01mc@292 674 L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn());
mas01mc@292 675 else
mas01mc@292 676 return 0;
mas01mc@292 677 }
mas01mc@292 678
mas01mc@292 679 void G::serialize(char* filename, Uns32T serialFormat){
mas01mc@292 680 int dbfid;
mas01mc@292 681 char* db;
mas01mc@292 682 int dbIsNew=0;
mas01mc@292 683
mas01mc@292 684 // Check requested serialFormat
mas01mc@292 685 if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2))
mas01mc@292 686 error("Unrecognized serial file format request: ", "serialize()");
mas01mc@296 687
mas01mc@292 688 // Test to see if file exists
mas01cr@369 689 if((dbfid = open (filename, O_RDONLY)) < 0) {
mas01mc@292 690 // If it doesn't, then create the file (CREATE)
mas01cr@369 691 if(errno == ENOENT) {
mas01mc@292 692 // Create the file
mas01mc@292 693 std::cout << "Creating new serialized LSH database:" << filename << "...";
mas01mc@292 694 std::cout.flush();
mas01mc@292 695 serial_create(filename, serialFormat);
mas01mc@292 696 dbIsNew=1;
mas01cr@369 697 } else {
mas01mc@292 698 // The file can't be opened
mas01mc@292 699 error("Can't open the file", filename, "open");
mas01cr@369 700 }
mas01cr@369 701 }
mas01mc@292 702
mas01mc@292 703 // Load the on-disk header into core
mas01mc@292 704 dbfid = serial_open(filename, 1); // open for write
mas01mc@292 705 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@292 706 serial_get_header(db); // read header
mas01mc@292 707 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 708
mas01mc@292 709 // Check compatibility of core and disk data structures
mas01mc@292 710 if( !serial_can_merge(serialFormat) )
mas01mc@292 711 error("Incompatible core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 712
mas01mc@292 713 // For new LSH databases write the hashfunctions
mas01mc@292 714 if(dbIsNew)
mas01mc@292 715 serialize_lsh_hashfunctions(dbfid);
mas01mc@292 716 // Write the hashtables in the requested format
mas01mc@292 717 if(serialFormat == O2_SERIAL_FILEFORMAT1)
mas01mc@292 718 serialize_lsh_hashtables_format1(dbfid, !dbIsNew);
mas01mc@336 719 else{
mas01mc@336 720 FILE* dbFile = fdopen(dbfid, "r+b");
mas01mc@336 721 if(!dbFile)
mas01mc@336 722 error("Cannot open LSH file for writing",filename);
mas01mc@336 723 serialize_lsh_hashtables_format2(dbFile, !dbIsNew);
mas01mc@336 724 fflush(dbFile);
mas01mc@336 725 }
mas01mc@292 726
mas01mc@336 727 if(!dbIsNew) {
mas01mc@292 728 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
mas01mc@292 729 //serial_get_header(db); // read header
mas01mc@293 730 cout << "maxp = " << H::maxp << endl;
mas01mc@293 731 lshHeader->maxp=H::maxp;
mas01mc@292 732 // Default to FILEFORMAT1
mas01mc@292 733 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2))
mas01mc@294 734 lshHeader->flags|=O2_SERIAL_FILEFORMAT1;
mas01mc@292 735 memcpy((char*)db, (char*)lshHeader, sizeof(SerialHeaderT));
mas01mc@292 736 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@336 737 }
mas01mc@336 738 serial_close(dbfid);
mas01mc@292 739 }
mas01mc@292 740
mas01mc@292 741 // Test to see if core structure and requested format is
mas01mc@292 742 // compatible with currently opened database
mas01mc@292 743 int G::serial_can_merge(Uns32T format){
mas01mc@292 744 SerialHeaderT* that = lshHeader;
mas01mc@292 745 if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 746 || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2)
mas01mc@292 747 || !( this->w == that->binWidth &&
mas01mc@296 748 this->L == that->numTables &&
mas01mc@296 749 this->N == that->numRows &&
mas01mc@296 750 this->k == that->numFuns &&
mas01mc@296 751 this->d == that->dataDim &&
mas01mc@296 752 sizeof(SerialElementT) == that->elementSize &&
mas01mc@296 753 this->radius == that->radius)){
mas01mc@292 754 serial_print_header(format);
mas01mc@292 755 return 0;
mas01mc@292 756 }
mas01mc@292 757 else
mas01mc@292 758 return 1;
mas01mc@292 759 }
mas01mc@292 760
mas01mc@292 761 // Used as an error message for serial_can_merge()
mas01mc@292 762 void G::serial_print_header(Uns32T format){
mas01mc@292 763 std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl;
mas01mc@292 764 std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl;
mas01mc@292 765 std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl;
mas01mc@292 766 std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl;
mas01mc@292 767 std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl;
mas01mc@292 768 std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl;
mas01mc@292 769 std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl;
mas01mc@292 770 std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl;
mas01mc@292 771 }
mas01mc@292 772
mas01mc@292 773 int G::serialize_lsh_hashfunctions(int fid){
mas01mc@292 774 float* pf;
mas01mc@292 775 Uns32T *pu;
mas01mc@292 776 Uns32T x,y,z;
mas01mc@292 777
mas01mc@293 778 char* db = serial_mmap(fid, get_serial_hashtable_offset(), 1);// get database pointer
mas01mc@292 779 pf = get_serial_hashfunction_base(db);
mas01mc@292 780
mas01mc@292 781 // HASH FUNCTIONS
mas01mc@292 782 // Write the random projectors A[][][]
mas01mc@292 783 #ifdef USE_U_FUNCTIONS
mas01mc@292 784 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 785 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 786 #else
mas01mc@292 787 for( x = 0 ; x < H::L ; x++ )
mas01mc@292 788 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 789 #endif
mas01mc@292 790 for( z = 0 ; z < d ; z++ )
mas01mc@293 791 *pf++ = H::A[x][y][z];
mas01mc@292 792
mas01mc@292 793 // Write the random biases b[][]
mas01mc@292 794 #ifdef USE_U_FUNCTIONS
mas01mc@292 795 for( x = 0 ; x < H::m ; x++ )
mas01mc@292 796 for( y = 0 ; y < H::k/2 ; y++ )
mas01mc@292 797 #else
mas01mc@292 798 for( x = 0 ; x < H::L ; x++ )
mas01mc@292 799 for( y = 0 ; y < H::k ; y++ )
mas01mc@292 800 #endif
mas01mc@293 801 *pf++ = H::b[x][y];
mas01mc@292 802
mas01mc@292 803 pu = (Uns32T*)pf;
mas01mc@292 804
mas01mc@292 805 // Write the Z projectors r1[][]
mas01mc@292 806 for( x = 0 ; x < H::L ; x++)
mas01mc@292 807 for( y = 0 ; y < H::k ; y++)
mas01mc@293 808 *pu++ = H::r1[x][y];
mas01mc@292 809
mas01mc@292 810 // Write the Z projectors r2[][]
mas01mc@292 811 for( x = 0 ; x < H::L ; x++)
mas01mc@292 812 for( y = 0; y < H::k ; y++)
mas01mc@293 813 *pu++ = H::r2[x][y];
mas01mc@292 814
mas01mc@292 815 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 816 return 1;
mas01mc@292 817 }
mas01mc@292 818
mas01mc@292 819 int G::serialize_lsh_hashtables_format1(int fid, int merge){
mas01mc@292 820 SerialElementT *pe, *pt;
mas01mc@292 821 Uns32T x,y;
mas01mc@292 822
mas01mc@292 823 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) )
mas01mc@292 824 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 825
mas01mc@292 826 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 827 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount;
mas01mc@292 828 // Write the hash tables
mas01mc@292 829 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 830 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1...";
mas01mc@292 831 std::cout.flush();
mas01mc@292 832 // memory map a single hash table for sequential access
mas01mc@292 833 // Align each hash table to page boundary
mas01mc@292 834 char* dbtable = serial_mmap(fid, hashTableSize, 1,
mas01mc@292 835 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@324 836 #ifdef __CYGWIN__
mas01mc@324 837 // No madvise in CYGWIN
mas01mc@324 838 #else
mas01mc@292 839 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 840 error("could not advise hashtable memory","","madvise");
mas01mc@324 841 #endif
mas01mc@292 842 maxColCount=0;
mas01mc@292 843 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 844 meanColCount=0;
mas01mc@292 845 colCountN=0;
mas01mc@292 846 pt=(SerialElementT*)dbtable;
mas01mc@292 847 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 848 // Move disk pointer to beginning of row
mas01mc@292 849 pe=pt+y*lshHeader->numCols;
mas01mc@292 850
mas01mc@292 851 colCount=0;
mas01cr@369 852 if(bucket* bPtr = h[x][y]) {
mas01cr@369 853 if(merge) {
mas01mc@340 854 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@292 855 serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01cr@369 856 } else {
mas01mc@292 857 serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 858 #else
mas01cr@369 859 serial_merge_hashtable_row_format1(pe, bPtr, colCount);
mas01cr@369 860 } else {
mas01cr@369 861 serial_write_hashtable_row_format1(pe, bPtr, colCount);
mas01mc@292 862 #endif
mas01cr@369 863 }
mas01cr@369 864 }
mas01mc@292 865 if(colCount){
mas01mc@292 866 if(colCount<minColCount)
mas01mc@292 867 minColCount=colCount;
mas01mc@292 868 if(colCount>maxColCount)
mas01mc@292 869 maxColCount=colCount;
mas01mc@292 870 meanColCount+=colCount;
mas01mc@292 871 colCountN++;
mas01mc@292 872 }
mas01mc@292 873 }
mas01mc@292 874 if(colCountN)
mas01mc@292 875 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 876 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 877 << endl;
mas01mc@292 878 serial_munmap(dbtable, hashTableSize);
mas01mc@292 879 }
mas01mc@292 880
mas01mc@292 881 // We're done writing
mas01mc@292 882 return 1;
mas01mc@292 883 }
mas01mc@292 884
mas01mc@292 885 void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){
mas01mc@292 886 while(b && b->t2!=IFLAG){
mas01mc@292 887 SerialElementT*pe=pr; // reset disk pointer to beginning of row
mas01mc@344 888 serial_merge_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 889 b=b->next;
mas01mc@292 890 }
mas01mc@292 891 }
mas01mc@292 892
mas01mc@292 893 void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 894 while(sb){
mas01mc@292 895 if(colCount==lshHeader->numCols){
mas01mc@292 896 std::cout << "!point-chain full " << endl;
mas01mc@292 897 return;
mas01mc@292 898 }
mas01mc@292 899 Uns32T c=0;
mas01mc@292 900 // Merge collision chains
mas01mc@292 901 while(c<lshHeader->numCols){
mas01mc@292 902 if( (pe+c)->hashValue==IFLAG){
mas01mc@292 903 (pe+c)->hashValue=t2;
mas01mc@292 904 (pe+c)->pointID=sb->pointID;
mas01mc@292 905 colCount=c+1;
mas01mc@292 906 if(c+1<lshHeader->numCols)
mas01mc@292 907 (pe+c+1)->hashValue=IFLAG;
mas01mc@292 908 break;
mas01mc@292 909 }
mas01mc@292 910 c++;
mas01mc@292 911 }
mas01mc@292 912 sb=sb->snext;
mas01mc@292 913 }
mas01mc@292 914 return;
mas01mc@292 915 }
mas01mc@292 916
mas01mc@292 917 void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){
mas01mc@292 918 pe->hashValue=IFLAG;
mas01mc@292 919 while(b && b->t2!=IFLAG){
mas01mc@344 920 serial_write_element_format1(pe, b->snext.ptr, b->t2, colCount);
mas01mc@292 921 b=b->next;
mas01mc@292 922 }
mas01mc@292 923 }
mas01mc@292 924
mas01mc@292 925 void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
mas01mc@292 926 while(sb){
mas01mc@292 927 if(colCount==lshHeader->numCols){
mas01mc@292 928 std::cout << "!point-chain full " << endl;
mas01mc@292 929 return;
mas01mc@292 930 }
mas01mc@292 931 pe->hashValue=t2;
mas01mc@292 932 pe->pointID=sb->pointID;
mas01mc@292 933 pe++;
mas01mc@292 934 colCount++;
mas01mc@292 935 sb=sb->snext;
mas01mc@292 936 }
mas01mc@292 937 pe->hashValue=IFLAG;
mas01mc@292 938 return;
mas01mc@292 939 }
mas01mc@292 940
mas01mc@336 941 int G::serialize_lsh_hashtables_format2(FILE* dbFile, int merge){
mas01mc@292 942 Uns32T x,y;
mas01mc@292 943
mas01mc@292 944 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
mas01mc@292 945 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
mas01mc@292 946
mas01mc@340 947 // We must pereform FORMAT2 merges in core FORMAT1 (dynamic list structure)
mas01mc@292 948 if(merge)
mas01mc@340 949 unserialize_lsh_hashtables_format2(dbFile, merge);
mas01mc@292 950
mas01mc@292 951 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
mas01mc@336 952 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 953 fclose(dbFile);
mas01mc@336 954 error("fSeek error in serialize_lsh_hashtables_format2");
mas01mc@336 955 }
mas01mc@292 956
mas01mc@292 957 // Write the hash tables
mas01mc@292 958 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 959 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2...";
mas01mc@292 960 std::cout.flush();
mas01mc@292 961 maxColCount=0;
mas01mc@292 962 minColCount=O2_SERIAL_MAX_COLS;
mas01mc@292 963 meanColCount=0;
mas01mc@292 964 colCountN=0;
mas01mc@292 965 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 966 colCount=0;
mas01mc@292 967 if(bucket* bPtr = h[x][y]){
mas01mc@306 968 // Check for empty row (even though row was allocated)
mas01mc@340 969 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@306 970 if(bPtr->next->t2==IFLAG){
mas01mc@336 971 fclose(dbFile);
mas01mc@306 972 error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 973 }
mas01mc@306 974 #else
mas01mc@306 975 if(bPtr->t2==IFLAG){
mas01mc@336 976 fclose(dbFile);
mas01mc@306 977 error("b->t2==IFLAG","serialize_lsh_hashtables_format2()");
mas01mc@306 978 }
mas01mc@306 979 #endif
mas01mc@306 980 t1 = O2_SERIAL_TOKEN_T1;
mas01mc@340 981 WRITE_UNS32(&t1, "[T1]");
mas01mc@306 982 t1 = y;
mas01mc@340 983 WRITE_UNS32(&t1, "[t1]");
mas01mc@340 984 #ifdef LSH_CORE_ARRAY
mas01mc@340 985 t1 = count_buckets_and_points_hashtable_row(bPtr);
mas01mc@340 986 WRITE_UNS32(&t1,"[count]"); // write numElements
mas01mc@340 987 #endif
mas01mc@340 988 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@336 989 serial_write_hashtable_row_format2(dbFile, bPtr->next, colCount); // skip collision counter bucket
mas01mc@292 990 #else
mas01mc@340 991 serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
mas01mc@292 992 #endif
mas01mc@292 993 }
mas01mc@292 994 if(colCount){
mas01mc@292 995 if(colCount<minColCount)
mas01mc@292 996 minColCount=colCount;
mas01mc@292 997 if(colCount>maxColCount)
mas01mc@292 998 maxColCount=colCount;
mas01mc@292 999 meanColCount+=colCount;
mas01mc@292 1000 colCountN++;
mas01mc@292 1001 }
mas01mc@292 1002 }
mas01mc@292 1003 // Write END of table marker
mas01mc@306 1004 t1 = O2_SERIAL_TOKEN_ENDTABLE;
mas01mc@340 1005 WRITE_UNS32(&t1,"[end]");
mas01mc@292 1006 if(colCountN)
mas01mc@292 1007 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
mas01mc@292 1008 << ", min = " << minColCount << ", max = " << maxColCount
mas01mc@292 1009 << endl;
mas01mc@340 1010 }
mas01mc@292 1011 // We're done writing
mas01mc@292 1012 return 1;
mas01mc@292 1013 }
mas01mc@292 1014
mas01mc@340 1015 Uns32T G::count_buckets_and_points_hashtable_row(bucket* bPtr){
mas01mc@340 1016 Uns32T total_count = 0;
mas01mc@340 1017 bucket* p = 0;
mas01mc@340 1018
mas01mc@340 1019 // count points
mas01mc@340 1020 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1021 total_count = bPtr->t2; // points already counted
mas01mc@340 1022 p = bPtr->next;
mas01mc@340 1023 #else
mas01mc@340 1024 total_count = count_points_hashtable_row(bPtr);
mas01mc@340 1025 p = bPtr;
mas01mc@340 1026 #endif
mas01mc@340 1027
mas01mc@340 1028 // count buckets
mas01mc@340 1029 do{
mas01mc@340 1030 total_count++;
mas01mc@340 1031 }while((p=p->next));
mas01mc@340 1032
mas01mc@340 1033 return total_count;
mas01mc@340 1034 }
mas01mc@340 1035
mas01mc@340 1036 Uns32T G::count_points_hashtable_row(bucket* bPtr){
mas01mc@340 1037 Uns32T point_count = 0;
mas01mc@340 1038 bucket* p = bPtr;
mas01mc@340 1039 sbucket* s = 0;
mas01mc@340 1040 while(p){
mas01mc@344 1041 s = p->snext.ptr;
mas01mc@340 1042 while(s){
mas01mc@340 1043 point_count++;
mas01mc@340 1044 s=s->snext;
mas01mc@340 1045 }
mas01mc@340 1046 p=p->next;
mas01mc@340 1047 }
mas01mc@340 1048 return point_count;
mas01mc@340 1049 }
mas01mc@340 1050
mas01mc@336 1051 void G::serial_write_hashtable_row_format2(FILE* dbFile, bucket* b, Uns32T& colCount){
mas01mc@292 1052 while(b && b->t2!=IFLAG){
mas01mc@344 1053 if(!b->snext.ptr){
mas01mc@336 1054 fclose(dbFile);
mas01mc@306 1055 error("Empty collision chain in serial_write_hashtable_row_format2()");
mas01mc@306 1056 }
mas01mc@306 1057 t2 = O2_SERIAL_TOKEN_T2;
mas01mc@336 1058 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1059 fclose(dbFile);
mas01mc@292 1060 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1061 }
mas01mc@292 1062 t2 = b->t2;
mas01mc@336 1063 if( fwrite(&t2, sizeof(Uns32T), 1, dbFile) != 1 ){
mas01mc@336 1064 fclose(dbFile);
mas01mc@292 1065 error("write error in serial_write_hashtable_row_format2()");
mas01mc@292 1066 }
mas01mc@344 1067 serial_write_element_format2(dbFile, b->snext.ptr, colCount);
mas01mc@292 1068 b=b->next;
mas01mc@292 1069 }
mas01mc@292 1070 }
mas01mc@292 1071
mas01mc@336 1072 void G::serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount){
mas01mc@292 1073 while(sb){
mas01mc@336 1074 if(fwrite(&sb->pointID, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1075 fclose(dbFile);
mas01mc@292 1076 error("Write error in serial_write_element_format2()");
mas01mc@292 1077 }
mas01mc@292 1078 colCount++;
mas01mc@292 1079 sb=sb->snext;
mas01mc@292 1080 }
mas01mc@292 1081 }
mas01mc@292 1082
mas01mc@292 1083
mas01mc@292 1084 int G::serial_create(char* filename, Uns32T FMT){
mas01mc@292 1085 return serial_create(filename, w, L, N, C, k, d, FMT);
mas01mc@292 1086 }
mas01mc@292 1087
mas01mc@292 1088
mas01mc@292 1089 int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols,
mas01mc@292 1090 Uns32T numFuns, Uns32T dim, Uns32T FMT){
mas01mc@292 1091
mas01mc@292 1092 if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS
mas01mc@292 1093 || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS
mas01mc@292 1094 || dim>O2_SERIAL_MAX_DIM){
mas01mc@292 1095 error("LSH parameters out of bounds for serialization");
mas01mc@292 1096 }
mas01mc@292 1097
mas01mc@292 1098 int dbfid;
mas01mc@292 1099 if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
mas01mc@292 1100 error("Can't create serial file", filename, "open");
mas01mc@292 1101 get_lock(dbfid, 1);
mas01mc@292 1102
mas01mc@292 1103 // Make header first to get size of serialized database
mas01mc@296 1104 lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT, pointCount);
mas01mc@296 1105
mas01mc@296 1106 cout << "file size: <=" << lshHeader->get_size()/1024UL << "KB" << endl;
mas01mc@296 1107 if(lshHeader->get_size()>O2_SERIAL_MAXFILESIZE)
mas01mc@296 1108 error("Maximum size of LSH file exceded: > 4000MB");
mas01mc@296 1109
mas01mc@292 1110 // go to the location corresponding to the last byte
mas01mc@292 1111 if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1)
mas01mc@292 1112 error("lseek error in db file", "", "lseek");
mas01mc@292 1113
mas01mc@292 1114 // write a dummy byte at the last location
mas01mc@292 1115 if (write (dbfid, "", 1) != 1)
mas01mc@292 1116 error("write error", "", "write");
mas01mc@292 1117
mas01mc@293 1118 char* db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);
mas01mc@296 1119
mas01mc@292 1120 memcpy (db, lshHeader, O2_SERIAL_HEADER_SIZE);
mas01mc@296 1121
mas01mc@292 1122 serial_munmap(db, O2_SERIAL_HEADER_SIZE);
mas01mc@296 1123
mas01mc@292 1124 close(dbfid);
mas01mc@292 1125
mas01mc@292 1126 std::cout << "done initializing tables." << endl;
mas01mc@292 1127
mas01mc@292 1128 return 1;
mas01mc@292 1129 }
mas01mc@292 1130
mas01mc@292 1131 char* G::serial_mmap(int dbfid, Uns32T memSize, Uns32T forWrite, off_t offset){
mas01mc@293 1132 char* db;
mas01mc@292 1133 if(forWrite){
mas01mc@292 1134 if ((db = (char*) mmap(0, memSize, PROT_READ | PROT_WRITE,
mas01mc@292 1135 MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@292 1136 error("mmap error in request for writable serialized database", "", "mmap");
mas01mc@292 1137 }
mas01mc@292 1138 else if ((db = (char*) mmap(0, memSize, PROT_READ, MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
mas01mc@292 1139 error("mmap error in read-only serialized database", "", "mmap");
mas01mc@292 1140
mas01mc@292 1141 return db;
mas01mc@292 1142 }
mas01mc@292 1143
mas01mc@292 1144 SerialHeaderT* G::serial_get_header(char* db){
mas01mc@292 1145 lshHeader = new SerialHeaderT();
mas01mc@292 1146 memcpy((char*)lshHeader, db, sizeof(SerialHeaderT));
mas01mc@292 1147
mas01mc@292 1148 if(lshHeader->lshMagic!=O2_SERIAL_MAGIC)
mas01mc@292 1149 error("Not an LSH database file");
mas01mc@292 1150
mas01mc@292 1151 return lshHeader;
mas01mc@292 1152 }
mas01mc@292 1153
mas01mc@292 1154 void G::serial_munmap(char* db, Uns32T N){
mas01mc@292 1155 munmap(db, N);
mas01mc@292 1156 }
mas01mc@292 1157
mas01mc@292 1158 int G::serial_open(char* filename, int writeFlag){
mas01mc@292 1159 int dbfid;
mas01mc@292 1160 if(writeFlag){
mas01mc@292 1161 if ((dbfid = open (filename, O_RDWR)) < 0)
mas01mc@292 1162 error("Can't open serial file for read/write", filename, "open");
mas01mc@292 1163 get_lock(dbfid, writeFlag);
mas01mc@292 1164 }
mas01mc@292 1165 else{
mas01mc@292 1166 if ((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1167 error("Can't open serial file for read", filename, "open");
mas01mc@292 1168 get_lock(dbfid, 0);
mas01mc@292 1169 }
mas01mc@292 1170
mas01mc@292 1171 return dbfid;
mas01mc@292 1172 }
mas01mc@292 1173
mas01mc@292 1174 void G::serial_close(int dbfid){
mas01mc@292 1175
mas01mc@292 1176 release_lock(dbfid);
mas01mc@292 1177 close(dbfid);
mas01mc@292 1178 }
mas01mc@292 1179
mas01mc@292 1180 int G::unserialize_lsh_header(char* filename){
mas01mc@292 1181
mas01mc@292 1182 int dbfid;
mas01mc@292 1183 char* db;
mas01mc@292 1184 // Test to see if file exists
mas01mc@292 1185 if((dbfid = open (filename, O_RDONLY)) < 0)
mas01mc@292 1186 error("Can't open the file", filename, "open");
mas01mc@292 1187 close(dbfid);
mas01mc@292 1188 dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1189 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1190 serial_get_header(db); // read header
mas01mc@292 1191 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
mas01mc@292 1192
mas01mc@292 1193 // Unserialize header parameters
mas01mc@292 1194 H::L = lshHeader->numTables;
mas01mc@292 1195 H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0);
mas01mc@292 1196 H::N = lshHeader->numRows;
mas01mc@292 1197 H::C = lshHeader->numCols;
mas01mc@292 1198 H::k = lshHeader->numFuns;
mas01mc@292 1199 H::d = lshHeader->dataDim;
mas01mc@293 1200 H::w = lshHeader->binWidth;
mas01mc@293 1201 H::radius = lshHeader->radius;
mas01mc@293 1202 H::maxp = lshHeader->maxp;
mas01mc@296 1203 H::pointCount = lshHeader->pointCount;
mas01mc@292 1204
mas01mc@292 1205 return dbfid;
mas01mc@292 1206 }
mas01mc@292 1207
mas01mc@292 1208 // unserialize the LSH parameters
mas01mc@292 1209 // we leave the LSH tree on disk as a flat file
mas01mc@292 1210 // it is this flat file that we search by memory mapping
mas01mc@292 1211 void G::unserialize_lsh_functions(int dbfid){
mas01mc@292 1212 Uns32T j, kk;
mas01mc@292 1213 float* pf;
mas01mc@292 1214 Uns32T* pu;
mas01mc@292 1215
mas01mc@292 1216 // Load the hash functions into core
mas01mc@292 1217 char* db = serial_mmap(dbfid, get_serial_hashtable_offset(), 0);// get database pointer again
mas01mc@292 1218
mas01mc@292 1219 pf = get_serial_hashfunction_base(db);
mas01mc@292 1220
mas01mc@292 1221 #ifdef USE_U_FUNCTIONS
mas01mc@292 1222 for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v)
mas01mc@292 1223 for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1224 #else
mas01mc@292 1225 for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v)
mas01mc@292 1226 for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions
mas01mc@292 1227 #endif
mas01mc@292 1228 for(Uns32T i = 0 ; i < H::d ; i++ )
mas01mc@293 1229 H::A[j][kk][i] = *pf++; // Normally distributed random vectors
mas01mc@292 1230 }
mas01mc@292 1231 }
mas01mc@292 1232 #ifdef USE_U_FUNCTIONS
mas01mc@292 1233 for( j = 0 ; j < H::m ; j++ ) // biases b
mas01mc@292 1234 for( kk = 0 ; kk < H::k/2 ; kk++ )
mas01mc@292 1235 #else
mas01mc@292 1236 for( j = 0 ; j < H::L ; j++ ) // biases b
mas01mc@292 1237 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1238 #endif
mas01mc@293 1239 H::b[j][kk] = *pf++;
mas01mc@292 1240
mas01mc@292 1241 pu = (Uns32T*)pf;
mas01mc@292 1242 for( j = 0 ; j < H::L ; j++ ) // Z projectors r1
mas01mc@292 1243 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1244 H::r1[j][kk] = *pu++;
mas01mc@292 1245
mas01mc@292 1246 for( j = 0 ; j < H::L ; j++ ) // Z projectors r2
mas01mc@292 1247 for( kk = 0 ; kk < H::k ; kk++ )
mas01mc@292 1248 H::r2[j][kk] = *pu++;
mas01mc@292 1249
mas01mc@293 1250 serial_munmap(db, get_serial_hashtable_offset());
mas01mc@292 1251 }
mas01mc@292 1252
mas01mc@292 1253 void G::unserialize_lsh_hashtables_format1(int fid){
mas01mc@292 1254 SerialElementT *pe, *pt;
mas01mc@292 1255 Uns32T x,y;
mas01mc@292 1256 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1257 // Read the hash tables into core
mas01mc@292 1258 for( x = 0 ; x < H::L ; x++ ){
mas01mc@292 1259 // memory map a single hash table
mas01mc@292 1260 // Align each hash table to page boundary
mas01mc@292 1261 char* dbtable = serial_mmap(fid, hashTableSize, 0,
mas01mc@292 1262 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
mas01mc@324 1263 #ifdef __CYGWIN__
mas01mc@324 1264 // No madvise in CYGWIN
mas01mc@324 1265 #else
mas01mc@292 1266 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 1267 error("could not advise hashtable memory","","madvise");
mas01mc@324 1268 #endif
mas01mc@292 1269 pt=(SerialElementT*)dbtable;
mas01mc@292 1270 for( y = 0 ; y < H::N ; y++ ){
mas01mc@292 1271 // Move disk pointer to beginning of row
mas01mc@292 1272 pe=pt+y*lshHeader->numCols;
mas01mc@292 1273 unserialize_hashtable_row_format1(pe, h[x]+y);
mas01mc@340 1274 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1275 printf("S[%d,%d]", x, y);
mas01mc@292 1276 serial_bucket_dump(pe);
mas01mc@292 1277 printf("C[%d,%d]", x, y);
mas01mc@292 1278 dump_hashtable_row(h[x][y]);
mas01mc@292 1279 #endif
mas01mc@292 1280 }
mas01mc@292 1281 serial_munmap(dbtable, hashTableSize);
mas01mc@292 1282 }
mas01mc@292 1283 }
mas01mc@292 1284
mas01mc@292 1285 void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){
mas01mc@292 1286 Uns32T colCount = 0;
mas01mc@292 1287 while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){
mas01mc@292 1288 H::p = pe->pointID; // current point ID
mas01mc@292 1289 t2 = pe->hashValue;
mas01mc@292 1290 bucket_insert_point(b);
mas01mc@292 1291 pe++;
mas01mc@292 1292 colCount++;
mas01mc@292 1293 }
mas01mc@292 1294 }
mas01mc@292 1295
mas01mc@340 1296 void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
mas01mc@292 1297 Uns32T x=0,y=0;
mas01mc@292 1298
mas01mc@292 1299 // Seek to hashtable base offset
mas01mc@336 1300 if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
mas01mc@336 1301 fclose(dbFile);
mas01mc@336 1302 error("fSeek error in unserialize_lsh_hashtables_format2");
mas01mc@292 1303 }
mas01mc@292 1304
mas01mc@292 1305 // Read the hash tables into core (structure is given in header)
mas01mc@292 1306 while( x < H::L){
mas01mc@336 1307 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1308 fclose(dbFile);
mas01mc@292 1309 error("Read error","unserialize_lsh_hashtables_format2()");
mas01mc@292 1310 }
mas01mc@306 1311 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE)
mas01mc@292 1312 x++; // End of table
mas01mc@292 1313 else
mas01mc@292 1314 while(y < H::N){
mas01mc@306 1315 // Read a row and move file pointer to beginning of next row or _bittable
mas01mc@306 1316 if(!(H::t1==O2_SERIAL_TOKEN_T1)){
mas01mc@336 1317 fclose(dbFile);
mas01mc@306 1318 error("State matchine error T1","unserialize_lsh_hashtables_format2()");
mas01mc@292 1319 }
mas01mc@336 1320 if(fread(&(H::t1), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1321 fclose(dbFile);
mas01mc@306 1322 error("Read error: t1","unserialize_lsh_hashtables_format2()");
mas01mc@306 1323 }
mas01mc@306 1324 y = H::t1;
mas01mc@292 1325 if(y>=H::N){
mas01mc@336 1326 fclose(dbFile);
mas01mc@292 1327 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
mas01mc@292 1328 }
mas01mc@340 1329 Uns32T token = 0;
mas01mc@340 1330 #ifdef LSH_CORE_ARRAY
mas01mc@340 1331 Uns32T numElements;
mas01mc@340 1332 if(fread(&numElements, sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1333 fclose(dbFile);
mas01mc@340 1334 error("Read error: numElements","unserialize_lsh_hashtables_format2()");
mas01mc@340 1335 }
mas01mc@292 1336
mas01mc@340 1337 // BACKWARD COMPATIBILITY: check to see if T2 or END token was read
mas01mc@340 1338 if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
mas01mc@340 1339 forMerge=true; // Force use of dynamic linked list core format
mas01mc@340 1340 token = numElements;
mas01mc@340 1341 }
mas01mc@340 1342
mas01mc@340 1343 if(forMerge)
mas01mc@340 1344 // Use linked list CORE format
mas01mc@340 1345 token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
mas01mc@340 1346 else
mas01mc@340 1347 // Use ARRAY CORE format with numElements counter
mas01mc@340 1348 token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
mas01mc@340 1349 #else
mas01mc@340 1350 token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
mas01mc@340 1351 #endif
mas01mc@340 1352
mas01mc@340 1353 #ifdef LSH_DUMP_CORE_TABLES
mas01mc@292 1354 printf("C[%d,%d]", x, y);
mas01mc@292 1355 dump_hashtable_row(h[x][y]);
mas01mc@292 1356 #endif
mas01mc@292 1357 // Check that token is valid
mas01mc@306 1358 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){
mas01mc@336 1359 fclose(dbFile);
mas01mc@292 1360 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
mas01mc@292 1361 }
mas01mc@292 1362 // Check for end of table flag
mas01mc@306 1363 if(token==O2_SERIAL_TOKEN_ENDTABLE){
mas01mc@292 1364 x++;
mas01mc@292 1365 break;
mas01mc@292 1366 }
mas01mc@292 1367 // Check for new row flag
mas01mc@306 1368 if(token==O2_SERIAL_TOKEN_T1)
mas01mc@292 1369 H::t1 = token;
mas01mc@292 1370 }
mas01mc@292 1371 }
mas01mc@306 1372 }
mas01mc@292 1373
mas01mc@340 1374 Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token){
mas01mc@292 1375 bool pointFound = false;
mas01mc@340 1376
mas01mc@340 1377 if(token)
mas01mc@340 1378 H::t2 = token;
mas01mc@340 1379 else if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@340 1380 fclose(dbFile);
mas01mc@340 1381 error("Read error T2 token","unserialize_hashtable_row_format2");
mas01mc@340 1382 }
mas01mc@340 1383
mas01mc@306 1384 if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){
mas01mc@336 1385 fclose(dbFile);
mas01mc@292 1386 error("State machine error: expected E or T2");
mas01mc@292 1387 }
mas01mc@340 1388
mas01mc@306 1389 while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){
mas01mc@292 1390 pointFound=false;
mas01mc@292 1391 // Check for T2 token
mas01mc@306 1392 if(H::t2!=O2_SERIAL_TOKEN_T2){
mas01mc@336 1393 fclose(dbFile);
mas01mc@292 1394 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
mas01mc@306 1395 }
mas01mc@292 1396 // Read t2 value
mas01mc@336 1397 if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1398 fclose(dbFile);
mas01mc@292 1399 error("Read error t2","unserialize_hashtable_row_format2");
mas01mc@292 1400 }
mas01mc@336 1401 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1402 fclose(dbFile);
mas01mc@292 1403 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1404 }
mas01mc@306 1405 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@292 1406 pointFound=true;
mas01mc@292 1407 bucket_insert_point(b);
mas01mc@336 1408 if(fread(&(H::p), sizeof(Uns32T), 1, dbFile) != 1){
mas01mc@336 1409 fclose(dbFile);
mas01mc@292 1410 error("Read error H::p","unserialize_hashtable_row_format2");
mas01mc@292 1411 }
mas01mc@292 1412 }
mas01mc@292 1413 if(!pointFound)
mas01mc@292 1414 error("State machine error: point", "unserialize_hashtable_row_format2()");
mas01mc@306 1415 H::t2 = H::p; // Copy last found token to t2
mas01mc@292 1416 }
mas01mc@292 1417 return H::t2; // holds current token
mas01mc@292 1418 }
mas01mc@292 1419
mas01mc@340 1420 // Unserialize format2 hashtable row to a CORE_ARRAY pointed to
mas01mc@340 1421 // by the hashtable row pointer: rowPtr
mas01mc@340 1422 //
mas01mc@340 1423 // numElements is the total number of t2 buckets plus points
mas01mc@340 1424 // memory required is numElements+1+sizeof(hashtable ptr)
mas01mc@340 1425 //
mas01mc@340 1426 // numElements = numPoints + numBuckets
mas01mc@340 1427 //
mas01mc@340 1428 // During inserts (merges) new hashtable entries are appened at rowPtr+numPoints+numBuckets+1
mas01mc@340 1429 //
mas01mc@340 1430 // ASSUME: that LSH_LIST_HEAD_COUNTERS is set so that the first hashtable bucket is used to count
mas01mc@340 1431 // point and bucket entries
mas01mc@340 1432 //
mas01mc@340 1433 // We store the values of numPoints and numBuckets in separate fields of the first bucket
mas01mc@340 1434 // rowPtr->t2 // numPoints
mas01mc@340 1435 // (Uns32T)(rowPtr->snext) // numBuckets
mas01mc@340 1436 //
mas01mc@340 1437 // We cast the rowPtr->next pointer to (Uns32*) malloc(numElements*sizeof(Uns32T) + sizeof(bucket*))
mas01mc@340 1438 // To get to the fist bucket, we use
mas01mc@340 1439 //
mas01mc@340 1440
mas01mc@340 1441 #define READ_UNS32T(VAL,TOKENSTR) if(fread(VAL, sizeof(Uns32T), 1, dbFile) != 1){\
mas01mc@340 1442 fclose(dbFile);error("Read error unserialize_hashtable_format2",TOKENSTR);}
mas01mc@340 1443
mas01mc@340 1444 #define TEST_TOKEN(TEST, TESTSTR) if(TEST){fclose(dbFile);error("State machine error: ", TESTSTR);}
mas01mc@340 1445
mas01mc@340 1446 #define SKIP_BITS_LEFT_SHIFT_MSB (30)
mas01mc@340 1447
mas01mc@340 1448 #define SKIP_BITS_RIGHT_SHIFT_MSB (28)
mas01mc@340 1449 #define SKIP_BITS_RIGHT_SHIFT_LSB (30)
mas01mc@340 1450
mas01mc@340 1451 #define MAX_POINTS_IN_BUCKET_CORE_ARRAY (16)
mas01mc@340 1452 #define LSH_CORE_ARRAY_END_ROW_TOKEN (0xFFFFFFFD)
mas01mc@340 1453
mas01mc@340 1454 // Encode the skip bits. Zero if only one point, MAX 8 (plus first == 9)
mas01mc@340 1455 #define ENCODE_POINT_SKIP_BITS TEST_TOKEN(!numPointsThisBucket, "no points found");\
mas01mc@340 1456 if(numPointsThisBucket==1){\
mas01mc@340 1457 secondPtr=ap++;\
mas01mc@340 1458 *secondPtr=0;\
mas01mc@340 1459 numPoints++;\
mas01mc@340 1460 }\
mas01mc@340 1461 if(numPointsThisBucket>1){\
mas01mc@340 1462 *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
mas01mc@340 1463 *secondPtr |= ( ( (numPointsThisBucket-1) & 0xC) >> 2 ) << SKIP_BITS_LEFT_SHIFT_MSB;}
mas01mc@340 1464
mas01mc@340 1465 Uns32T G::unserialize_hashtable_row_to_array(FILE* dbFile, bucket** rowPP, Uns32T numElements){
mas01mc@340 1466 Uns32T numPointsThisBucket = 0;
mas01mc@340 1467 Uns32T numBuckets = 0;
mas01mc@340 1468 Uns32T numPoints = 0;
mas01mc@340 1469 Uns32T* firstPtr = 0;
mas01mc@340 1470 Uns32T* secondPtr = 0;
mas01mc@340 1471
mas01mc@340 1472 // Initialize new row
mas01mc@340 1473 if(!*rowPP){
mas01mc@340 1474 *rowPP = new bucket();
mas01mc@340 1475 #ifdef LSH_LIST_HEAD_COUNTERS
mas01mc@340 1476 (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
mas01mc@340 1477 (*rowPP)->next = 0;
mas01mc@340 1478 #endif
mas01mc@340 1479 }
mas01mc@340 1480 bucket* rowPtr = *rowPP;
mas01mc@340 1481
mas01mc@340 1482 READ_UNS32T(&(H::t2),"t2");
mas01mc@340 1483 TEST_TOKEN(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2), "expected E or T2");
mas01mc@340 1484 // Because we encode points in 16-point blocks, we sometimes allocate repeated t2 elements
mas01mc@340 1485 // So over-allocate by a factor of two and realloc later to actual numElements
mas01mc@340 1486 CR_ASSERT(rowPtr->next = (bucket*) malloc((2*numElements+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1487 Uns32T* ap = reinterpret_cast<Uns32T*>(rowPtr->next); // Cast pointer to Uns32T* for array format
mas01mc@340 1488 while( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1) ){
mas01mc@340 1489 numPointsThisBucket = 0;// reset bucket point counter
mas01mc@340 1490 secondPtr = 0; // reset second-point pointer
mas01mc@340 1491 TEST_TOKEN(H::t2!=O2_SERIAL_TOKEN_T2, "expected T2");
mas01mc@340 1492 READ_UNS32T(&(H::t2), "Read error t2");
mas01mc@340 1493 *ap++ = H::t2; // Insert t2 value into array
mas01mc@340 1494 numBuckets++;
mas01mc@340 1495 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1496 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
mas01mc@340 1497 if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
mas01mc@340 1498 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1499 *ap++ = H::t2; // Extra element
mas01mc@340 1500 numBuckets++; // record this as a new bucket
mas01mc@340 1501 numPointsThisBucket=0; // reset bucket point counter
mas01mc@340 1502 secondPtr = 0; // reset second-point pointer
mas01mc@340 1503 }
mas01mc@340 1504 if( ++numPointsThisBucket == 1 )
mas01mc@340 1505 firstPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1506 else if( numPointsThisBucket == 2 )
mas01mc@340 1507 secondPtr = ap; // store pointer to first point to insert skip bits later on
mas01mc@340 1508 numPoints++;
mas01mc@340 1509 *ap++ = H::p;
mas01mc@340 1510 READ_UNS32T(&(H::p), "Read error H::p");
mas01mc@340 1511 }
mas01mc@340 1512 ENCODE_POINT_SKIP_BITS;
mas01mc@340 1513 H::t2 = H::p; // Copy last found token to t2
mas01mc@340 1514 }
mas01mc@340 1515 // Reallocate the row to its actual size
mas01mc@340 1516 CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
mas01mc@340 1517 // Record the sizes at the head of the row
mas01mc@344 1518 rowPtr->snext.numBuckets = numBuckets;
mas01mc@340 1519 rowPtr->t2 = numPoints;
mas01mc@340 1520 // Place end of row marker
mas01mc@340 1521 *ap++ = LSH_CORE_ARRAY_END_ROW_TOKEN;
mas01mc@340 1522 // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
mas01mc@340 1523 rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
mas01mc@340 1524 // Allocate a new dynamic list head at the end of the array
mas01mc@340 1525 bucket** listPtr = reinterpret_cast<bucket**> (ap);
mas01mc@340 1526 *listPtr = 0;
mas01mc@340 1527 // Return current token
mas01mc@340 1528 return H::t2; // return H::t2 which holds current token [E or T1]
mas01mc@340 1529 }
mas01mc@340 1530
mas01mc@340 1531
mas01mc@340 1532
mas01mc@340 1533 // *p is a pointer to the beginning of a hashtable row array
mas01mc@340 1534 // The array consists of t2 hash keys and one or more point identifiers p for each hash key
mas01mc@340 1535 // Retrieval is performed by generating a hash key query_t2 for query point q
mas01mc@340 1536 // We identify the row that t2 is stored in using a secondary hash t1, this row is the entry
mas01mc@340 1537 // point for retrieve_from_core_hashtable_array
mas01mc@340 1538 #define SKIP_BITS (0xC0000000)
mas01mc@340 1539 void G::retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos){
mas01mc@340 1540 Uns32T skip;
mas01mc@340 1541 Uns32T t2;
mas01mc@340 1542 Uns32T p1;
mas01mc@340 1543 Uns32T p2;
mas01mc@340 1544
mas01mc@340 1545 CR_ASSERT(p);
mas01mc@340 1546
mas01mc@340 1547 do{
mas01mc@340 1548 t2 = *p++;
mas01mc@340 1549 if( t2 > H::t2 )
mas01mc@340 1550 return;
mas01mc@340 1551 p1 = *p++;
mas01mc@340 1552 p2 = *p++;
mas01mc@340 1553 skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
mas01mc@340 1554 if( t2 == H::t2 ){
mas01mc@340 1555 add_point_callback(calling_instance, p1 ^ (p1 & SKIP_BITS), qpos, radius);
mas01mc@340 1556 if(skip--){
mas01mc@340 1557 add_point_callback(calling_instance, p2 ^ (p2 & SKIP_BITS), qpos, radius);
mas01mc@340 1558 while(skip-- )
mas01mc@340 1559 add_point_callback(calling_instance, *p++, qpos, radius);
mas01mc@340 1560 }
mas01mc@340 1561 }
mas01mc@340 1562 else
mas01mc@340 1563 if(*p != LSH_CORE_ARRAY_END_ROW_TOKEN)
mas01mc@340 1564 p = p + skip;
mas01mc@340 1565 }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
mas01mc@340 1566 }
mas01mc@340 1567
mas01mc@292 1568 void G::dump_hashtable_row(bucket* p){
mas01mc@292 1569 while(p && p->t2!=IFLAG){
mas01mc@344 1570 sbucket* sbp = p->snext.ptr;
mas01mc@292 1571 while(sbp){
mas01mc@292 1572 printf("(%0X,%u)", p->t2, sbp->pointID);
mas01mc@292 1573 fflush(stdout);
mas01mc@292 1574 sbp=sbp->snext;
mas01mc@292 1575 }
mas01mc@292 1576 p=p->next;
mas01mc@292 1577 }
mas01mc@292 1578 printf("\n");
mas01mc@292 1579 }
mas01mc@292 1580
mas01mc@292 1581
mas01mc@292 1582 // G::serial_retrieve_point( ... )
mas01mc@292 1583 // retrieves (pointID) from a serialized LSH database
mas01mc@292 1584 //
mas01mc@292 1585 // inputs:
mas01mc@292 1586 // filename - file name of serialized LSH database
mas01mc@292 1587 // vv - query point set
mas01mc@292 1588 //
mas01mc@292 1589 // outputs:
mas01mc@292 1590 // inserts retrieved points into add_point() callback method
mas01mc@292 1591 void G::serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller)
mas01mc@292 1592 {
mas01mc@292 1593 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1594 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1595 serial_get_header(dbheader); // read header
mas01mc@292 1596 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1597
mas01mc@292 1598 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1599 serial_close(dbfid);
mas01mc@292 1600 error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1601 }
mas01mc@292 1602
mas01mc@292 1603 // size of each hash table
mas01mc@292 1604 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1605 calling_instance = caller; // class instance variable used in ...bucket_chain_point()
mas01mc@292 1606 add_point_callback = add_point;
mas01mc@292 1607
mas01mc@292 1608 for(Uns32T j=0; j<L; j++){
mas01mc@292 1609 // memory map a single hash table for random access
mas01mc@292 1610 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1611 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1612 #ifdef __CYGWIN__
mas01mc@324 1613 // No madvise in CYGWIN
mas01mc@324 1614 #else
mas01mc@292 1615 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@292 1616 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1617 #endif
mas01mc@292 1618 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1619 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
mas01mc@293 1620 H::compute_hash_functions(vv[qpos]);
mas01mc@293 1621 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1622 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1623 }
mas01mc@292 1624 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@292 1625 }
mas01mc@292 1626 serial_close(dbfid);
mas01mc@292 1627 }
mas01mc@292 1628
mas01mc@292 1629 void G::serial_retrieve_point(char* filename, vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
mas01mc@292 1630 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1631 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1632 serial_get_header(dbheader); // read header
mas01mc@292 1633 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1634
mas01mc@292 1635 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
mas01mc@336 1636 serial_close(dbfid);
mas01mc@292 1637 error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only");
mas01mc@292 1638 }
mas01mc@292 1639
mas01mc@292 1640 // size of each hash table
mas01mc@292 1641 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1642 calling_instance = caller;
mas01mc@292 1643 add_point_callback = add_point;
mas01mc@293 1644 H::compute_hash_functions(v);
mas01mc@292 1645 for(Uns32T j=0; j<L; j++){
mas01mc@292 1646 // memory map a single hash table for random access
mas01mc@292 1647 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1648 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1649 #ifdef __CYGWIN__
mas01mc@324 1650 // No madvise in CYGWIN
mas01mc@324 1651 #else
mas01mc@292 1652 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
mas01mc@292 1653 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1654 #endif
mas01mc@292 1655 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@293 1656 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
mas01mc@292 1657 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
mas01mc@292 1658 serial_munmap(db, hashTableSize); // drop hashtable mmap
mas01mc@292 1659 }
mas01mc@292 1660 serial_close(dbfid);
mas01mc@292 1661 }
mas01mc@292 1662
mas01mc@292 1663 void G::serial_dump_tables(char* filename){
mas01mc@292 1664 int dbfid = serial_open(filename, 0); // open for read
mas01mc@292 1665 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
mas01mc@292 1666 serial_get_header(dbheader); // read header
mas01mc@292 1667 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
mas01mc@292 1668 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
mas01mc@292 1669 for(Uns32T j=0; j<L; j++){
mas01mc@292 1670 // memory map a single hash table for random access
mas01mc@292 1671 char* db = serial_mmap(dbfid, hashTableSize, 0,
mas01mc@292 1672 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
mas01mc@324 1673 #ifdef __CYGWIN__
mas01mc@324 1674 // No madvise in CYGWIN
mas01mc@324 1675 #else
mas01mc@292 1676 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0)
mas01mc@292 1677 error("could not advise local hashtable memory","","madvise");
mas01mc@324 1678 #endif
mas01mc@292 1679 SerialElementT* pe = (SerialElementT*)db ;
mas01mc@292 1680 printf("*********** TABLE %d ***************\n", j);
mas01mc@292 1681 fflush(stdout);
mas01mc@292 1682 int count=0;
mas01mc@292 1683 do{
mas01mc@292 1684 printf("[%d,%d]", j, count++);
mas01mc@292 1685 fflush(stdout);
mas01mc@292 1686 serial_bucket_dump(pe);
mas01mc@292 1687 pe+=lshHeader->numCols;
mas01mc@292 1688 }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols);
mas01mc@292 1689 }
mas01mc@292 1690
mas01mc@292 1691 }
mas01mc@292 1692
mas01mc@292 1693 void G::serial_bucket_dump(SerialElementT* pe){
mas01mc@292 1694 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1695 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1696 printf("(%0X,%u)",pe->hashValue,pe->pointID);
mas01mc@292 1697 pe++;
mas01mc@292 1698 }
mas01mc@292 1699 printf("\n");
mas01mc@292 1700 fflush(stdout);
mas01mc@292 1701 }
mas01mc@292 1702
mas01mc@292 1703 void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){
mas01mc@292 1704 SerialElementT* pend = pe+lshHeader->numCols;
mas01mc@292 1705 while( !(pe->hashValue==IFLAG || pe==pend ) ){
mas01mc@292 1706 if(pe->hashValue==t2){ // new match
mas01mc@292 1707 add_point_callback(calling_instance, pe->pointID, qpos, radius);
mas01mc@292 1708 }
mas01mc@292 1709 pe++;
mas01mc@292 1710 }
mas01mc@292 1711 }
mas01mc@292 1712
mas01mc@292 1713 void G::bucket_chain_point(bucket* p, Uns32T qpos){
mas01mc@292 1714 if(!p || p->t2==IFLAG)
mas01mc@292 1715 return;
mas01mc@292 1716 if(p->t2==t2){ // match
mas01mc@344 1717 sbucket_chain_point(p->snext.ptr, qpos); // add to reporter
mas01mc@292 1718 }
mas01mc@292 1719 if(p->next){
mas01mc@292 1720 bucket_chain_point(p->next, qpos); // recurse
mas01mc@292 1721 }
mas01mc@292 1722 }
mas01mc@292 1723
mas01mc@292 1724 void G::sbucket_chain_point(sbucket* p, Uns32T qpos){
mas01mc@292 1725 add_point_callback(calling_instance, p->pointID, qpos, radius);
mas01mc@292 1726 if(p->snext){
mas01mc@292 1727 sbucket_chain_point(p->snext, qpos);
mas01mc@292 1728 }
mas01mc@292 1729 }
mas01mc@292 1730
mas01mc@292 1731 void G::get_lock(int fd, bool exclusive) {
mas01mc@292 1732 struct flock lock;
mas01mc@292 1733 int status;
mas01mc@292 1734 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
mas01mc@292 1735 lock.l_whence = SEEK_SET;
mas01mc@292 1736 lock.l_start = 0;
mas01mc@292 1737 lock.l_len = 0; /* "the whole file" */
mas01mc@292 1738 retry:
mas01mc@292 1739 do {
mas01mc@292 1740 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1741 } while (status != 0 && errno == EINTR);
mas01mc@292 1742 if (status) {
mas01mc@292 1743 if (errno == EAGAIN) {
mas01mc@292 1744 sleep(1);
mas01mc@292 1745 goto retry;
mas01mc@292 1746 } else {
mas01mc@292 1747 error("fcntl lock error", "", "fcntl");
mas01mc@292 1748 }
mas01mc@292 1749 }
mas01mc@292 1750 }
mas01mc@292 1751
mas01mc@292 1752 void G::release_lock(int fd) {
mas01mc@292 1753 struct flock lock;
mas01mc@292 1754 int status;
mas01mc@292 1755
mas01mc@292 1756 lock.l_type = F_UNLCK;
mas01mc@292 1757 lock.l_whence = SEEK_SET;
mas01mc@292 1758 lock.l_start = 0;
mas01mc@292 1759 lock.l_len = 0;
mas01mc@292 1760
mas01mc@292 1761 status = fcntl(fd, F_SETLKW, &lock);
mas01mc@292 1762
mas01mc@292 1763 if (status)
mas01mc@292 1764 error("fcntl unlock error", "", "fcntl");
mas01mc@292 1765 }