# HG changeset patch # User mas01mc # Date 1217976038 0 # Node ID 921ba500a0240c11b5a0a4ba8952e4fe4ac40c60 # Parent 8cec6eb40526c942c2c970d1695f79c388dc8925 changed FORMAT2 index serialization so that token bits don't scribble over point index for >=32768 tracks diff -r 8cec6eb40526 -r 921ba500a024 lshlib.cpp --- a/lshlib.cpp Tue Aug 05 15:34:10 2008 +0000 +++ b/lshlib.cpp Tue Aug 05 22:40:38 2008 +0000 @@ -548,15 +548,16 @@ // State machine controlled by regular expression. // legend: // -// O2_SERIAL_FLAGS_T1_BIT = 0x80000000U -// O2_SERIAL_FLAGS_T2_BIT = 0x40000000U -// O2_SERIAL_FLAGS_END_BIT = 0x20000000U +// O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU +// O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU +// O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU // -// T1(t1) - T1 hash token containing t1 hash key with O2_SERIAL_FLAGS_T1_BIT set (t1 range 0..2^29-1) -// T2 - T2 hash token with O2_SERIAL_FLAGS_T2_BIT set +// T1 - T1 hash token +// t1 - t1 hash key (t1 range 0..2^29-1) +// T2 - T2 token // t2 - t2 hash key (range 1..2^32-6) // p - point identifier (range 0..2^32-1) -// E - end hash table token with O2_SERIAL_FLAGS_END_BIT set +// E - end hash table token // {...} required arguments // [...] optional arguments // * - match zero or more occurences @@ -564,7 +565,7 @@ // {...}^L - repeat argument L times // // FORMAT2 Regular expression: -// { [T1(t1) T2 t2 p+ [T2 t2 p+]* ]* E. }^L +// { [T1 t1 [T2 t2 p+]+ ]* E }^L // // Serial header constructors @@ -873,7 +874,7 @@ if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) ) error("Cannot merge core and serial LSH, data structure dimensions mismatch."); - // We must pereform FORMAT1 merges in core + // We must pereform FORMAT2 merges in core if(merge) unserialize_lsh_hashtables_format2(fid); @@ -891,7 +892,24 @@ for( y = 0 ; y < H::N ; y++ ){ colCount=0; if(bucket* bPtr = h[x][y]){ - t1 = y | O2_SERIAL_FLAGS_T1_BIT; + // Check for empty row (even though row was allocated) +#ifdef LSH_BLOCK_FULL_ROWS + if(bPtr->next->t2==IFLAG){ + close(fid); + error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()"); + } +#else + if(bPtr->t2==IFLAG){ + close(fid); + error("b->t2==IFLAG","serialize_lsh_hashtables_format2()"); + } +#endif + t1 = O2_SERIAL_TOKEN_T1; + if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ + close(fid); + error("write error in serial_write_hashtable_format2() [T1]"); + } + t1 = y; if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ close(fid); error("write error in serial_write_hashtable_format2() [t1]"); @@ -912,7 +930,7 @@ } } // Write END of table marker - t1 = O2_SERIAL_FLAGS_END_BIT; + t1 = O2_SERIAL_TOKEN_ENDTABLE; if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ close(fid); error("write error in serial_write_hashtable_format2() [end]"); @@ -930,7 +948,11 @@ void G::serial_write_hashtable_row_format2(int fid, bucket* b, Uns32T& colCount){ while(b && b->t2!=IFLAG){ - t2 = O2_SERIAL_FLAGS_T2_BIT; + if(!b->snext){ + close(fid); + error("Empty collision chain in serial_write_hashtable_row_format2()"); + } + t2 = O2_SERIAL_TOKEN_T2; if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){ close(fid); error("write error in serial_write_hashtable_row_format2()"); @@ -1180,16 +1202,20 @@ close(fid); error("Read error","unserialize_lsh_hashtables_format2()"); } - if(H::t1&O2_SERIAL_FLAGS_END_BIT) + if(H::t1==O2_SERIAL_TOKEN_ENDTABLE) x++; // End of table else while(y < H::N){ - // Read a row and move file pointer to beginning of next row or table - if(!(H::t1&O2_SERIAL_FLAGS_T1_BIT)){ + // Read a row and move file pointer to beginning of next row or _bittable + if(!(H::t1==O2_SERIAL_TOKEN_T1)){ close(fid); - error("State matchine error t1","unserialize_lsh_hashtables_format2()"); + error("State matchine error T1","unserialize_lsh_hashtables_format2()"); } - y = H::t1 ^ O2_SERIAL_FLAGS_T1_BIT; + if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){ + close(fid); + error("Read error: t1","unserialize_lsh_hashtables_format2()"); + } + y = H::t1; if(y>=H::N){ close(fid); error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()"); @@ -1201,21 +1227,21 @@ dump_hashtable_row(h[x][y]); #endif // Check that token is valid - if( !(token&O2_SERIAL_FLAGS_T1_BIT || token&O2_SERIAL_FLAGS_END_BIT) ){ + if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ close(fid); error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); } // Check for end of table flag - if(token&O2_SERIAL_FLAGS_END_BIT){ + if(token==O2_SERIAL_TOKEN_ENDTABLE){ x++; break; } // Check for new row flag - if(token&O2_SERIAL_FLAGS_T1_BIT) + if(token==O2_SERIAL_TOKEN_T1) H::t1 = token; } } -} +} Uns32T G::unserialize_hashtable_row_format2(int fid, bucket** b){ bool pointFound = false; @@ -1223,15 +1249,17 @@ close(fid); error("Read error T2 token","unserialize_hashtable_row_format2"); } - if( !(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2==O2_SERIAL_FLAGS_T2_BIT)){ + if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){ close(fid); error("State machine error: expected E or T2"); } - while(!(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2&O2_SERIAL_FLAGS_T1_BIT)){ + while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){ pointFound=false; // Check for T2 token - if(H::t2!=O2_SERIAL_FLAGS_T2_BIT) + if(H::t2!=O2_SERIAL_TOKEN_T2){ + close(fid); error("State machine error T2 token", "unserialize_hashtable_row_format2()"); + } // Read t2 value if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ close(fid); @@ -1241,7 +1269,7 @@ close(fid); error("Read error H::p","unserialize_hashtable_row_format2"); } - while(!(H::p==O2_SERIAL_FLAGS_END_BIT || H::p&O2_SERIAL_FLAGS_T1_BIT || H::p==O2_SERIAL_FLAGS_T2_BIT )){ + while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ pointFound=true; bucket_insert_point(b); if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ @@ -1249,9 +1277,9 @@ error("Read error H::p","unserialize_hashtable_row_format2"); } } - H::t2 = H::p; // Copy last found token to t2 if(!pointFound) error("State machine error: point", "unserialize_hashtable_row_format2()"); + H::t2 = H::p; // Copy last found token to t2 } return H::t2; // holds current token } diff -r 8cec6eb40526 -r 921ba500a024 lshlib.h --- a/lshlib.h Tue Aug 05 15:34:10 2008 +0000 +++ b/lshlib.h Tue Aug 05 22:40:38 2008 +0000 @@ -70,9 +70,9 @@ #define O2_SERIAL_FILEFORMAT2 (0x2U) // Optimize for in-core search // Flags for serialization fileformat2: use high 3 bits of Uns32T -#define O2_SERIAL_FLAGS_T1_BIT (0x80000000U) -#define O2_SERIAL_FLAGS_T2_BIT (0x40000000U) -#define O2_SERIAL_FLAGS_END_BIT (0x20000000U) +#define O2_SERIAL_TOKEN_T1 (0xFFFFFFFC) +#define O2_SERIAL_TOKEN_T2 (0xFFFFFFFDU) +#define O2_SERIAL_TOKEN_ENDTABLE (0xFFFFFFFEU) unsigned align_up(unsigned x, unsigned w);