Mercurial > hg > audiodb
comparison lshlib.cpp @ 306:921ba500a024
changed FORMAT2 index serialization so that token bits don't scribble over point index for >=32768 tracks
author | mas01mc |
---|---|
date | Tue, 05 Aug 2008 22:40:38 +0000 |
parents | b10ad7b6427f |
children | 896679d8cc39 |
comparison
equal
deleted
inserted
replaced
305:8cec6eb40526 | 306:921ba500a024 |
---|---|
546 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) ******* | 546 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) ******* |
547 // | 547 // |
548 // State machine controlled by regular expression. | 548 // State machine controlled by regular expression. |
549 // legend: | 549 // legend: |
550 // | 550 // |
551 // O2_SERIAL_FLAGS_T1_BIT = 0x80000000U | 551 // O2_SERIAL_TOKEN_T1 = 0xFFFFFFFCU |
552 // O2_SERIAL_FLAGS_T2_BIT = 0x40000000U | 552 // O2_SERIAL_TOKEN_T2 = 0xFFFFFFFDU |
553 // O2_SERIAL_FLAGS_END_BIT = 0x20000000U | 553 // O2_SERIAL_TOKEN_ENDTABLE = 0xFFFFFFFEU |
554 // | 554 // |
555 // T1(t1) - T1 hash token containing t1 hash key with O2_SERIAL_FLAGS_T1_BIT set (t1 range 0..2^29-1) | 555 // T1 - T1 hash token |
556 // T2 - T2 hash token with O2_SERIAL_FLAGS_T2_BIT set | 556 // t1 - t1 hash key (t1 range 0..2^29-1) |
557 // T2 - T2 token | |
557 // t2 - t2 hash key (range 1..2^32-6) | 558 // t2 - t2 hash key (range 1..2^32-6) |
558 // p - point identifier (range 0..2^32-1) | 559 // p - point identifier (range 0..2^32-1) |
559 // E - end hash table token with O2_SERIAL_FLAGS_END_BIT set | 560 // E - end hash table token |
560 // {...} required arguments | 561 // {...} required arguments |
561 // [...] optional arguments | 562 // [...] optional arguments |
562 // * - match zero or more occurences | 563 // * - match zero or more occurences |
563 // + - match one or more occurences | 564 // + - match one or more occurences |
564 // {...}^L - repeat argument L times | 565 // {...}^L - repeat argument L times |
565 // | 566 // |
566 // FORMAT2 Regular expression: | 567 // FORMAT2 Regular expression: |
567 // { [T1(t1) T2 t2 p+ [T2 t2 p+]* ]* E. }^L | 568 // { [T1 t1 [T2 t2 p+]+ ]* E }^L |
568 // | 569 // |
569 | 570 |
570 // Serial header constructors | 571 // Serial header constructors |
571 SerialHeader::SerialHeader(){;} | 572 SerialHeader::SerialHeader(){;} |
572 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc): | 573 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT, Uns32T pc): |
871 Uns32T x,y; | 872 Uns32T x,y; |
872 | 873 |
873 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) ) | 874 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) ) |
874 error("Cannot merge core and serial LSH, data structure dimensions mismatch."); | 875 error("Cannot merge core and serial LSH, data structure dimensions mismatch."); |
875 | 876 |
876 // We must pereform FORMAT1 merges in core | 877 // We must pereform FORMAT2 merges in core |
877 if(merge) | 878 if(merge) |
878 unserialize_lsh_hashtables_format2(fid); | 879 unserialize_lsh_hashtables_format2(fid); |
879 | 880 |
880 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1; | 881 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1; |
881 lseek(fid, get_serial_hashtable_offset(), SEEK_SET); | 882 lseek(fid, get_serial_hashtable_offset(), SEEK_SET); |
889 meanColCount=0; | 890 meanColCount=0; |
890 colCountN=0; | 891 colCountN=0; |
891 for( y = 0 ; y < H::N ; y++ ){ | 892 for( y = 0 ; y < H::N ; y++ ){ |
892 colCount=0; | 893 colCount=0; |
893 if(bucket* bPtr = h[x][y]){ | 894 if(bucket* bPtr = h[x][y]){ |
894 t1 = y | O2_SERIAL_FLAGS_T1_BIT; | 895 // Check for empty row (even though row was allocated) |
896 #ifdef LSH_BLOCK_FULL_ROWS | |
897 if(bPtr->next->t2==IFLAG){ | |
898 close(fid); | |
899 error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()"); | |
900 } | |
901 #else | |
902 if(bPtr->t2==IFLAG){ | |
903 close(fid); | |
904 error("b->t2==IFLAG","serialize_lsh_hashtables_format2()"); | |
905 } | |
906 #endif | |
907 t1 = O2_SERIAL_TOKEN_T1; | |
908 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ | |
909 close(fid); | |
910 error("write error in serial_write_hashtable_format2() [T1]"); | |
911 } | |
912 t1 = y; | |
895 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ | 913 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ |
896 close(fid); | 914 close(fid); |
897 error("write error in serial_write_hashtable_format2() [t1]"); | 915 error("write error in serial_write_hashtable_format2() [t1]"); |
898 } | 916 } |
899 #ifdef LSH_BLOCK_FULL_ROWS | 917 #ifdef LSH_BLOCK_FULL_ROWS |
910 meanColCount+=colCount; | 928 meanColCount+=colCount; |
911 colCountN++; | 929 colCountN++; |
912 } | 930 } |
913 } | 931 } |
914 // Write END of table marker | 932 // Write END of table marker |
915 t1 = O2_SERIAL_FLAGS_END_BIT; | 933 t1 = O2_SERIAL_TOKEN_ENDTABLE; |
916 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ | 934 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ |
917 close(fid); | 935 close(fid); |
918 error("write error in serial_write_hashtable_format2() [end]"); | 936 error("write error in serial_write_hashtable_format2() [end]"); |
919 } | 937 } |
920 | 938 |
928 return 1; | 946 return 1; |
929 } | 947 } |
930 | 948 |
931 void G::serial_write_hashtable_row_format2(int fid, bucket* b, Uns32T& colCount){ | 949 void G::serial_write_hashtable_row_format2(int fid, bucket* b, Uns32T& colCount){ |
932 while(b && b->t2!=IFLAG){ | 950 while(b && b->t2!=IFLAG){ |
933 t2 = O2_SERIAL_FLAGS_T2_BIT; | 951 if(!b->snext){ |
952 close(fid); | |
953 error("Empty collision chain in serial_write_hashtable_row_format2()"); | |
954 } | |
955 t2 = O2_SERIAL_TOKEN_T2; | |
934 if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){ | 956 if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){ |
935 close(fid); | 957 close(fid); |
936 error("write error in serial_write_hashtable_row_format2()"); | 958 error("write error in serial_write_hashtable_row_format2()"); |
937 } | 959 } |
938 t2 = b->t2; | 960 t2 = b->t2; |
1178 while( x < H::L){ | 1200 while( x < H::L){ |
1179 if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){ | 1201 if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){ |
1180 close(fid); | 1202 close(fid); |
1181 error("Read error","unserialize_lsh_hashtables_format2()"); | 1203 error("Read error","unserialize_lsh_hashtables_format2()"); |
1182 } | 1204 } |
1183 if(H::t1&O2_SERIAL_FLAGS_END_BIT) | 1205 if(H::t1==O2_SERIAL_TOKEN_ENDTABLE) |
1184 x++; // End of table | 1206 x++; // End of table |
1185 else | 1207 else |
1186 while(y < H::N){ | 1208 while(y < H::N){ |
1187 // Read a row and move file pointer to beginning of next row or table | 1209 // Read a row and move file pointer to beginning of next row or _bittable |
1188 if(!(H::t1&O2_SERIAL_FLAGS_T1_BIT)){ | 1210 if(!(H::t1==O2_SERIAL_TOKEN_T1)){ |
1189 close(fid); | 1211 close(fid); |
1190 error("State matchine error t1","unserialize_lsh_hashtables_format2()"); | 1212 error("State matchine error T1","unserialize_lsh_hashtables_format2()"); |
1191 } | 1213 } |
1192 y = H::t1 ^ O2_SERIAL_FLAGS_T1_BIT; | 1214 if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){ |
1215 close(fid); | |
1216 error("Read error: t1","unserialize_lsh_hashtables_format2()"); | |
1217 } | |
1218 y = H::t1; | |
1193 if(y>=H::N){ | 1219 if(y>=H::N){ |
1194 close(fid); | 1220 close(fid); |
1195 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()"); | 1221 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()"); |
1196 } | 1222 } |
1197 Uns32T token = unserialize_hashtable_row_format2(fid, h[x]+y); | 1223 Uns32T token = unserialize_hashtable_row_format2(fid, h[x]+y); |
1199 #ifdef __LSH_DUMP_CORE_TABLES__ | 1225 #ifdef __LSH_DUMP_CORE_TABLES__ |
1200 printf("C[%d,%d]", x, y); | 1226 printf("C[%d,%d]", x, y); |
1201 dump_hashtable_row(h[x][y]); | 1227 dump_hashtable_row(h[x][y]); |
1202 #endif | 1228 #endif |
1203 // Check that token is valid | 1229 // Check that token is valid |
1204 if( !(token&O2_SERIAL_FLAGS_T1_BIT || token&O2_SERIAL_FLAGS_END_BIT) ){ | 1230 if( !(token==O2_SERIAL_TOKEN_T1 || token==O2_SERIAL_TOKEN_ENDTABLE) ){ |
1205 close(fid); | 1231 close(fid); |
1206 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); | 1232 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); |
1207 } | 1233 } |
1208 // Check for end of table flag | 1234 // Check for end of table flag |
1209 if(token&O2_SERIAL_FLAGS_END_BIT){ | 1235 if(token==O2_SERIAL_TOKEN_ENDTABLE){ |
1210 x++; | 1236 x++; |
1211 break; | 1237 break; |
1212 } | 1238 } |
1213 // Check for new row flag | 1239 // Check for new row flag |
1214 if(token&O2_SERIAL_FLAGS_T1_BIT) | 1240 if(token==O2_SERIAL_TOKEN_T1) |
1215 H::t1 = token; | 1241 H::t1 = token; |
1216 } | 1242 } |
1217 } | 1243 } |
1218 } | 1244 } |
1219 | 1245 |
1220 Uns32T G::unserialize_hashtable_row_format2(int fid, bucket** b){ | 1246 Uns32T G::unserialize_hashtable_row_format2(int fid, bucket** b){ |
1221 bool pointFound = false; | 1247 bool pointFound = false; |
1222 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ | 1248 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ |
1223 close(fid); | 1249 close(fid); |
1224 error("Read error T2 token","unserialize_hashtable_row_format2"); | 1250 error("Read error T2 token","unserialize_hashtable_row_format2"); |
1225 } | 1251 } |
1226 if( !(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2==O2_SERIAL_FLAGS_T2_BIT)){ | 1252 if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){ |
1227 close(fid); | 1253 close(fid); |
1228 error("State machine error: expected E or T2"); | 1254 error("State machine error: expected E or T2"); |
1229 } | 1255 } |
1230 while(!(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2&O2_SERIAL_FLAGS_T1_BIT)){ | 1256 while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){ |
1231 pointFound=false; | 1257 pointFound=false; |
1232 // Check for T2 token | 1258 // Check for T2 token |
1233 if(H::t2!=O2_SERIAL_FLAGS_T2_BIT) | 1259 if(H::t2!=O2_SERIAL_TOKEN_T2){ |
1260 close(fid); | |
1234 error("State machine error T2 token", "unserialize_hashtable_row_format2()"); | 1261 error("State machine error T2 token", "unserialize_hashtable_row_format2()"); |
1262 } | |
1235 // Read t2 value | 1263 // Read t2 value |
1236 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ | 1264 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ |
1237 close(fid); | 1265 close(fid); |
1238 error("Read error t2","unserialize_hashtable_row_format2"); | 1266 error("Read error t2","unserialize_hashtable_row_format2"); |
1239 } | 1267 } |
1240 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ | 1268 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ |
1241 close(fid); | 1269 close(fid); |
1242 error("Read error H::p","unserialize_hashtable_row_format2"); | 1270 error("Read error H::p","unserialize_hashtable_row_format2"); |
1243 } | 1271 } |
1244 while(!(H::p==O2_SERIAL_FLAGS_END_BIT || H::p&O2_SERIAL_FLAGS_T1_BIT || H::p==O2_SERIAL_FLAGS_T2_BIT )){ | 1272 while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){ |
1245 pointFound=true; | 1273 pointFound=true; |
1246 bucket_insert_point(b); | 1274 bucket_insert_point(b); |
1247 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ | 1275 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ |
1248 close(fid); | 1276 close(fid); |
1249 error("Read error H::p","unserialize_hashtable_row_format2"); | 1277 error("Read error H::p","unserialize_hashtable_row_format2"); |
1250 } | 1278 } |
1251 } | 1279 } |
1252 H::t2 = H::p; // Copy last found token to t2 | |
1253 if(!pointFound) | 1280 if(!pointFound) |
1254 error("State machine error: point", "unserialize_hashtable_row_format2()"); | 1281 error("State machine error: point", "unserialize_hashtable_row_format2()"); |
1282 H::t2 = H::p; // Copy last found token to t2 | |
1255 } | 1283 } |
1256 return H::t2; // holds current token | 1284 return H::t2; // holds current token |
1257 } | 1285 } |
1258 | 1286 |
1259 void G::dump_hashtable_row(bucket* p){ | 1287 void G::dump_hashtable_row(bucket* p){ |