audiodb: changeset 340:a6edbe97fddf
Added LSH_CORE_ARRAY structure for hashtables instead of linked lists. Maintained backwards compatibility with indexes built for the linked-list format. Added tests for indexing and merging. Tested backwards compatibility OK.

The purpose of the LSH_CORE_ARRAY data structure is greater space efficiency and better L1/L2 cache usage. This is essential for holding multiple indexes with multiple hashtables in RAM.
author | mas01mc |
---|---|
date | Wed, 10 Sep 2008 18:55:16 +0000 |
parents | da901c62e569 |
children | a36e9cc666ce |
files | Makefile index.cpp lshlib.cpp lshlib.h tests/0044/run-test.sh tests/0044/short-description tests/0045/run-test.sh tests/0045/short-description |
diffstat | 8 files changed, 498 insertions(+), 61 deletions(-) |
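To make the space and cache argument in the description concrete, here is a minimal, self-contained C++ sketch (illustrative only: Bucket/SBucket and the counts are simplified stand-ins, not the audioDB structures) comparing the footprint of a linked-list hashtable row with a packed array row:

    // Illustrative sketch only -- not the committed audioDB code. Bucket/SBucket
    // are simplified stand-ins for the real per-key and per-point list nodes.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct SBucket { uint32_t pointID; SBucket* snext; };          // one heap node per colliding point
    struct Bucket  { uint32_t t2; SBucket* snext; Bucket* next; };  // one heap node per hash key

    int main() {
      const std::size_t keys = 8, pointsPerKey = 4;
      // Linked-list row: every key and every point is a separate allocation, scattered in memory.
      std::size_t listBytes  = keys * sizeof(Bucket) + keys * pointsPerKey * sizeof(SBucket);
      // Packed array row (the LSH_CORE_ARRAY idea): one 32-bit word per key, one per point,
      // plus an end-of-row marker, all contiguous -- a row scan walks sequential cache lines.
      std::size_t arrayBytes = (keys + keys * pointsPerKey + 1) * sizeof(uint32_t);
      std::printf("linked-list row: %zu bytes, packed-array row: %zu bytes\n", listBytes, arrayBytes);
      return 0;
    }

The byte counts are only the footprint side of the argument; the cache-usage side follows from the same contiguity, since probing a row no longer chases pointers across the heap.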
--- a/Makefile  Fri Sep 05 20:01:58 2008 +0000
+++ b/Makefile  Wed Sep 10 18:55:16 2008 +0000
@@ -7,6 +7,7 @@
 GSOAP_INCLUDE=
 
 override CFLAGS+=-O3 -g
+#override CFLAGS+=-ggdb -gstabs+ -g3
 
 ifeq ($(shell uname),Linux)
 override CFLAGS+=-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
--- a/index.cpp  Fri Sep 05 20:01:58 2008 +0000
+++ b/index.cpp  Wed Sep 10 18:55:16 2008 +0000
@@ -190,14 +190,19 @@
   if((lshfid = open(newIndexName,O_RDONLY))>0){
     printf("INDEX: merging with existing LSH index\n");
     fflush(stdout);
+    char* mergeIndexName = newIndexName;
     // Get the lsh header info and find how many tracks are inserted already
-    lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here
+    lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here
     assert(lsh);
     Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1;
     delete lsh;
     lsh = 0;
+    // Insert up to lsh_param_b tracks
+    if( !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){
+      index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
+    }
     // This allows for updating index after more tracks are inserted into audioDB
     for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
@@ -206,17 +211,17 @@
       endTrack = dbH->numFiles;
       printf("Indexing track range: %d - %d\n", startTrack, endTrack);
       fflush(stdout);
-      lsh = new LSH(newIndexName, lsh_in_core); // Initialize core memory for LSH tables
+      lsh = new LSH(mergeIndexName, false); // Initialize empty LSH tables
       assert(lsh);
       // Insert up to lsh_param_b database tracks
       index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
-      // Serialize to file
-      lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
+      // Serialize to file (merging is performed here)
+      lsh->serialize(mergeIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
       delete lsh;
       lsh = 0;
-      }
+    }
     close(lshfid);
     printf("INDEX: done constructing LSH index.\n");
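The index.cpp hunks above batch the remaining tracks and let lsh->serialize() merge each batch into the on-disk index. A hedged sketch of that control flow, with placeholder names (Index, insertTracks, serializeMerge are illustrative, not audioDB's API):

    // Sketch of the batched build/merge loop, assuming each pass starts from empty
    // in-core tables and merges them into the existing index file on serialization.
    #include <algorithm>
    #include <cstdint>

    typedef uint32_t Uns32T;

    struct Index {
      void insertTracks(Uns32T, Uns32T) { /* hash this batch's feature vectors into core tables */ }
      void serializeMerge(const char*)  { /* merge the core tables into the on-disk index */ }
    };

    // firstUnindexed plays the role of maxs above: one past the last track already
    // present in the serialized index, so re-running the command only adds new tracks.
    void update_index(const char* path, Uns32T firstUnindexed, Uns32T numTracks, Uns32T batch) {
      for (Uns32T start = firstUnindexed; start < numTracks; start += batch) {
        Uns32T end = std::min(start + batch, numTracks);
        Index idx;                  // fresh, empty tables for this pass
        idx.insertTracks(start, end);
        idx.serializeMerge(path);   // in the diff, lsh->serialize(...) performs the merge
      }
    }

Starting each pass from empty in-core tables keeps RAM usage bounded by the batch size (lsh_param_b) rather than by the whole database.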
--- a/lshlib.cpp  Fri Sep 05 20:01:58 2008 +0000
+++ b/lshlib.cpp  Wed Sep 10 18:55:16 2008 +0000
@@ -1,8 +1,19 @@
 #include "lshlib.h"
-//#define __LSH_DUMP_CORE_TABLES__
+//#define LSH_DUMP_CORE_TABLES
 //#define USE_U_FUNCTIONS
-#define LSH_BLOCK_FULL_ROWS
+
+// Use backward-compatible CORE ARRAY lsh index
+#define LSH_CORE_ARRAY // Set to use arrays for hashtables rather than linked-lists
+#define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads
+
+// Critical path logic
+#if defined LSH_CORE_ARRAY && !defined LSH_LIST_HEAD_COUNTERS
+#define LSH_LIST_HEAD_COUNTERS
+#endif
+
+#define LSH_CORE_ARRAY_BIT (0x80000000) // LSH_CORE_ARRAY test bit for list head
+
 void err(char*s){cout << s << endl;exit(2);}
@@ -193,6 +204,7 @@
 // Destruct hash tables
 H::~H(){
   Uns32T i,j,kk;
+  bucket** pp;
 #ifdef USE_U_FUNCTIONS
   for( j = 0 ; j < H::m ; j++ ){
     for( kk = 0 ; kk < H::k/2 ; kk++ )
@@ -221,8 +233,23 @@
   for( j=0 ; j < H::L ; j++ ){
     delete[] H::r1[ j ];
     delete[] H::r2[ j ];
-    for(i = 0; i< H::N ; i++)
+    for(i = 0; i< H::N ; i++){
+      pp = 0;
+#ifdef LSH_CORE_ARRAY
+      if(H::h[ j ][ i ])
+        if(H::h[ j ][ i ]->t2 & LSH_CORE_ARRAY_BIT){
+          pp = get_pointer_to_bucket_linked_list(H::h[ j ][ i ]);
+          if(*pp){
+            (*pp)->snext=0; // Head of list uses snext as a non-pointer value
+            delete *pp; // Now the destructor can do its work properly
+          }
+          free(H::h[ j ][ i ]->next);
+          H::h[ j ][ i ]->next = 0; // Zero next pointer
+          H::h[ j ][ i ]->snext = 0; // Zero head-of-list pointer as above
+        }
+#endif
       delete H::h[ j ][ i ];
+    }
     delete[] H::h[ j ];
   }
   delete[] H::r1;
@@ -333,28 +360,35 @@
 Uns32T H::bucket_insert_point(bucket **pp){
   collisionCount = 0;
+#ifdef LSH_LIST_HEAD_COUNTERS
   if(!*pp){
     *pp = new bucket();
-#ifdef LSH_BLOCK_FULL_ROWS
     (*pp)->t2 = 0; // Use t2 as a collision counter for the row
     (*pp)->next = new bucket();
-#endif
   }
-#ifdef LSH_BLOCK_FULL_ROWS
-  collisionCount = (*pp)->t2;
-  if(collisionCount < H::C){ // Block if row is full
-    (*pp)->t2++; // Increment collision counter
-    pointCount++;
-    collisionCount++;
-    __bucket_insert_point((*pp)->next); // First bucket holds collision count
+  // The list head holds point collision count
+  if( (*pp)->t2 & LSH_CORE_ARRAY_BIT )
+    bucket_insert_point(get_pointer_to_bucket_linked_list(*pp)); // recurse
+  else{
+    collisionCount = (*pp)->t2;
+    if(collisionCount < H::C){ // Block if row is full
+      (*pp)->t2++; // Increment collision counter (numPoints in row)
+      pointCount++;
+      collisionCount++;
+      // Locate the bucket linked list
+      __bucket_insert_point( (*pp)->next );
+    }
   }
-#else
+#else // NOT USING LSH_LIST_HEAD_COUNTERS
+  if(!*pp)
+    *pp = new bucket();
   pointCount++;
   __bucket_insert_point(*pp); // No collision count storage
 #endif
   return collisionCount;
 }
 
+// insert points into hashtable row collision chain
 void H::__bucket_insert_point(bucket* p){
   if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
     p->t2 = H::t2;
@@ -370,30 +404,37 @@
   }
 
   if(p->next){
-    __bucket_insert_point(p->next);
+    // Construct list in t2 order
+    if(H::t2 < p->next->t2){
+      bucket* tmp = new bucket();
+      tmp->next = p->next;
+      p->next = tmp;
+      __bucket_insert_point(tmp);
+    }
+    else
+      __bucket_insert_point(p->next);
   }
-
-  else{
+  else {
     p->next = new bucket();
     __bucket_insert_point(p->next);
   }
-
 }
 
+// insert points into point-locale collision chain
 void H::__sbucket_insert_point(sbucket* p){
   if(p->pointID==IFLAG){
     p->pointID = H::p;
     return;
   }
-
+  // Search for pointID
-  if(p->snext){
+  if(p->snext){
     __sbucket_insert_point(p->snext);
   }
   else{
-    // Make new point collision bucket at end of list
-    p->snext = new sbucket();
-    __sbucket_insert_point(p->snext);
+    // Make new point collision bucket at end of list
+    p->snext = new sbucket();
+    __sbucket_insert_point(p->snext);
   }
 }
@@ -401,6 +442,14 @@
   return *(h+j);
 }
 
+// Find the linked-list pointer at the end of the CORE_ARRAY
+bucket** H::get_pointer_to_bucket_linked_list(bucket* rowPtr){
+  Uns32T numBuckets = (Uns32T) rowPtr->snext; // Cast pointer to unsigned int
+  Uns32T numPoints = rowPtr->t2 & 0x7FFFFFFF; // Value is stored in low 31 bits of t2 field
+  bucket** listPtr = reinterpret_cast<bucket**> (reinterpret_cast<unsigned int*>(rowPtr->next)+numPoints+numBuckets+1);
+  return listPtr;
+}
+
 // Interface to Locality Sensitive Hashing G
 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
   H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
@@ -479,8 +528,11 @@
   for(Uns32T j = 0 ; j < H::L ; j++ ){
     H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
     if( bucket* bPtr = *(get_bucket(j) + get_t1()) )
-#ifdef LSH_BLOCK_FULL_ROWS
-      bucket_chain_point( bPtr->next, qpos);
+#ifdef LSH_LIST_HEAD_COUNTERS
+      if(bPtr->t2&LSH_CORE_ARRAY_BIT)
+        retrieve_from_core_hashtable_array((Uns32T*)(bPtr->next), qpos);
+      else
+        bucket_chain_point( bPtr->next, qpos);
 #else
       bucket_chain_point( bPtr , qpos);
 #endif
@@ -796,7 +848,7 @@
       colCount=0;
       if(bucket* bPtr = h[x][y])
         if(merge)
-#ifdef LSH_BLOCK_FULL_ROWS
+#ifdef LSH_LIST_HEAD_COUNTERS
           serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
         else
           serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
@@ -887,9 +939,9 @@
   if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
     error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
 
-  // We must pereform FORMAT2 merges in core
+  // We must pereform FORMAT2 merges in core FORMAT1 (dynamic list structure)
   if(merge)
-    unserialize_lsh_hashtables_format2(dbFile);
+    unserialize_lsh_hashtables_format2(dbFile, merge);
 
   Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
   if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
@@ -909,7 +961,7 @@
       colCount=0;
       if(bucket* bPtr = h[x][y]){ // Check for empty row (even though row was allocated)
-#ifdef LSH_BLOCK_FULL_ROWS
+#ifdef LSH_LIST_HEAD_COUNTERS
        if(bPtr->next->t2==IFLAG){
          fclose(dbFile);
          error("b->next->t2==IFLAG","serialize_lsh_hashtables_format2()");
@@ -921,19 +973,17 @@
        }
 #endif
        t1 = O2_SERIAL_TOKEN_T1;
-       if( fwrite(&t1, sizeof(Uns32T), 1, dbFile) != 1 ){
-         fclose(dbFile);
-         error("write error in serial_write_hashtable_format2() [T1]");
-       }
+       WRITE_UNS32(&t1, "[T1]");
        t1 = y;
-       if( fwrite(&t1, sizeof(Uns32T), 1, dbFile) != 1 ){
-         fclose(dbFile);
-         error("write error in serial_write_hashtable_format2() [t1]");
-       }
-#ifdef LSH_BLOCK_FULL_ROWS
+       WRITE_UNS32(&t1, "[t1]");
+#ifdef LSH_CORE_ARRAY
+       t1 = count_buckets_and_points_hashtable_row(bPtr);
+       WRITE_UNS32(&t1,"[count]"); // write numElements
+#endif
+#ifdef LSH_LIST_HEAD_COUNTERS
        serial_write_hashtable_row_format2(dbFile, bPtr->next, colCount); // skip collision counter bucket
 #else
-       serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
+       serial_write_hashtable_row_format2(dbFile, bPtr, colCount);
 #endif
      }
      if(colCount){
@@ -947,21 +997,52 @@
    }
    // Write END of table marker
    t1 = O2_SERIAL_TOKEN_ENDTABLE;
-   if( fwrite(&t1, sizeof(Uns32T), 1, dbFile ) != 1 ){
-     fclose(dbFile);
-     error("write error in serial_write_hashtable_format2() [end]");
-   }
-
+   WRITE_UNS32(&t1,"[end]");
   if(colCountN)
     std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
              << ", min = " << minColCount << ", max = " << maxColCount << endl;
-  }
-
+  } // We're done writing
  return 1;
 }
 
+Uns32T G::count_buckets_and_points_hashtable_row(bucket* bPtr){
+  Uns32T total_count = 0;
+  bucket* p = 0;
+
+  // count points
+#ifdef LSH_LIST_HEAD_COUNTERS
+  total_count = bPtr->t2; // points already counted
+  p = bPtr->next;
+#else
+  total_count = count_points_hashtable_row(bPtr);
+  p = bPtr;
+#endif
+
+  // count buckets
+  do{
+    total_count++;
+  }while((p=p->next));
+
+  return total_count;
+}
+
+Uns32T G::count_points_hashtable_row(bucket* bPtr){
+  Uns32T point_count = 0;
+  bucket* p = bPtr;
+  sbucket* s = 0;
+  while(p){
+    s = p->snext;
+    while(s){
+      point_count++;
+      s=s->snext;
+    }
+    p=p->next;
+  }
+  return point_count;
+}
+
 void G::serial_write_hashtable_row_format2(FILE* dbFile, bucket* b, Uns32T& colCount){
   while(b && b->t2!=IFLAG){
     if(!b->snext){
@@ -1185,7 +1266,7 @@
      // Move disk pointer to beginning of row
      pe=pt+y*lshHeader->numCols;
      unserialize_hashtable_row_format1(pe, h[x]+y);
-#ifdef __LSH_DUMP_CORE_TABLES__
+#ifdef LSH_DUMP_CORE_TABLES
      printf("S[%d,%d]", x, y);
      serial_bucket_dump(pe);
      printf("C[%d,%d]", x, y);
@@ -1207,7 +1288,7 @@
   }
 }
 
-void G::unserialize_lsh_hashtables_format2(FILE* dbFile){
+void G::unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge){
   Uns32T x=0,y=0;
 
   // Seek to hashtable base offset
@@ -1240,9 +1321,31 @@
        fclose(dbFile);
        error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
      }
-      Uns32T token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
+      Uns32T token = 0;
+#ifdef LSH_CORE_ARRAY
+      Uns32T numElements;
+      if(fread(&numElements, sizeof(Uns32T), 1, dbFile) != 1){
+        fclose(dbFile);
+        error("Read error: numElements","unserialize_lsh_hashtables_format2()");
+      }
 
-#ifdef __LSH_DUMP_CORE_TABLES__
+      // BACKWARD COMPATIBILITY: check to see if T2 or END token was read
+      if(numElements==O2_SERIAL_TOKEN_T2 || numElements==O2_SERIAL_TOKEN_ENDTABLE ){
+        forMerge=true; // Force use of dynamic linked list core format
+        token = numElements;
+      }
+
+      if(forMerge)
+        // Use linked list CORE format
+        token = unserialize_hashtable_row_format2(dbFile, h[x]+y, token);
+      else
+        // Use ARRAY CORE format with numElements counter
+        token = unserialize_hashtable_row_to_array(dbFile, h[x]+y, numElements);
+#else
+      token = unserialize_hashtable_row_format2(dbFile, h[x]+y);
+#endif
+
+#ifdef LSH_DUMP_CORE_TABLES
      printf("C[%d,%d]", x, y);
      dump_hashtable_row(h[x][y]);
 #endif
@@ -1263,16 +1366,21 @@
   }
 }
 
-Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b){
+Uns32T G::unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token){
   bool pointFound = false;
-  if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
-    fclose(dbFile);
-    error("Read error T2 token","unserialize_hashtable_row_format2");
-  }
+
+  if(token)
+    H::t2 = token;
+  else if(fread(&(H::t2), sizeof(Uns32T), 1, dbFile) != 1){
+    fclose(dbFile);
+    error("Read error T2 token","unserialize_hashtable_row_format2");
+  }
+
   if( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2)){
     fclose(dbFile);
     error("State machine error: expected E or T2");
   }
+
   while(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1)){
     pointFound=false;
     // Check for T2 token
@@ -1304,6 +1412,154 @@
   return H::t2; // holds current token
 }
 
+// Unserialize format2 hashtable row to a CORE_ARRAY pointed to
+// by the hashtable row pointer: rowPtr
+//
+// numElements is the total number of t2 buckets plus points
+// memory required is numElements+1+sizeof(hashtable ptr)
+//
+// numElements = numPoints + numBuckets
+//
+// During inserts (merges) new hashtable entries are appened at rowPtr+numPoints+numBuckets+1
+//
+// ASSUME: that LSH_LIST_HEAD_COUNTERS is set so that the first hashtable bucket is used to count
+// point and bucket entries
+//
+// We store the values of numPoints and numBuckets in separate fields of the first bucket
+// rowPtr->t2 // numPoints
+// (Uns32T)(rowPtr->snext) // numBuckets
+//
+// We cast the rowPtr->next pointer to (Uns32*) malloc(numElements*sizeof(Uns32T) + sizeof(bucket*))
+// To get to the fist bucket, we use
+//
+
+#define READ_UNS32T(VAL,TOKENSTR) if(fread(VAL, sizeof(Uns32T), 1, dbFile) != 1){\
+    fclose(dbFile);error("Read error unserialize_hashtable_format2",TOKENSTR);}
+
+#define TEST_TOKEN(TEST, TESTSTR) if(TEST){fclose(dbFile);error("State machine error: ", TESTSTR);}
+
+#define SKIP_BITS_LEFT_SHIFT_MSB (30)
+
+#define SKIP_BITS_RIGHT_SHIFT_MSB (28)
+#define SKIP_BITS_RIGHT_SHIFT_LSB (30)
+
+#define MAX_POINTS_IN_BUCKET_CORE_ARRAY (16)
+#define LSH_CORE_ARRAY_END_ROW_TOKEN (0xFFFFFFFD)
+
+// Encode the skip bits. Zero if only one point, MAX 8 (plus first == 9)
+#define ENCODE_POINT_SKIP_BITS TEST_TOKEN(!numPointsThisBucket, "no points found");\
+  if(numPointsThisBucket==1){\
+    secondPtr=ap++;\
+    *secondPtr=0;\
+    numPoints++;\
+  }\
+  if(numPointsThisBucket>1){\
+    *firstPtr |= ( (numPointsThisBucket-1) & 0x3 ) << SKIP_BITS_LEFT_SHIFT_MSB;\
+    *secondPtr |= ( ( (numPointsThisBucket-1) & 0xC) >> 2 ) << SKIP_BITS_LEFT_SHIFT_MSB;}
+
+Uns32T G::unserialize_hashtable_row_to_array(FILE* dbFile, bucket** rowPP, Uns32T numElements){
+  Uns32T numPointsThisBucket = 0;
+  Uns32T numBuckets = 0;
+  Uns32T numPoints = 0;
+  Uns32T* firstPtr = 0;
+  Uns32T* secondPtr = 0;
+
+  // Initialize new row
+  if(!*rowPP){
+    *rowPP = new bucket();
+#ifdef LSH_LIST_HEAD_COUNTERS
+    (*rowPP)->t2 = 0; // Use t2 as a collision counter for the row
+    (*rowPP)->next = 0;
+#endif
+  }
+  bucket* rowPtr = *rowPP;
+
+  READ_UNS32T(&(H::t2),"t2");
+  TEST_TOKEN(!(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T2), "expected E or T2");
+  // Because we encode points in 16-point blocks, we sometimes allocate repeated t2 elements
+  // So over-allocate by a factor of two and realloc later to actual numElements
+  CR_ASSERT(rowPtr->next = (bucket*) malloc((2*numElements+1)*sizeof(Uns32T)+sizeof(bucket**)));
+  Uns32T* ap = reinterpret_cast<Uns32T*>(rowPtr->next); // Cast pointer to Uns32T* for array format
+  while( !(H::t2==O2_SERIAL_TOKEN_ENDTABLE || H::t2==O2_SERIAL_TOKEN_T1) ){
+    numPointsThisBucket = 0;// reset bucket point counter
+    secondPtr = 0; // reset second-point pointer
+    TEST_TOKEN(H::t2!=O2_SERIAL_TOKEN_T2, "expected T2");
+    READ_UNS32T(&(H::t2), "Read error t2");
+    *ap++ = H::t2; // Insert t2 value into array
+    numBuckets++;
+    READ_UNS32T(&(H::p), "Read error H::p");
+    while(!(H::p==O2_SERIAL_TOKEN_ENDTABLE || H::p==O2_SERIAL_TOKEN_T1 || H::p==O2_SERIAL_TOKEN_T2 )){
+      if(numPointsThisBucket==MAX_POINTS_IN_BUCKET_CORE_ARRAY){
+        ENCODE_POINT_SKIP_BITS;
+        *ap++ = H::t2; // Extra element
+        numBuckets++; // record this as a new bucket
+        numPointsThisBucket=0; // reset bucket point counter
+        secondPtr = 0; // reset second-point pointer
+      }
+      if( ++numPointsThisBucket == 1 )
+        firstPtr = ap; // store pointer to first point to insert skip bits later on
+      else if( numPointsThisBucket == 2 )
+        secondPtr = ap; // store pointer to first point to insert skip bits later on
+      numPoints++;
+      *ap++ = H::p;
+      READ_UNS32T(&(H::p), "Read error H::p");
+    }
+    ENCODE_POINT_SKIP_BITS;
+    H::t2 = H::p; // Copy last found token to t2
+  }
+  // Reallocate the row to its actual size
+  CR_ASSERT(rowPtr->next = (bucket*) realloc(rowPtr->next, (numBuckets+numPoints+1)*sizeof(Uns32T)+sizeof(bucket**)));
+  // Record the sizes at the head of the row
+  rowPtr->snext = (sbucket*) numBuckets;
+  rowPtr->t2 = numPoints;
+  // Place end of row marker
+  *ap++ = LSH_CORE_ARRAY_END_ROW_TOKEN;
+  // Set the LSH_CORE_ARRAY_BIT to identify data structure for insertion and retrieval
+  rowPtr->t2 |= LSH_CORE_ARRAY_BIT;
+  // Allocate a new dynamic list head at the end of the array
+  bucket** listPtr = reinterpret_cast<bucket**> (ap);
+  *listPtr = 0;
+  // Return current token
+  return H::t2; // return H::t2 which holds current token [E or T1]
+}
+
+
+
+ // *p is a pointer to the beginning of a hashtable row array
+ // The array consists of t2 hash keys and one or more point identifiers p for each hash key
+ // Retrieval is performed by generating a hash key query_t2 for query point q
+ // We identify the row that t2 is stored in using a secondary hash t1, this row is the entry
+ // point for retrieve_from_core_hashtable_array
+#define SKIP_BITS (0xC0000000)
+void G::retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos){
+  Uns32T skip;
+  Uns32T t2;
+  Uns32T p1;
+  Uns32T p2;
+
+  CR_ASSERT(p);
+
+  do{
+    t2 = *p++;
+    if( t2 > H::t2 )
+      return;
+    p1 = *p++;
+    p2 = *p++;
+    skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
+    if( t2 == H::t2 ){
+      add_point_callback(calling_instance, p1 ^ (p1 & SKIP_BITS), qpos, radius);
+      if(skip--){
+        add_point_callback(calling_instance, p2 ^ (p2 & SKIP_BITS), qpos, radius);
+        while(skip-- )
+          add_point_callback(calling_instance, *p++, qpos, radius);
+      }
+    }
+    else
+      if(*p != LSH_CORE_ARRAY_END_ROW_TOKEN)
+        p = p + skip;
+  }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
+}
+
 void G::dump_hashtable_row(bucket* p){
   while(p && p->t2!=IFLAG){
     sbucket* sbp = p->snext;
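The array row format that unserialize_hashtable_row_to_array writes and retrieve_from_core_hashtable_array scans packs, for each hash key t2, the bucket's point count minus one into the top two bits of the first two point words (the "skip bits"), so a non-matching bucket can be jumped over without a separate length field. A self-contained worked example of that idea follows; it is a simplification (it assumes 2-16 points per key and point IDs that fit in 30 bits, and the helper names are hypothetical), not the committed code:

    // Worked example of the skip-bit row layout: row = [t2][p1][p2][extra points...]...[END_ROW],
    // with (points-1) split across the top two bits of p1 (low part) and p2 (high part).
    // Standalone and simplified -- it may differ in details from the audioDB implementation.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    const uint32_t SKIP_BITS = 0xC0000000u;
    const uint32_t END_ROW   = 0xFFFFFFFDu;

    // Append one hash key and its 2..16 point IDs (each < 2^30) to a packed row.
    void append_bucket(std::vector<uint32_t>& row, uint32_t t2, const std::vector<uint32_t>& pts) {
      uint32_t n = (uint32_t)pts.size();                      // assume 2 <= n <= 16 in this sketch
      row.push_back(t2);
      uint32_t p1 = pts[0] | (((n - 1) & 0x3u) << 30);        // low 2 bits of (n-1)
      uint32_t p2 = pts[1] | ((((n - 1) & 0xCu) >> 2) << 30); // high 2 bits of (n-1)
      row.push_back(p1);
      row.push_back(p2);
      for (uint32_t i = 2; i < n; ++i) row.push_back(pts[i]);
    }

    // Scan a row for one query key, reporting the point IDs stored under it.
    void retrieve(const uint32_t* p, uint32_t query_t2) {
      while (*p != END_ROW) {
        uint32_t t2 = *p++;
        uint32_t p1 = *p++, p2 = *p++;
        uint32_t skip = ((p1 & SKIP_BITS) >> 30) + ((p2 & SKIP_BITS) >> 28); // = points - 1
        if (t2 == query_t2) {
          std::printf("point %u\n", (unsigned)(p1 & ~SKIP_BITS));
          std::printf("point %u\n", (unsigned)(p2 & ~SKIP_BITS));
          for (uint32_t i = 1; i < skip; ++i)                 // remaining points, if any
            std::printf("point %u\n", (unsigned)*p++);
        } else if (skip > 0) {
          p += skip - 1;                                      // jump over this bucket's extra points
        }
      }
    }

    int main() {
      std::vector<uint32_t> row;
      append_bucket(row, /*t2=*/42, {7, 9, 11});  // three points under key 42
      append_bucket(row, /*t2=*/77, {3, 5});      // two points under key 77
      row.push_back(END_ROW);
      retrieve(row.data(), 77);                   // prints: point 3, point 5
      return 0;
    }

In the committed code, retrieval can also stop as soon as it sees a stored key greater than the query key, which is why the insertion path above keeps buckets in t2 order; that early exit is omitted from the sketch.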
--- a/lshlib.h  Fri Sep 05 20:01:58 2008 +0000
+++ b/lshlib.h  Wed Sep 10 18:55:16 2008 +0000
@@ -90,6 +90,9 @@
 #define O2_SERIAL_MAGIC ('o'|'2'<<8|'l'<<16|'s'<<24)
 
+#define WRITE_UNS32(VAL, TOKENSTR) if( fwrite(VAL, sizeof(Uns32T), 1, dbFile) != 1 ){\
+    fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);}
+
 using namespace std;
 
 Uns32T get_page_logn();
@@ -226,6 +229,7 @@
   void initialize_partial_functions();
   void __bucket_insert_point(bucket*);
   void __sbucket_insert_point(sbucket*);
+  bucket** get_pointer_to_bucket_linked_list(bucket* rowPtr);
   Uns32T computeProductModDefaultPrime(Uns32T*,Uns32T*,IntT);
   Uns32T randr();
   float randn();
@@ -290,6 +294,8 @@
   int serialize_lsh_hashtables_format2(FILE* dbFile, int merge);
   void serial_write_hashtable_row_format2(FILE* dbFile, bucket* h, Uns32T& colCount);
   void serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount);
+  Uns32T count_buckets_and_points_hashtable_row(bucket* bPtr);
+  Uns32T count_points_hashtable_row(bucket* bPtr);
 
   // Functions to read serial header and hash functions (format1 and format2)
   int unserialize_lsh_header(char* filename); // read lsh header from disk into core
@@ -300,8 +306,9 @@
   void unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b); // read lsh hash table row into core
 
   // Functions to read hashtables in format2
-  void unserialize_lsh_hashtables_format2(FILE* dbFile); // read FORMAT2 hash tables into core (core format)
-  Uns32T unserialize_hashtable_row_format2(FILE* dbFile, bucket** b); // read lsh hash table row into core
+  void unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge = 0);
+  Uns32T unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token=0); // to dynamic linked list
+  Uns32T unserialize_hashtable_row_to_array(FILE* dbFile, bucket** b, Uns32T numElements); // to core array
 
   // Helper functions
   void serial_print_header(Uns32T requestedFormat);
@@ -320,6 +327,10 @@
   void serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos);
   void serial_bucket_dump(SerialElementT* pe);
 
+  // Core ARRAY Retrieval Functions
+  void retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos);
+
+
   // Callback Function for point reporting
   void* calling_instance; // store calling object instance for member-function callback
   ReporterCallbackPtr add_point_callback; // Pointer to the callback function
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0044/run-test.sh  Wed Sep 10 18:55:16 2008 +0000
@@ -0,0 +1,76 @@
+#! /bin/bash
+
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+floatstring 1 0 >> testfeature
+floatstring 0 1 >> testfeature
+
+intstring 1 > testpower
+floatstring -0.5 >> testpower
+floatstring -1 >> testpower
+floatstring -1 >> testpower
+floatstring -0.5 >> testpower
+
+expect_clean_error_exit ${AUDIODB} -d testdb -I -f testfeature -w testpower
+${AUDIODB} -d testdb -P
+expect_clean_error_exit ${AUDIODB} -d testdb -I -f testfeature
+${AUDIODB} -d testdb -I -f testfeature -w testpower -k testfeature1
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query points (0.0,0.5),(0.0,0.5),(0.5,0.0)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+floatstring 0 0.5 >> testquery
+floatstring 0.5 0 >> testquery
+
+# LSH Indexing tests
+
+# Indexing requires a radius (-R)
+expect_clean_error_exit ${AUDIODB} -d testdb -X -l 1
+
+# Merged index
+${AUDIODB} -d testdb -I -f testfeature -w testpower -k testfeature2
+
+if [ -f testdb.lsh* ]; then
+    rm testdb.lsh*
+fi
+
+${AUDIODB} -d testdb -X -l 1 -R 1 --lsh_b 1
+
+# Query using the index
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -p 0 -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -p 1 -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -e -R 1 > testoutput
+echo testfeature1 3 > test-expected-output
+echo testfeature2 3 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -e -R 1 --lsh_exact > testoutput
+echo testfeature1 3 > test-expected-output
+echo testfeature2 3 >> test-expected-output
+cmp testoutput test-expected-output
+
+exit 104
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0044/short-description  Wed Sep 10 18:55:16 2008 +0000
@@ -0,0 +1,1 @@
+merged INDEX and LSH QUERY
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/0045/run-test.sh  Wed Sep 10 18:55:16 2008 +0000
@@ -0,0 +1,86 @@
+#! /bin/bash
+
+. ../test-utils.sh
+
+if [ -f testdb ]; then rm -f testdb; fi
+
+${AUDIODB} -d testdb -N
+
+intstring 2 > testfeature
+floatstring 0 1 >> testfeature
+floatstring 1 0 >> testfeature
+floatstring 1 0 >> testfeature
+floatstring 0 1 >> testfeature
+
+intstring 1 > testpower
+floatstring -0.5 >> testpower
+floatstring -1 >> testpower
+floatstring -1 >> testpower
+floatstring -0.5 >> testpower
+
+expect_clean_error_exit ${AUDIODB} -d testdb -I -f testfeature -w testpower
+${AUDIODB} -d testdb -P
+expect_clean_error_exit ${AUDIODB} -d testdb -I -f testfeature
+${AUDIODB} -d testdb -I -f testfeature -w testpower -k testfeature1
+
+# sequence queries require L2NORM
+${AUDIODB} -d testdb -L
+
+echo "query points (0.0,0.5),(0.0,0.5),(0.5,0.0)"
+intstring 2 > testquery
+floatstring 0 0.5 >> testquery
+floatstring 0 0.5 >> testquery
+floatstring 0.5 0 >> testquery
+
+# LSH Indexing tests
+
+# Indexing requires a radius (-R)
+expect_clean_error_exit ${AUDIODB} -d testdb -X -l 1
+
+# Merged index
+${AUDIODB} -d testdb -I -f testfeature -w testpower -k testfeature2
+
+if [ -f testdb.lsh* ]; then
+    rm testdb.lsh*
+fi
+
+${AUDIODB} -d testdb -X -l 1 -R 1 --lsh_b 1
+
+# Add a new track
+${AUDIODB} -d testdb -I -f testfeature -w testpower -k testfeature3
+
+# index using same paramters as previous index (merge new data)
+${AUDIODB} -d testdb -X -l 1 -R 1
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+echo testfeature3 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -p 0 -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+echo testfeature3 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -p 1 -R 1 > testoutput
+echo testfeature1 1 > test-expected-output
+echo testfeature2 1 >> test-expected-output
+echo testfeature3 1 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -e -R 1 > testoutput
+echo testfeature1 3 > test-expected-output
+echo testfeature2 3 >> test-expected-output
+echo testfeature3 3 >> test-expected-output
+cmp testoutput test-expected-output
+
+${AUDIODB} -d testdb -Q sequence -l 1 -f testquery -w testpower -e -R 1 --lsh_exact > testoutput
+echo testfeature1 3 > test-expected-output
+echo testfeature2 3 >> test-expected-output
+echo testfeature3 3 >> test-expected-output
+cmp testoutput test-expected-output
+
+
+exit 104