changeset 560:11ea54a02534 multiprobeLSH

Added a test program and lshlib functionality to inspect on-disk and in-core hashtable representations.
author mas01mc
date Sun, 22 Feb 2009 03:44:25 +0000
parents f6363bfdad80
children 1e6cc843563a
files lshlib.cpp lshlib.h tabdump.cpp
diffstat 3 files changed, 139 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/lshlib.cpp	Sat Feb 21 21:56:57 2009 +0000
+++ b/lshlib.cpp	Sun Feb 22 03:44:25 2009 +0000
@@ -1507,7 +1507,7 @@
     if(!pointFound)
       error("State machine error: point", "unserialize_hashtable_row_format2()");    
     H::t2 = H::p; // Copy last found token to t2
-  }  
+  }
   return H::t2; // holds current token
 }
 
@@ -1735,8 +1735,108 @@
    }
  }
 
-void G::dump_disk_row(Uns32T n){
+ void G::dump_disk_row(char* filename, Uns32T n){ // Print row n of every hash table serialized in filename, one "D[table,row]..." line per match
+  int dbfid = unserialize_lsh_header(filename); // yields a file descriptor; a negative value means the header could not be read
+  if(dbfid<0){
+    cerr << "Could not read header from " << filename << endl;
+    return;
+  }
+  FILE* dbFile = 0;
+  dbFile = fdopen(dbfid, "rb"); // wrap the descriptor in a stdio stream; on success the stream owns the descriptor
+  if(!dbFile){
+    cerr << "Could not create FILE pointer from file:" << filename << " with fid:" << dbfid << endl;
+    close(dbfid); // no stream was created, so the raw descriptor must be closed directly
+    return;
+  }
 
+  Uns32T x=0,y=0; // x: index of the table being scanned, y: most recent row index read from disk
+
+  // Seek to hashtable base offset
+  if(fseek(dbFile, get_serial_hashtable_offset(), SEEK_SET)){
+    fclose(dbFile);
+    error("fSeek error in unserialize_lsh_hashtables_format2");
+  }
+  Uns32T token;
+  Uns32T pointID;
+
+  // Read the hash tables into core (structure is given in header) 
+  while( x < H::L){
+    y=0;
+    if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+      fclose(dbFile);
+      error("Read error 1","unserialize_lsh_hashtables_format2()");
+    }
+    if(token==O2_SERIAL_TOKEN_ENDTABLE){
+      x++; // End of table
+    }
+    else{
+      while(y < n){ // NOTE(review): y was just reset to 0, so n==0 never enters this scan - confirm whether row 0 should be dumpable
+	// Read a row and move file pointer to beginning of next row or _bittable
+	if(!(token==O2_SERIAL_TOKEN_T1)){
+	  fclose(dbFile);
+	  error("State machine error T1","unserialize_lsh_hashtables_format2()");
+	}
+	
+	while(token != O2_SERIAL_TOKEN_ENDTABLE){ // walk word by word to the end of this table
+	  if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+	    fclose(dbFile);
+	    error("Read error 2","unserialize_lsh_hashtables_format2()");
+	  }
+	  if(token == O2_SERIAL_TOKEN_T1){
+	    if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+	      fclose(dbFile);
+	      error("Read error t1","unserialize_lsh_hashtables_format2()");
+	    }
+	    y=token; // the word following a T1 token is used as the row index
+	    if(y==n){
+	      printf("D[%u,%u]", x, y); // %u: x and y are Uns32T, matching the %u used for pointID below
+	      if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+		fclose(dbFile);
+		error("Read error 2","unserialize_lsh_hashtables_format2()");
+	      }
+	      printf("[numElements=%u]", token);
+	      if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+		fclose(dbFile);
+		error("Read error 3","unserialize_lsh_hashtables_format2()");
+	      }
+	      while(!(token==O2_SERIAL_TOKEN_ENDTABLE || token==O2_SERIAL_TOKEN_T1)){
+		// Check for T2 token
+		if(token!=O2_SERIAL_TOKEN_T2){
+		  printf("t2=%u",token);
+		  fclose(dbFile);
+		  error("State machine error T2 token", "unserialize_hashtable_row_format2()");
+		}
+		// Read t2 value
+		if(fread(&token, sizeof(Uns32T), 1, dbFile) != 1){
+		  fclose(dbFile);
+		  error("Read error t2","unserialize_hashtable_row_format2");
+		}
+		if(fread(&pointID, sizeof(Uns32T), 1, dbFile) != 1){
+		  fclose(dbFile);
+		  error("Read error pointID","unserialize_hashtable_row_format2");
+		}
+		while(!(pointID==O2_SERIAL_TOKEN_ENDTABLE || pointID==O2_SERIAL_TOKEN_T1 || pointID==O2_SERIAL_TOKEN_T2 )){ // print (t2,point) pairs until the next token word
+		  printf("(%0X,%u)", token, pointID);
+		  if(fread(&pointID, sizeof(Uns32T), 1, dbFile) != 1){
+		    fclose(dbFile);
+		    error("Read error H::p","unserialize_hashtable_row_format2");
+		  }
+		}
+		token = pointID; // Copy last found token
+	      }
+	      printf("\n");
+	    }
+	  }
+	}
+	if(token==O2_SERIAL_TOKEN_ENDTABLE){
+	  x++;
+	  break;
+	}
+      }
+    }
+  }
+  
+  fclose(dbFile); // fclose (not close) so both the FILE stream and its underlying descriptor are released
  }
 
  void G::dump_core_hashtable_array(Uns32T* p){
@@ -1750,11 +1850,11 @@
     p1 = *p++;
     p2 = *p++;
     skip = (( p1 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_LSB) + (( p2 & SKIP_BITS ) >> SKIP_BITS_RIGHT_SHIFT_MSB);
-    printf("(%0x, %0x)", t2, p1 & !SKIP_BITS);
+    printf("(%0X, %u)", t2, p1 & !SKIP_BITS);
     if(skip--){
-      printf("(%0x, %0x)", t2, p2 & !SKIP_BITS);
+      printf("(%0X, %u)", t2, p2 & !SKIP_BITS);
       while(skip-- )
-	printf("(%0x, %0x)", t2, *p++);
+	printf("(%0X, %u)", t2, *p++);
     }
   }while( *p != LSH_CORE_ARRAY_END_ROW_TOKEN );
  }
--- a/lshlib.h	Sat Feb 21 21:56:57 2009 +0000
+++ b/lshlib.h	Sun Feb 22 03:44:25 2009 +0000
@@ -394,7 +394,7 @@
   char* get_indexName(){return indexName;}
   void dump_hashtables();
   void dump_core_row(Uns32T n);
-  void dump_disk_row(Uns32T n);
+  void dump_disk_row(char*, Uns32T n);
 };
 
 typedef class G LSH;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tabdump.cpp	Sun Feb 22 03:44:25 2009 +0000
@@ -0,0 +1,33 @@
+#include "lshlib.h"
+
+int main(int argc, char **argv) { // Interactive dump tool: prompts for a row number, then prints that row from disk and from core
+
+  if(argc < 2){
+    cout << "Usage: " << argv[0] << " indexfile" << endl;
+    exit(1);
+  }
+
+  LSH* lsh = 0;
+  lsh = new LSH(argv[1], true); // Initialize empty LSH tables
+
+  if(!lsh){
+    cerr << "Cannot open " << argv[1] << endl;
+    exit(1);
+  }
+
+  char buf[1024];
+  int n;
+  while(1){
+    printf("row#");
+    fflush(stdout);
+    if(scanf("%1023s", buf) != 1) exit(0); // width-limited read cannot overflow buf; exit on EOF/error instead of re-parsing stale input forever
+    if(strcmp("exit", buf)==0 || strcmp("quit", buf)==0){
+      exit(0);
+    }
+    n = atoi(buf); // input that does not start with a number yields 0
+    printf("row=%d\n", n);
+    lsh->dump_disk_row(argv[1], n);
+    lsh->dump_core_row(n);
+  }
+  return 0;
+}