mas01mc@292
|
1 // lshlib.h - a library for locality sensitive hashtable insertion and retrieval
|
mas01mc@292
|
2 //
|
mas01mc@292
|
3 // Author: Michael Casey
|
mas01mc@292
|
4 // Copyright (c) 2008 Michael Casey, All Rights Reserved
|
mas01mc@292
|
5
|
mas01mc@292
|
6 /* GNU GENERAL PUBLIC LICENSE
|
mas01mc@292
|
7 Version 2, June 1991
|
mas01mc@292
|
8 See LICENSE.txt
|
mas01mc@292
|
9 */
|
mas01mc@292
|
10
|
mas01mc@292
|
11 #ifndef __LSHLIB_H
|
mas01mc@292
|
12 #define __LSHLIB_H
|
mas01mc@292
|
13
|
mas01mc@764
|
14 #include <vector>
|
mas01mc@764
|
15 #include <queue>
|
mas01mc@764
|
16 #include <stdio.h>
|
mas01mc@764
|
17 #include <stdlib.h>
|
mas01mc@764
|
18 #include <sys/types.h>
|
mas01mc@764
|
19 #include <sys/stat.h>
|
mas01mc@764
|
20 #include <sys/mman.h>
|
mas01mc@764
|
21 #include <fcntl.h>
|
mas01mc@764
|
22 #include <string.h>
|
mas01mc@764
|
23 #include <iostream>
|
mas01mc@764
|
24 #include <fstream>
|
mas01mc@764
|
25 #include <math.h>
|
mas01mc@764
|
26 #include <sys/time.h>
|
mas01mc@764
|
27 #include <assert.h>
|
mas01mc@764
|
28 #include <float.h>
|
mas01mc@764
|
29 #include <signal.h>
|
mas01mc@764
|
30 #include <time.h>
|
mas01mc@764
|
31 #include <limits.h>
|
mas01mc@764
|
32 #include <errno.h>
|
mas01mc@764
|
33 #ifdef MT19937
|
mas01mc@764
|
34 #include "mt19937/mt19937ar.h"
|
mas01mc@764
|
35 #endif
|
mas01mc@764
|
36 #include "multiprobe.h"
|
mas01mc@292
|
37
|
mas01mc@292
|
38 #define IntT int
|
mas01mc@292
|
39 #define LongUns64T long long unsigned
|
mas01mc@292
|
40 #define Uns32T unsigned
|
mas01mc@292
|
41 #define Int32T int
|
mas01mc@292
|
42 #define BooleanT int
|
mas01mc@764
|
43 #define TRUE 1
|
mas01mc@764
|
44 #define FALSE 0
|
mas01mc@292
|
45
|
mas01mc@292
|
46 // A big number (>> max # of points)
|
mas01mc@292
|
47 #define INDEX_START_EMPTY 1000000000U
|
mas01mc@292
|
48
|
mas01mc@292
|
49 // 4294967291 = 2^32-5
|
mas01mc@292
|
50 #define UH_PRIME_DEFAULT 4294967291U
|
mas01mc@292
|
51
|
mas01mc@292
|
52 // 2^29
|
mas01mc@292
|
53 #define MAX_HASH_RND 536870912U
|
mas01mc@292
|
54
|
mas01mc@292
|
55 // 2^32-1
|
mas01mc@292
|
56 #define TWO_TO_32_MINUS_1 4294967295U
|
mas01mc@292
|
57
|
mas01mc@292
|
58 #define O2_SERIAL_VERSION 1 // Sync with SVN version
|
mas01mc@292
|
59 #define O2_SERIAL_HEADER_SIZE sizeof(SerialHeaderT)
|
mas01mc@292
|
60 #define O2_SERIAL_ELEMENT_SIZE sizeof(SerialElementT)
|
mas01mc@292
|
61 #define O2_SERIAL_MAX_TABLES (200)
|
mas01mc@324
|
62 #define O2_SERIAL_MAX_ROWS (1000000000)
|
mas01mc@324
|
63 #define O2_SERIAL_MAX_COLS (1000000)
|
mas01mc@464
|
64 #define O2_SERIAL_MAX_DIM (20000)
|
mas01mc@292
|
65 #define O2_SERIAL_MAX_FUNS (100)
|
mas01mc@292
|
66 #define O2_SERIAL_MAX_BINWIDTH (200)
|
mas01mc@296
|
67 #define O2_SERIAL_MAXFILESIZE (4000000000UL)
|
mas01mc@292
|
68
|
mas01mc@292
|
69 // Flags for Serial Header
|
mas01mc@324
|
70 #define O2_SERIAL_FILEFORMAT1 (0x1U) // Optimize disk format for on-disk search
|
mas01mc@324
|
71 #define O2_SERIAL_FILEFORMAT2 (0x2U) // Optimize disk format for in-core search
|
mas01mc@324
|
72 #define O2_SERIAL_COREFORMAT1 (0x4U)
|
mas01mc@324
|
73 #define O2_SERIAL_COREFORMAT2 (0x8U)
|
mas01mc@292
|
74
|
mas01mc@292
|
75 // Flags for serialization fileformat2: use high 3 bits of Uns32T
|
mas01mc@324
|
76 #define O2_SERIAL_TOKEN_T1 (0xFFFFFFFCU)
|
mas01mc@306
|
77 #define O2_SERIAL_TOKEN_T2 (0xFFFFFFFDU)
|
mas01mc@306
|
78 #define O2_SERIAL_TOKEN_ENDTABLE (0xFFFFFFFEU)
|
mas01mc@292
|
79
|
mas01mc@324
|
80 #define O2_INDEX_MAXSTR (256)
|
mas01mc@309
|
81
|
mas01mc@764
|
82 unsigned align_up(unsigned x, unsigned w);
|
mas01mc@292
|
83
|
mas01mc@292
|
84 #define O2_SERIAL_FUNCTIONS_SIZE (align_up(sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS * O2_SERIAL_MAX_DIM \
|
mas01mc@292
|
85 + sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS + \
|
mas01mc@292
|
86 + sizeof(Uns32T) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS * 2 \
|
mas01mc@292
|
87 + O2_SERIAL_HEADER_SIZE,get_page_logn()))
|
mas01mc@292
|
88
|
mas01mc@292
|
89 #define O2_SERIAL_MAX_LSH_SIZE (O2_SERIAL_ELEMENT_SIZE * O2_SERIAL_MAX_TABLES \
|
mas01mc@292
|
90 * O2_SERIAL_MAX_ROWS * O2_SERIAL_MAX_COLS + O2_SERIAL_FUNCTIONS_SIZE)
|
mas01mc@292
|
91
|
mas01mc@292
|
92 #define O2_SERIAL_MAGIC ('o'|'2'<<8|'l'<<16|'s'<<24)
|
mas01mc@292
|
93
|
mas01mc@340
|
94 #define WRITE_UNS32(VAL, TOKENSTR) if( fwrite(VAL, sizeof(Uns32T), 1, dbFile) != 1 ){\
|
mas01mc@340
|
95 fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);}
|
mas01mc@340
|
96
|
mas01mc@514
|
97 //#define LSH_DUMP_CORE_TABLES // set to dump hashtables on load
|
mas01mc@764
|
98 //#define _LSH_DEBUG_ // turn on debugging information
|
mas01mc@514
|
99 //#define USE_U_FUNCTIONS // set to use partial hashfunction re-use
|
mas01mc@514
|
100
|
mas01mc@514
|
101 // Backward-compatible CORE ARRAY lsh index
|
mas01mc@514
|
102 #define LSH_CORE_ARRAY // Set to use arrays for hashtables rather than linked-lists
|
mas01mc@514
|
103 #define LSH_LIST_HEAD_COUNTERS // Enable counters in hashtable list heads
|
mas01mc@514
|
104
|
mas01mc@514
|
105 // Critical path logic
|
mas01mc@514
|
106 #if defined LSH_CORE_ARRAY && !defined LSH_LIST_HEAD_COUNTERS
|
mas01mc@514
|
107 #define LSH_LIST_HEAD_COUNTERS
|
mas01mc@514
|
108 #endif
|
mas01mc@514
|
109
|
mas01mc@514
|
110 #define LSH_CORE_ARRAY_BIT (0x80000000) // LSH_CORE_ARRAY test bit for list head
|
mas01mc@514
|
111
|
mas01mc@764
|
112 #ifndef LSH_MULTI_PROBE_COUNT
|
mas01mc@764
|
113 #define LSH_MULTI_PROBE_COUNT 1 // How many adjacent hash-buckets to probe in LSH retrieval
|
mas01mc@764
|
114 #endif
|
mas01mc@764
|
115
|
mas01mc@292
|
116 Uns32T get_page_logn();
|
mas01mc@292
|
117
|
mas01mc@764
|
118 using namespace std;
|
mas01mc@764
|
119
|
mas01mc@292
|
120 // Disk table entry
|
mas01mc@292
|
121 typedef class SerialElement SerialElementT;
|
mas01mc@292
|
122 class SerialElement {
|
mas01mc@292
|
123 public:
|
mas01mc@292
|
124 Uns32T hashValue;
|
mas01mc@292
|
125 Uns32T pointID;
|
mas01mc@292
|
126
|
mas01mc@292
|
127 SerialElement(Uns32T h, Uns32T pID):
|
mas01mc@292
|
128 hashValue(h),
|
mas01mc@292
|
129 pointID(pID){}
|
mas01mc@292
|
130 };
|
mas01mc@292
|
131
|
mas01mc@292
|
132 // Disk header
|
mas01mc@292
|
133 typedef class SerialHeader SerialHeaderT;
|
mas01mc@292
|
134 class SerialHeader {
|
mas01mc@292
|
135 public:
|
mas01mc@292
|
136 Uns32T lshMagic; // unique identifier for file header
|
mas01mc@292
|
137 float binWidth; // hash-function bin width
|
mas01mc@292
|
138 Uns32T numTables; // number of hash tables in file
|
mas01mc@292
|
139 Uns32T numRows; // size of each hash table
|
mas01mc@292
|
140 Uns32T numCols; // max collisions in each hash table
|
mas01mc@292
|
141 Uns32T elementSize; // size of a hash bucket
|
mas01mc@292
|
142 Uns32T version; // version number of file format
|
mas01mc@292
|
143 Uns32T size; // total size of database (bytes)
|
mas01mc@292
|
144 Uns32T flags; // 32 bits of useful information
|
mas01mc@292
|
145 Uns32T dataDim; // vector dimensionality
|
mas01mc@292
|
146 Uns32T numFuns; // number of independent hash functions
|
mas01mc@292
|
147 float radius; // 32-bit floating point radius
|
mas01mc@292
|
148 Uns32T maxp; // number of unique IDs in the database
|
mas01mc@296
|
149 unsigned long long size_long; // long version of size
|
mas01mc@296
|
150 Uns32T pointCount; // number of points in the database
|
mas01mc@292
|
151
|
mas01mc@292
|
152 SerialHeader();
|
mas01mc@296
|
153 SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float radius, Uns32T p, Uns32T FMT, Uns32T pointCount);
|
mas01mc@292
|
154
|
mas01mc@292
|
155 float get_binWidth(){return binWidth;}
|
mas01mc@292
|
156 Uns32T get_numTables(){return numTables;}
|
mas01mc@292
|
157 Uns32T get_numRows(){return numRows;}
|
mas01mc@292
|
158 Uns32T get_numCols(){return numCols;}
|
mas01mc@292
|
159 Uns32T get_elementSize(){return elementSize;}
|
mas01mc@292
|
160 Uns32T get_version(){return version;}
|
mas01mc@292
|
161 Uns32T get_flags(){return flags;}
|
mas01mc@296
|
162 unsigned long long get_size(){return size_long;}
|
mas01mc@292
|
163 Uns32T get_dataDim(){return dataDim;}
|
mas01mc@292
|
164 Uns32T get_numFuns(){return numFuns;}
|
mas01mc@292
|
165 Uns32T get_maxp(){return maxp;}
|
mas01mc@296
|
166 Uns32T get_pointCount(){return pointCount;}
|
mas01mc@292
|
167 };
|
mas01mc@292
|
168
|
mas01mc@292
|
169 #define IFLAG 0xFFFFFFFF
|
mas01mc@292
|
170
|
mas01mc@292
|
171 // Point-set collision bucket (sbucket).
|
mas01mc@292
|
172 // sbuckets form a collision chain that identifies PointIDs falling in the same locale.
|
mas01mc@292
|
173 // sbuckets are chained from a bucket containing the collision list's t2 identifier
|
mas01mc@292
|
174 class sbucket {
|
mas01mc@292
|
175 friend class bucket;
|
mas01mc@292
|
176 friend class H;
|
mas01mc@292
|
177 friend class G;
|
mas01mc@292
|
178
|
mas01mc@292
|
179 public:
|
mas01mc@292
|
180 class sbucket* snext;
|
mas01mc@292
|
181 unsigned int pointID;
|
mas01mc@292
|
182
|
mas01mc@292
|
183 sbucket(){
|
mas01mc@292
|
184 snext=0;
|
mas01mc@292
|
185 pointID=IFLAG;
|
mas01mc@292
|
186 }
|
mas01mc@292
|
187 ~sbucket(){delete snext;}
|
mas01mc@292
|
188 sbucket* get_snext(){return snext;}
|
mas01mc@292
|
189 };
|
mas01mc@292
|
190
|
mas01mc@292
|
191 // bucket structure for a linked list of locales that collide with the same hash value t1
|
mas01mc@292
|
192 // different buckets represent different locales, collisions within a locale are chained
|
mas01mc@292
|
193 // in sbuckets
|
mas01mc@292
|
194 class bucket {
|
mas01mc@292
|
195 friend class H;
|
mas01mc@292
|
196 friend class G;
|
mas01mc@292
|
197 bucket* next;
|
mas01mc@344
|
198 union {
|
mas01mc@344
|
199 sbucket* ptr;
|
mas01mc@344
|
200 Uns32T numBuckets;
|
mas01mc@344
|
201 } snext;
|
mas01mc@292
|
202 public:
|
mas01mc@292
|
203 unsigned int t2;
|
mas01mc@292
|
204 bucket(){
|
mas01mc@292
|
205 next=0;
|
mas01mc@344
|
206 snext.ptr=0;
|
mas01mc@292
|
207 t2=IFLAG;
|
mas01mc@292
|
208 }
|
mas01mc@344
|
209 ~bucket(){delete next;delete snext.ptr;}
|
mas01mc@292
|
210 bucket* get_next(){return next;}
|
mas01mc@292
|
211 };
|
mas01mc@292
|
212
|
mas01mc@292
|
213
|
mas01mc@292
|
214 // The hash_functions for locality-sensitive hashing
|
mas01mc@292
|
215 class H{
|
mas01mc@292
|
216 friend class G;
|
mas01mc@292
|
217 private:
|
mas01mc@293
|
218
|
mas01mc@293
|
219 float *** A; // m x k x d random projectors from R^d->R^k
|
mas01mc@293
|
220 float ** b; // m x k uniform additive constants
|
mas01mc@293
|
221
|
mas01mc@293
|
222 Uns32T ** g; // L x k random hash projections \in Z^k
|
mas01mc@292
|
223 Uns32T** r1; // random ints for hashing
|
mas01mc@292
|
224 Uns32T** r2; // random ints for hashing
|
mas01mc@293
|
225
|
mas01mc@293
|
226 bucket*** h; // The LSH hash tables
|
mas01mc@293
|
227
|
mas01mc@293
|
228 bool use_u_functions; // flag to optimize computation of hashes
|
mas01mc@514
|
229 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
230 vector<vector<Uns32T> > uu; // Storage for m patial hash evaluations ( g_j = [u_a,u_b] )
|
mas01mc@514
|
231 #endif
|
mas01mc@293
|
232 Uns32T maxp; // highest pointID stored in database
|
mas01mc@293
|
233 Uns32T bucketCount; // count of number of point buckets allocated
|
mas01mc@293
|
234 Uns32T pointCount; // count of number of points inserted
|
mas01mc@296
|
235 Uns32T collisionCount; // number of points collided in a hash-table row
|
mas01mc@764
|
236 Uns32T tablesPointCount; // count of points per hash table on load
|
mas01mc@293
|
237
|
mas01mc@292
|
238 Uns32T t1; // first hash table key
|
mas01mc@292
|
239 Uns32T t2; // second hash table key
|
mas01mc@292
|
240 Uns32T P; // hash table prime number
|
mas01mc@292
|
241
|
mas01mc@764
|
242
|
mas01mc@292
|
243 Uns32T N; // num rows per table
|
mas01mc@292
|
244 Uns32T C; // num collision per row
|
mas01mc@292
|
245 Uns32T k; // num projections per hash function
|
mas01mc@292
|
246 Uns32T m; // ~sqrt num hash tables
|
mas01mc@292
|
247 Uns32T L; // L = m*(m-1)/2, conversely, m = (1 + sqrt(1 + 8.0*L)) / 2.0
|
mas01mc@292
|
248 Uns32T d; // dimensions
|
mas01mc@292
|
249 Uns32T p; // current point
|
mas01mc@293
|
250 float w; // width of hash slots (relative to normalized feature space)
|
mas01mc@293
|
251 float radius;// scaling coefficient for data (1./radius)
|
mas01mc@292
|
252
|
mas01mc@764
|
253 MultiProbe* multiProbePtr; // Utility class for handling multi-probe queries
|
mas01mc@764
|
254 float ** boundaryDistances; // Array of query bucket-boundary-distances per hashtable
|
mas01mc@764
|
255
|
mas01mc@293
|
256 void initialize_data_structures();
|
mas01mc@293
|
257 void initialize_lsh_functions();
|
mas01mc@293
|
258 void initialize_partial_functions();
|
mas01mc@293
|
259 void __bucket_insert_point(bucket*);
|
mas01mc@293
|
260 void __sbucket_insert_point(sbucket*);
|
mas01mc@340
|
261 bucket** get_pointer_to_bucket_linked_list(bucket* rowPtr);
|
mas01mc@293
|
262 Uns32T computeProductModDefaultPrime(Uns32T*,Uns32T*,IntT);
|
mas01mc@293
|
263 Uns32T randr();
|
mas01mc@293
|
264 float randn();
|
mas01mc@293
|
265 float ranf();
|
mas01mc@293
|
266 bucket** get_bucket(int j);
|
mas01mc@293
|
267 void error(const char* a, const char* b = "", const char *sysFunc = 0);
|
mas01mc@293
|
268
|
mas01mc@293
|
269 public:
|
mas01mc@293
|
270
|
mas01mc@293
|
271 H();
|
mas01mc@293
|
272 H(Uns32T k, Uns32T m, Uns32T d, Uns32T N, Uns32T C, float w, float r);
|
mas01mc@474
|
273 virtual ~H();
|
mas01mc@292
|
274
|
mas01mc@293
|
275 float get_w(){return w;}
|
mas01mc@293
|
276 float get_radius(){return radius;}
|
mas01mc@293
|
277 Uns32T get_numRows(){return N;}
|
mas01mc@293
|
278 Uns32T get_numCols(){return C;}
|
mas01mc@293
|
279 Uns32T get_numFuns(){return k;}
|
mas01mc@293
|
280 Uns32T get_numTables(){return L;}
|
mas01mc@293
|
281 Uns32T get_dataDim(){return d;}
|
mas01mc@293
|
282 Uns32T get_maxp(){return maxp;}
|
mas01mc@292
|
283
|
mas01mc@292
|
284 Uns32T bucket_insert_point(bucket**);
|
mas01mc@292
|
285
|
mas01mc@293
|
286 // Interface to hash functions
|
mas01mc@293
|
287 void compute_hash_functions(vector<float>& v);
|
mas01mc@293
|
288 void generate_hash_keys(Uns32T* g, Uns32T* r1, Uns32T* r2);
|
mas01mc@764
|
289 void generate_multiprobe_keys(Uns32T*g, Uns32T* r1, Uns32T* r2);
|
mas01mc@293
|
290 Uns32T get_t1(){return t1;} // hash-key t1
|
mas01mc@293
|
291 Uns32T get_t2(){return t2;} // hash-key t2
|
mas01mc@292
|
292 };
|
mas01mc@292
|
293
|
mas01mc@293
|
294 // Typedef for point-reporting callback function. Used to collect points during LSH retrieval
|
mas01mc@292
|
295 typedef void (*ReporterCallbackPtr)(void* objPtr, Uns32T pointID, Uns32T queryIndex, float squaredDistance);
|
mas01mc@292
|
296
|
mas01mc@292
|
297 // Interface for indexing and retrieval
|
mas01mc@292
|
298 class G: public H{
|
mas01mc@292
|
299 private:
|
mas01mc@308
|
300 char* indexName;
|
mas01mc@308
|
301
|
mas01mc@293
|
302 // LSH serial data structure file handling
|
mas01mc@292
|
303 void get_lock(int fd, bool exclusive);
|
mas01mc@292
|
304 void release_lock(int fd);
|
mas01mc@292
|
305 int serial_create(char* filename, Uns32T FMT);
|
mas01mc@292
|
306 int serial_create(char* filename, float binWidth, Uns32T nTables, Uns32T nRows, Uns32T nCols, Uns32T k, Uns32T d, Uns32T FMT);
|
mas01mc@764
|
307 char* serial_mmap(int dbfid, Uns32T sz, Uns32T w, off_t offset = 0);
|
mas01mc@764
|
308 void serial_munmap(char* db, Uns32T N);
|
mas01mc@292
|
309 int serial_open(char* filename,int writeFlag);
|
mas01mc@292
|
310 void serial_close(int dbfid);
|
mas01mc@292
|
311
|
mas01mc@292
|
312 // Function to write hashfunctions to disk
|
mas01mc@292
|
313 int serialize_lsh_hashfunctions(int fid);
|
mas01mc@292
|
314
|
mas01mc@292
|
315 // Functions to write hashtables to disk in format1 (optimized for on-disk retrieval)
|
mas01mc@292
|
316 int serialize_lsh_hashtables_format1(int fid, int merge);
|
mas01mc@292
|
317 void serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* h, Uns32T& colCount);
|
mas01mc@292
|
318 void serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount);
|
mas01mc@292
|
319 void serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* h, Uns32T& colCount);
|
mas01mc@292
|
320 void serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount);
|
mas01mc@292
|
321 int serial_can_merge(Uns32T requestedFormat); // Test to see whether core and on-disk structures are compatible
|
mas01mc@292
|
322
|
mas01mc@292
|
323 // Functions to write hashtables to disk in format2 (optimized for in-core retrieval)
|
mas01mc@336
|
324 int serialize_lsh_hashtables_format2(FILE* dbFile, int merge);
|
mas01mc@336
|
325 void serial_write_hashtable_row_format2(FILE* dbFile, bucket* h, Uns32T& colCount);
|
mas01mc@336
|
326 void serial_write_element_format2(FILE* dbFile, sbucket* sb, Uns32T& colCount);
|
mas01mc@340
|
327 Uns32T count_buckets_and_points_hashtable_row(bucket* bPtr);
|
mas01mc@340
|
328 Uns32T count_points_hashtable_row(bucket* bPtr);
|
mas01mc@292
|
329
|
mas01mc@292
|
330 // Functions to read serial header and hash functions (format1 and format2)
|
mas01mc@292
|
331 int unserialize_lsh_header(char* filename); // read lsh header from disk into core
|
mas01mc@292
|
332 void unserialize_lsh_functions(int fid); // read the lsh hash functions into core
|
mas01mc@292
|
333
|
mas01mc@292
|
334 // Functions to read hashtables in format1
|
mas01mc@292
|
335 void unserialize_lsh_hashtables_format1(int fid); // read FORMAT1 hash tables into core (disk format)
|
mas01mc@292
|
336 void unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b); // read lsh hash table row into core
|
mas01mc@292
|
337
|
mas01mc@292
|
338 // Functions to read hashtables in format2
|
mas01mc@340
|
339 void unserialize_lsh_hashtables_format2(FILE* dbFile, bool forMerge = 0);
|
mas01mc@340
|
340 Uns32T unserialize_hashtable_row_format2(FILE* dbFile, bucket** b, Uns32T token=0); // to dynamic linked list
|
mas01mc@340
|
341 Uns32T unserialize_hashtable_row_to_array(FILE* dbFile, bucket** b, Uns32T numElements); // to core array
|
mas01mc@292
|
342
|
mas01mc@292
|
343 // Helper functions
|
mas01mc@292
|
344 void serial_print_header(Uns32T requestedFormat);
|
mas01mc@292
|
345 float* get_serial_hashfunction_base(char* db);
|
mas01mc@292
|
346 SerialElementT* get_serial_hashtable_base(char* db);
|
mas01mc@292
|
347 Uns32T get_serial_hashtable_offset(); // Size of SerialHeader + HashFunctions
|
mas01mc@764
|
348 SerialHeaderT* serial_get_header(char* db);
|
mas01mc@292
|
349 SerialHeaderT* lshHeader;
|
mas01mc@292
|
350
|
mas01mc@292
|
351 // Core Retrieval/Inspections Functions
|
mas01mc@292
|
352 void bucket_chain_point(bucket* p, Uns32T qpos);
|
mas01mc@292
|
353 void sbucket_chain_point(sbucket* p, Uns32T qpos);
|
mas01mc@292
|
354 void dump_hashtable_row(bucket* p);
|
mas01mc@513
|
355 void dump_core_hashtable_array(Uns32T* p);
|
mas01mc@292
|
356
|
mas01mc@292
|
357 // Serial (Format 1) Retrieval/Inspection Functions
|
mas01mc@292
|
358 void serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos);
|
mas01mc@292
|
359 void serial_bucket_dump(SerialElementT* pe);
|
mas01mc@292
|
360
|
mas01mc@340
|
361 // Core ARRAY Retrieval Functions
|
mas01mc@340
|
362 void retrieve_from_core_hashtable_array(Uns32T* p, Uns32T qpos);
|
mas01mc@340
|
363
|
mas01mc@340
|
364
|
mas01mc@293
|
365 // Callback Function for point reporting
|
mas01mc@293
|
366 void* calling_instance; // store calling object instance for member-function callback
|
mas01mc@324
|
367 ReporterCallbackPtr add_point_callback; // Pointer to the callback function
|
mas01mc@292
|
368
|
mas01mc@292
|
369 public:
|
mas01mc@292
|
370 G(char* lshFile, bool lshInCore = false); // unserialize constructor
|
mas01mc@292
|
371 G(float w, Uns32T k,Uns32T m, Uns32T d, Uns32T N, Uns32T C, float r); // core constructor
|
mas01mc@474
|
372 virtual ~G();
|
mas01mc@292
|
373
|
mas01mc@292
|
374 Uns32T insert_point(vector<float>&, Uns32T pointID);
|
mas01mc@292
|
375 void insert_point_set(vector<vector<float> >& vv, Uns32T basePointID);
|
mas01mc@292
|
376
|
mas01mc@292
|
377 // point retrieval from core
|
mas01mc@292
|
378 void retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr, void* me=NULL);
|
mas01mc@292
|
379 // point set retrieval from core
|
mas01mc@292
|
380 void retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr, void* me=NULL);
|
mas01mc@292
|
381 // serial point set retrieval
|
mas01mc@292
|
382 void serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr, void* me=NULL);
|
mas01mc@292
|
383 // serial point retrieval
|
mas01mc@292
|
384 void serial_retrieve_point(char* filename, vector<float>& vv, Uns32T qpos, ReporterCallbackPtr, void* me=NULL);
|
mas01mc@292
|
385
|
mas01mc@292
|
386 void serialize(char* filename, Uns32T serialFormat = O2_SERIAL_FILEFORMAT1); // write hashfunctions and hashtables to disk
|
mas01mc@292
|
387
|
mas01mc@292
|
388 SerialHeaderT* get_lshHeader(){return lshHeader;}
|
mas01mc@292
|
389 void serial_dump_tables(char* filename);
|
mas01mc@292
|
390 float get_mean_collision_rate(){ return (float) pointCount / bucketCount ; }
|
mas01mc@308
|
391 char* get_indexName(){return indexName;}
|
mas01mc@513
|
392 void dump_hashtables();
|
mas01mc@764
|
393 void dump_core_row(Uns32T n);
|
mas01mc@764
|
394 void dump_disk_row(char*, Uns32T n);
|
mas01mc@292
|
395 };
|
mas01mc@292
|
396
|
mas01mc@292
|
397 typedef class G LSH;
|
mas01mc@292
|
398
|
mas01mc@292
|
399
|
mas01mc@292
|
400
|
mas01mc@292
|
401 #endif
|