mas01mc@292
|
1 #include "lshlib.h"
|
mas01mc@292
|
2
|
mas01mc@292
|
3 //#define __LSH_DUMP_CORE_TABLES__
|
mas01mc@292
|
4 //#define USE_U_FUNCTIONS
|
mas01mc@292
|
5 //#define LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
6
|
mas01mc@292
|
7 void err(char*s){cout << s << endl;exit(2);}
|
mas01mc@292
|
8
|
mas01mc@292
|
9 Uns32T get_page_logn(){
|
mas01mc@292
|
10 int pagesz = (int)sysconf(_SC_PAGESIZE);
|
mas01mc@292
|
11 return (Uns32T)log2((double)pagesz);
|
mas01mc@292
|
12 }
|
mas01mc@292
|
13
|
mas01mc@292
|
14 unsigned align_up(unsigned x, unsigned w){ return ((x) + ((1<<w)-1) & ~((1<<w)-1)); }
|
mas01mc@292
|
15
|
mas01mc@292
|
16 void H::error(const char* a, const char* b, const char *sysFunc) {
|
mas01mc@292
|
17 cerr << a << ": " << b << endl;
|
mas01mc@292
|
18 if (sysFunc) {
|
mas01mc@292
|
19 perror(sysFunc);
|
mas01mc@292
|
20 }
|
mas01mc@292
|
21 exit(1);
|
mas01mc@292
|
22 }
|
mas01mc@292
|
23
|
mas01mc@293
|
24 H::H(){
|
mas01mc@293
|
25 // Delay initialization of lsh functions until we know the parameters
|
mas01mc@293
|
26 }
|
mas01mc@293
|
27
|
mas01mc@293
|
28 H::H(Uns32T kk, Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float ww, float rr):
|
mas01mc@292
|
29 #ifdef USE_U_FUNCTIONS
|
mas01mc@292
|
30 use_u_functions(true),
|
mas01mc@292
|
31 #else
|
mas01mc@292
|
32 use_u_functions(false),
|
mas01mc@292
|
33 #endif
|
mas01mc@293
|
34 maxp(0),
|
mas01mc@292
|
35 bucketCount(0),
|
mas01mc@292
|
36 pointCount(0),
|
mas01mc@292
|
37 N(NN),
|
mas01mc@292
|
38 C(CC),
|
mas01mc@292
|
39 k(kk),
|
mas01mc@292
|
40 m(mm),
|
mas01mc@293
|
41 L((mm*(mm-1))/2),
|
mas01mc@293
|
42 d(dd),
|
mas01mc@293
|
43 w(ww),
|
mas01mc@293
|
44 radius(rr)
|
mas01mc@292
|
45 {
|
mas01mc@292
|
46
|
mas01mc@292
|
47 if(m<2){
|
mas01mc@292
|
48 m=2;
|
mas01mc@292
|
49 L=1; // check value of L
|
mas01mc@292
|
50 cout << "warning: setting m=2, L=1" << endl;
|
mas01mc@292
|
51 }
|
mas01mc@292
|
52 if(use_u_functions && k%2){
|
mas01mc@292
|
53 k++; // make sure k is even
|
mas01mc@292
|
54 cout << "warning: setting k even" << endl;
|
mas01mc@292
|
55 }
|
mas01mc@293
|
56
|
mas01mc@293
|
57 cout << "file size: ~" << (((unsigned long long)L*N*C*sizeof(SerialElementT))/1000000UL) << "MB" << endl;
|
mas01mc@293
|
58 if(((unsigned long long)L*N*C*sizeof(SerialElementT))>4000000000UL)
|
mas01mc@293
|
59 error("Maximum size of LSH file exceded: 12*L*N*C > 4000MB");
|
mas01mc@293
|
60 else if(((unsigned long long)N*C*sizeof(SerialElementT))>1000000000UL)
|
mas01mc@293
|
61 cout << "warning: hash tables exceed 1000MB." << endl;
|
mas01mc@293
|
62
|
mas01mc@293
|
63 // We have the necessary parameters, so construct hashfunction datastructures
|
mas01mc@293
|
64 initialize_lsh_functions();
|
mas01mc@292
|
65 }
|
mas01mc@292
|
66
|
mas01mc@293
|
67 void H::initialize_lsh_functions(){
|
mas01mc@292
|
68 H::P = UH_PRIME_DEFAULT;
|
mas01mc@292
|
69
|
mas01mc@292
|
70 /* FIXME: don't use time(); instead use /dev/random or similar */
|
mas01mc@292
|
71 /* FIXME: write out the seed somewhere, so that we can get
|
mas01mc@292
|
72 repeatability */
|
mas01mc@292
|
73 #ifdef MT19937
|
mas01mc@292
|
74 init_genrand(time(NULL));
|
mas01mc@292
|
75 #else
|
mas01mc@292
|
76 srand(time(NULL)); // seed random number generator
|
mas01mc@292
|
77 #endif
|
mas01mc@293
|
78 Uns32T i,j, kk;
|
mas01mc@293
|
79 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
80 H::A = new float**[ H::m ]; // m x k x d random projectors
|
mas01mc@293
|
81 H::b = new float*[ H::m ]; // m x k random biases
|
mas01mc@293
|
82 #else
|
mas01mc@293
|
83 H::A = new float**[ H::L ]; // m x k x d random projectors
|
mas01mc@293
|
84 H::b = new float*[ H::L ]; // m x k random biases
|
mas01mc@293
|
85 #endif
|
mas01mc@293
|
86 H::g = new Uns32T*[ H::L ]; // L x k random projections
|
mas01mc@293
|
87 assert( H::g && H::A && H::b ); // failure
|
mas01mc@293
|
88 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
89 // Use m \times u_i functions \in R^{(k/2) \times (d)}
|
mas01mc@293
|
90 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
|
mas01mc@293
|
91 for( j = 0; j < H::m ; j++ ){ // m functions u_i(v)
|
mas01mc@293
|
92 H::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients
|
mas01mc@293
|
93 H::b[j] = new float[ H::k/2 ]; // bias
|
mas01mc@293
|
94 assert( H::A[j] && H::b[j] ); // failure
|
mas01mc@293
|
95 for( kk = 0; kk < H::k/2 ; kk++ ){
|
mas01mc@293
|
96 H::A[j][kk] = new float[ H::d ];
|
mas01mc@293
|
97 assert( H::A[j][kk] ); // failure
|
mas01mc@293
|
98 for(Uns32T i = 0 ; i < H::d ; i++ )
|
mas01mc@293
|
99 H::A[j][kk][i] = H::randn(); // Normal
|
mas01mc@293
|
100 H::b[j][kk] = H::ranf()*H::w; // Uniform
|
mas01mc@293
|
101 }
|
mas01mc@293
|
102 }
|
mas01mc@293
|
103 #else
|
mas01mc@293
|
104 // Use m \times u_i functions \in R^{k \times (d)}
|
mas01mc@293
|
105 // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d}
|
mas01mc@293
|
106 for( j = 0; j < H::L ; j++ ){ // m functions u_i(v)
|
mas01mc@293
|
107 H::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients
|
mas01mc@293
|
108 H::b[j] = new float[ H::k ]; // bias
|
mas01mc@293
|
109 assert( H::A[j] && H::b[j] ); // failure
|
mas01mc@293
|
110 for( kk = 0; kk < H::k ; kk++ ){
|
mas01mc@293
|
111 H::A[j][kk] = new float[ H::d ];
|
mas01mc@293
|
112 assert( H::A[j][kk] ); // failure
|
mas01mc@293
|
113 for(Uns32T i = 0 ; i < H::d ; i++ )
|
mas01mc@293
|
114 H::A[j][kk][i] = H::randn(); // Normal
|
mas01mc@293
|
115 H::b[j][kk] = H::ranf()*H::w; // Uniform
|
mas01mc@293
|
116 }
|
mas01mc@293
|
117 }
|
mas01mc@293
|
118 #endif
|
mas01mc@293
|
119
|
mas01mc@293
|
120 // Storage for LSH hash function output (Uns32T)
|
mas01mc@293
|
121 for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2)
|
mas01mc@293
|
122 H::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z
|
mas01mc@293
|
123 assert( H::g[j] );
|
mas01mc@292
|
124 }
|
mas01mc@292
|
125
|
mas01mc@293
|
126 // LSH Hash tables
|
mas01mc@293
|
127 H::h = new bucket**[ H::L ];
|
mas01mc@293
|
128 assert( H::h );
|
mas01mc@292
|
129 for( j = 0 ; j < H::L ; j++ ){
|
mas01mc@292
|
130 H::h[j] = new bucket*[ H::N ];
|
mas01mc@292
|
131 assert( H::h[j] );
|
mas01mc@292
|
132 for( i = 0 ; i < H::N ; i++)
|
mas01mc@292
|
133 H::h[j][i] = 0;
|
mas01mc@292
|
134 }
|
mas01mc@293
|
135
|
mas01mc@293
|
136 // Standard hash functions
|
mas01mc@293
|
137 H::r1 = new Uns32T*[ H::L ];
|
mas01mc@293
|
138 H::r2 = new Uns32T*[ H::L ];
|
mas01mc@293
|
139 assert( H::r1 && H::r2 ); // failure
|
mas01mc@293
|
140 for( j = 0 ; j < H::L ; j++ ){
|
mas01mc@293
|
141 H::r1[ j ] = new Uns32T[ H::k ];
|
mas01mc@293
|
142 H::r2[ j ] = new Uns32T[ H::k ];
|
mas01mc@293
|
143 assert( H::r1[j] && H::r2[j] ); // failure
|
mas01mc@293
|
144 for( i = 0; i<H::k; i++){
|
mas01mc@293
|
145 H::r1[j][i] = randr();
|
mas01mc@293
|
146 H::r2[j][i] = randr();
|
mas01mc@293
|
147 }
|
mas01mc@293
|
148 }
|
mas01mc@293
|
149
|
mas01mc@293
|
150 // Storage for whole or partial function evaluation depdenting on USE_U_FUNCTIONS
|
mas01mc@293
|
151 H::initialize_partial_functions();
|
mas01mc@293
|
152 }
|
mas01mc@293
|
153
|
mas01mc@293
|
154 void H::initialize_partial_functions(){
|
mas01mc@293
|
155
|
mas01mc@293
|
156 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
157 H::uu = vector<vector<Uns32T> >(H::m);
|
mas01mc@293
|
158 for( Uns32T aa=0 ; aa < H::m ; aa++ )
|
mas01mc@293
|
159 H::uu[aa] = vector<Uns32T>( H::k/2 );
|
mas01mc@293
|
160 #else
|
mas01mc@293
|
161 H::uu = vector<vector<Uns32T> >(H::L);
|
mas01mc@293
|
162 for( Uns32T aa=0 ; aa < H::L ; aa++ )
|
mas01mc@293
|
163 H::uu[aa] = vector<Uns32T>( H::k );
|
mas01mc@293
|
164 #endif
|
mas01mc@293
|
165 }
|
mas01mc@293
|
166
|
mas01mc@293
|
167
|
mas01mc@293
|
168 // Generate z ~ N(0,1)
|
mas01mc@293
|
169 float H::randn(){
|
mas01mc@293
|
170 // Box-Muller
|
mas01mc@293
|
171 float x1, x2;
|
mas01mc@293
|
172 do{
|
mas01mc@293
|
173 x1 = ranf();
|
mas01mc@293
|
174 } while (x1 == 0); // cannot take log of 0
|
mas01mc@293
|
175 x2 = ranf();
|
mas01mc@293
|
176 float z;
|
mas01mc@293
|
177 z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2);
|
mas01mc@293
|
178 return z;
|
mas01mc@293
|
179 }
|
mas01mc@293
|
180
|
mas01mc@293
|
181 float H::ranf(){
|
mas01mc@293
|
182 #ifdef MT19937
|
mas01mc@293
|
183 return (float) genrand_real2();
|
mas01mc@293
|
184 #else
|
mas01mc@293
|
185 return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) );
|
mas01mc@293
|
186 #endif
|
mas01mc@293
|
187 }
|
mas01mc@293
|
188
|
mas01mc@293
|
189 // range is 1..2^29
|
mas01mc@293
|
190 /* FIXME: that looks like an ... odd range. Still. */
|
mas01mc@293
|
191 Uns32T H::randr(){
|
mas01mc@293
|
192 #ifdef MT19937
|
mas01mc@293
|
193 return (Uns32T)((genrand_int32() >> 3) + 1);
|
mas01mc@293
|
194 #else
|
mas01mc@293
|
195 return (Uns32T) ((rand() >> 2) + 1);
|
mas01mc@293
|
196 #endif
|
mas01mc@292
|
197 }
|
mas01mc@292
|
198
|
mas01mc@292
|
199 // Destruct hash tables
|
mas01mc@292
|
200 H::~H(){
|
mas01mc@293
|
201 Uns32T i,j,kk;
|
mas01mc@293
|
202 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
203 for( j = 0 ; j < H::m ; j++ ){
|
mas01mc@293
|
204 for( kk = 0 ; kk < H::k/2 ; kk++ )
|
mas01mc@293
|
205 delete[] A[j][kk];
|
mas01mc@293
|
206 delete[] A[j];
|
mas01mc@293
|
207 }
|
mas01mc@293
|
208 delete[] A;
|
mas01mc@293
|
209 for( j = 0 ; j < H::m ; j++ )
|
mas01mc@293
|
210 delete[] b[j];
|
mas01mc@293
|
211 delete[] b;
|
mas01mc@293
|
212 #else
|
mas01mc@293
|
213 for( j = 0 ; j < H::L ; j++ ){
|
mas01mc@293
|
214 for( kk = 0 ; kk < H::k ; kk++ )
|
mas01mc@293
|
215 delete[] A[j][kk];
|
mas01mc@293
|
216 delete[] A[j];
|
mas01mc@293
|
217 }
|
mas01mc@293
|
218 delete[] A;
|
mas01mc@293
|
219 for( j = 0 ; j < H::L ; j++ )
|
mas01mc@293
|
220 delete[] b[j];
|
mas01mc@293
|
221 delete[] b;
|
mas01mc@293
|
222 #endif
|
mas01mc@293
|
223
|
mas01mc@293
|
224 for( j = 0 ; j < H::L ; j++ )
|
mas01mc@293
|
225 delete[] g[j];
|
mas01mc@293
|
226 delete[] g;
|
mas01mc@292
|
227 for( j=0 ; j < H::L ; j++ ){
|
mas01mc@292
|
228 delete[] H::r1[ j ];
|
mas01mc@292
|
229 delete[] H::r2[ j ];
|
mas01mc@292
|
230 for(i = 0; i< H::N ; i++)
|
mas01mc@292
|
231 delete H::h[ j ][ i ];
|
mas01mc@292
|
232 delete[] H::h[ j ];
|
mas01mc@292
|
233 }
|
mas01mc@292
|
234 delete[] H::r1;
|
mas01mc@292
|
235 delete[] H::r2;
|
mas01mc@292
|
236 delete[] H::h;
|
mas01mc@292
|
237 }
|
mas01mc@292
|
238
|
mas01mc@292
|
239
|
mas01mc@293
|
240 // Compute all hash functions for vector v
|
mas01mc@293
|
241 // #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d}
|
mas01mc@293
|
242 // to make L \times g_j functions \in Z^k
|
mas01mc@293
|
243 void H::compute_hash_functions(vector<float>& v){ // v \in R^d
|
mas01mc@293
|
244 float iw = 1. / H::w; // hash bucket width
|
mas01mc@293
|
245 Uns32T aa, kk;
|
mas01mc@293
|
246 if( v.size() != H::d )
|
mas01mc@293
|
247 error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality
|
mas01mc@293
|
248 double tmp = 0;
|
mas01mc@293
|
249 float *pA, *pb;
|
mas01mc@293
|
250 Uns32T *pg;
|
mas01mc@293
|
251 int dd;
|
mas01mc@293
|
252 vector<float>::iterator vi;
|
mas01mc@293
|
253 vector<Uns32T>::iterator ui;
|
mas01mc@293
|
254
|
mas01mc@293
|
255 #ifdef USE_U_FUNCTIONS
|
mas01mc@293
|
256 Uns32T bb;
|
mas01mc@293
|
257 // Store m dot products to expand
|
mas01mc@293
|
258 for( aa=0; aa < H::m ; aa++ ){
|
mas01mc@293
|
259 ui = H::uu[aa].begin();
|
mas01mc@293
|
260 for( kk = 0 ; kk < H::k/2 ; kk++ ){
|
mas01mc@293
|
261 pb = *( H::b + aa ) + kk;
|
mas01mc@293
|
262 pA = * ( * ( H::A + aa ) + kk );
|
mas01mc@293
|
263 dd = H::d;
|
mas01mc@293
|
264 tmp = 0.;
|
mas01mc@293
|
265 vi = v.begin();
|
mas01mc@293
|
266 while( dd-- )
|
mas01mc@293
|
267 tmp += *pA++ * *vi++; // project
|
mas01mc@293
|
268 tmp += *pb; // translate
|
mas01mc@293
|
269 tmp *= iw; // scale
|
mas01mc@293
|
270 *ui++ = (Uns32T) floor(tmp); // floor
|
mas01mc@293
|
271 }
|
mas01mc@293
|
272 }
|
mas01mc@293
|
273 // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d}
|
mas01mc@293
|
274 Uns32T j;
|
mas01mc@293
|
275 for( aa=0, j=0 ; aa < H::m-1 ; aa++ )
|
mas01mc@293
|
276 for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){
|
mas01mc@293
|
277 pg= *( H::g + j ); // L \times functions g_j(v) \in Z^k
|
mas01mc@293
|
278 // u_1 \in Z^{(k/2) \times d}
|
mas01mc@293
|
279 ui = H::uu[aa].begin();
|
mas01mc@293
|
280 kk=H::k/2;
|
mas01mc@293
|
281 while( kk-- )
|
mas01mc@293
|
282 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z
|
mas01mc@293
|
283 // u_2 \in Z^{(k/2) \times d}
|
mas01mc@293
|
284 ui = H::uu[bb].begin();
|
mas01mc@293
|
285 kk=H::k/2;
|
mas01mc@293
|
286 while( kk--)
|
mas01mc@293
|
287 *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z
|
mas01mc@293
|
288 }
|
mas01mc@293
|
289 #else
|
mas01mc@293
|
290 for( aa=0; aa < H::L ; aa++ ){
|
mas01mc@293
|
291 ui = H::uu[aa].begin();
|
mas01mc@293
|
292 for( kk = 0 ; kk < H::k ; kk++ ){
|
mas01mc@293
|
293 pb = *( H::b + aa ) + kk;
|
mas01mc@293
|
294 pA = * ( * ( H::A + aa ) + kk );
|
mas01mc@293
|
295 dd = H::d;
|
mas01mc@293
|
296 tmp = 0.;
|
mas01mc@293
|
297 vi = v.begin();
|
mas01mc@293
|
298 while( dd-- )
|
mas01mc@293
|
299 tmp += *pA++ * *vi++; // project
|
mas01mc@293
|
300 tmp += *pb; // translate
|
mas01mc@293
|
301 tmp *= iw; // scale
|
mas01mc@293
|
302 *ui++ = (Uns32T) (floor(tmp)); // floor
|
mas01mc@293
|
303 }
|
mas01mc@293
|
304 }
|
mas01mc@293
|
305 // Compute hash functions
|
mas01mc@293
|
306 for( aa=0 ; aa < H::L ; aa++ ){
|
mas01mc@293
|
307 pg= *( H::g + aa ); // L \times functions g_j(v) \in Z^k
|
mas01mc@293
|
308 // u_1 \in Z^{k \times d}
|
mas01mc@293
|
309 ui = H::uu[aa].begin();
|
mas01mc@293
|
310 kk=H::k;
|
mas01mc@293
|
311 while( kk-- )
|
mas01mc@293
|
312 *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z
|
mas01mc@293
|
313 }
|
mas01mc@293
|
314 #endif
|
mas01mc@293
|
315 }
|
mas01mc@293
|
316
|
mas01mc@292
|
317 // make hash value \in Z
|
mas01mc@293
|
318 void H::generate_hash_keys(Uns32T*g, Uns32T* r1, Uns32T* r2){
|
mas01mc@293
|
319 H::t1 = computeProductModDefaultPrime( g, r1, H::k ) % H::N;
|
mas01mc@293
|
320 H::t2 = computeProductModDefaultPrime( g, r2, H::k );
|
mas01mc@292
|
321 }
|
mas01mc@292
|
322
|
mas01mc@292
|
323 #define CR_ASSERT(b){if(!(b)){fprintf(stderr, "ASSERT failed on line %d, file %s.\n", __LINE__, __FILE__); exit(1);}}
|
mas01mc@292
|
324
|
mas01mc@292
|
325 // Computes (a.b) mod UH_PRIME_DEFAULT
|
mas01mc@293
|
326 inline Uns32T H::computeProductModDefaultPrime(Uns32T *a, Uns32T *b, IntT size){
|
mas01mc@292
|
327 LongUns64T h = 0;
|
mas01mc@292
|
328
|
mas01mc@292
|
329 for(IntT i = 0; i < size; i++){
|
mas01mc@292
|
330 h = h + (LongUns64T)a[i] * (LongUns64T)b[i];
|
mas01mc@292
|
331 h = (h & TWO_TO_32_MINUS_1) + 5 * (h >> 32);
|
mas01mc@292
|
332 if (h >= UH_PRIME_DEFAULT) {
|
mas01mc@292
|
333 h = h - UH_PRIME_DEFAULT;
|
mas01mc@292
|
334 }
|
mas01mc@292
|
335 CR_ASSERT(h < UH_PRIME_DEFAULT);
|
mas01mc@292
|
336 }
|
mas01mc@292
|
337 return h;
|
mas01mc@292
|
338 }
|
mas01mc@292
|
339
|
mas01mc@292
|
340 Uns32T H::bucket_insert_point(bucket **pp){
|
mas01mc@292
|
341 Uns32T collisionCount = 0;
|
mas01mc@292
|
342 if(!*pp){
|
mas01mc@292
|
343 *pp = new bucket();
|
mas01mc@292
|
344 #ifdef LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
345 (*pp)->t2 = 0; // Use t2 as a collision counter for the row
|
mas01mc@292
|
346 (*pp)->next = new bucket();
|
mas01mc@292
|
347 #endif
|
mas01mc@292
|
348 }
|
mas01mc@292
|
349 #ifdef LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
350 collisionCount = (*pp)->t2;
|
mas01mc@292
|
351 if(collisionCount < H::C){ // Block if row is full
|
mas01mc@292
|
352 (*pp)->t2++; // Increment collision counter
|
mas01mc@292
|
353 pointCount++;
|
mas01mc@292
|
354 collisionCount++;
|
mas01mc@292
|
355 __bucket_insert_point((*pp)->next); // First bucket holds collision count
|
mas01mc@292
|
356 }
|
mas01mc@292
|
357 #else
|
mas01mc@292
|
358 pointCount++;
|
mas01mc@292
|
359 __bucket_insert_point(*pp); // No collision count storage
|
mas01mc@292
|
360 #endif
|
mas01mc@292
|
361 return collisionCount;
|
mas01mc@292
|
362 }
|
mas01mc@292
|
363
|
mas01mc@292
|
364 void H::__bucket_insert_point(bucket* p){
|
mas01mc@292
|
365 if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2?
|
mas01mc@292
|
366 p->t2 = H::t2;
|
mas01mc@292
|
367 bucketCount++; // Record start of new point-locale collision chain
|
mas01mc@292
|
368 p->snext = new sbucket();
|
mas01mc@292
|
369 __sbucket_insert_point(p->snext);
|
mas01mc@292
|
370 return;
|
mas01mc@292
|
371 }
|
mas01mc@292
|
372
|
mas01mc@292
|
373 if(p->t2 == H::t2){
|
mas01mc@292
|
374 __sbucket_insert_point(p->snext);
|
mas01mc@292
|
375 return;
|
mas01mc@292
|
376 }
|
mas01mc@292
|
377
|
mas01mc@292
|
378 if(p->next){
|
mas01mc@292
|
379 __bucket_insert_point(p->next);
|
mas01mc@292
|
380 }
|
mas01mc@292
|
381
|
mas01mc@292
|
382 else{
|
mas01mc@292
|
383 p->next = new bucket();
|
mas01mc@292
|
384 __bucket_insert_point(p->next);
|
mas01mc@292
|
385 }
|
mas01mc@292
|
386
|
mas01mc@292
|
387 }
|
mas01mc@292
|
388
|
mas01mc@292
|
389 void H::__sbucket_insert_point(sbucket* p){
|
mas01mc@292
|
390 if(p->pointID==IFLAG){
|
mas01mc@292
|
391 p->pointID = H::p;
|
mas01mc@292
|
392 return;
|
mas01mc@292
|
393 }
|
mas01mc@292
|
394
|
mas01mc@292
|
395 // Search for pointID
|
mas01mc@292
|
396 if(p->snext){
|
mas01mc@292
|
397 __sbucket_insert_point(p->snext);
|
mas01mc@292
|
398 }
|
mas01mc@292
|
399 else{
|
mas01mc@292
|
400 // Make new point collision bucket at end of list
|
mas01mc@292
|
401 p->snext = new sbucket();
|
mas01mc@292
|
402 __sbucket_insert_point(p->snext);
|
mas01mc@292
|
403 }
|
mas01mc@292
|
404 }
|
mas01mc@292
|
405
|
mas01mc@293
|
406 inline bucket** H::get_bucket(int j){
|
mas01mc@292
|
407 return *(h+j);
|
mas01mc@292
|
408 }
|
mas01mc@292
|
409
|
mas01mc@293
|
410 // Interface to Locality Sensitive Hashing G
|
mas01mc@293
|
411 G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float rr):
|
mas01mc@293
|
412 H(kk,mm,dd,NN,CC,ww,rr), // constructor to initialize data structures
|
mas01mc@293
|
413 lshHeader(0),
|
mas01mc@292
|
414 calling_instance(0),
|
mas01mc@293
|
415 add_point_callback(0)
|
mas01mc@292
|
416 {
|
mas01mc@293
|
417
|
mas01mc@292
|
418 }
|
mas01mc@292
|
419
|
mas01mc@292
|
420 // Serialize from file LSH constructor
|
mas01mc@292
|
421 // Read parameters from database file
|
mas01mc@292
|
422 // Load the hash functions, close the database
|
mas01mc@292
|
423 // Optionally load the LSH tables into head-allocated lists in core
|
mas01mc@292
|
424 G::G(char* filename, bool lshInCoreFlag):
|
mas01mc@293
|
425 H(), // default base-class constructor call delays data-structure initialization
|
mas01mc@293
|
426 lshHeader(0),
|
mas01mc@292
|
427 calling_instance(0),
|
mas01mc@292
|
428 add_point_callback(0)
|
mas01mc@292
|
429 {
|
mas01mc@292
|
430 int dbfid = unserialize_lsh_header(filename);
|
mas01mc@293
|
431
|
mas01mc@293
|
432 H::initialize_lsh_functions(); // Base-class data-structure initialization
|
mas01mc@293
|
433 unserialize_lsh_functions(dbfid); // populate with on-disk hashfunction values
|
mas01mc@292
|
434
|
mas01mc@292
|
435 // Format1 only needs unserializing if specifically requested
|
mas01mc@292
|
436 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){
|
mas01mc@292
|
437 unserialize_lsh_hashtables_format1(dbfid);
|
mas01mc@292
|
438 }
|
mas01mc@292
|
439
|
mas01mc@292
|
440 // Format2 always needs unserializing
|
mas01mc@292
|
441 if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){
|
mas01mc@292
|
442 unserialize_lsh_hashtables_format2(dbfid);
|
mas01mc@292
|
443 }
|
mas01mc@292
|
444
|
mas01mc@293
|
445 close(dbfid);}
|
mas01mc@292
|
446
|
mas01mc@292
|
447 G::~G(){
|
mas01mc@292
|
448 delete lshHeader;
|
mas01mc@292
|
449 }
|
mas01mc@292
|
450
|
mas01mc@292
|
451 // single point insertion; inserted values are hash value and pointID
|
mas01mc@292
|
452 Uns32T G::insert_point(vector<float>& v, Uns32T pp){
|
mas01mc@292
|
453 Uns32T collisionCount = 0;
|
mas01mc@292
|
454 H::p = pp;
|
mas01mc@293
|
455 if(pp>H::maxp)
|
mas01mc@293
|
456 H::maxp=pp; // Store highest pointID in database
|
mas01mc@293
|
457 H::compute_hash_functions( v );
|
mas01mc@292
|
458 for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion
|
mas01mc@293
|
459 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
|
mas01mc@292
|
460 collisionCount += bucket_insert_point( *(h + j) + t1 );
|
mas01mc@292
|
461 }
|
mas01mc@292
|
462 return collisionCount;
|
mas01mc@292
|
463 }
|
mas01mc@292
|
464
|
mas01mc@292
|
465
|
mas01mc@292
|
466 // batch insert for a point set
|
mas01mc@292
|
467 // inserted values are vector hash value and pointID starting at basePointID
|
mas01mc@292
|
468 void G::insert_point_set(vector<vector<float> >& vv, Uns32T basePointID){
|
mas01mc@292
|
469 for(Uns32T point=0; point<vv.size(); point++)
|
mas01mc@292
|
470 insert_point(vv[point], basePointID+point);
|
mas01mc@292
|
471 }
|
mas01mc@292
|
472
|
mas01mc@292
|
473 // point retrieval routine
|
mas01mc@292
|
474 void G::retrieve_point(vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
|
mas01mc@292
|
475 calling_instance = caller;
|
mas01mc@292
|
476 add_point_callback = add_point;
|
mas01mc@293
|
477 H::compute_hash_functions( v );
|
mas01mc@292
|
478 for(Uns32T j = 0 ; j < H::L ; j++ ){
|
mas01mc@293
|
479 H::generate_hash_keys( *( H::g + j ), *( H::r1 + j ), *( H::r2 + j ) );
|
mas01mc@293
|
480 if( bucket* bPtr = *(get_bucket(j) + get_t1()) )
|
mas01mc@292
|
481 #ifdef LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
482 bucket_chain_point( bPtr->next, qpos);
|
mas01mc@292
|
483 #else
|
mas01mc@292
|
484 bucket_chain_point( bPtr , qpos);
|
mas01mc@292
|
485 #endif
|
mas01mc@292
|
486 }
|
mas01mc@292
|
487 }
|
mas01mc@292
|
488
|
mas01mc@292
|
489 void G::retrieve_point_set(vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller){
|
mas01mc@292
|
490 for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ )
|
mas01mc@292
|
491 retrieve_point(vv[qpos], qpos, add_point, caller);
|
mas01mc@292
|
492 }
|
mas01mc@292
|
493
|
mas01mc@292
|
494 // export lsh tables to table structure on disk
|
mas01mc@292
|
495 //
|
mas01mc@292
|
496 // LSH TABLE STRUCTURE
|
mas01mc@292
|
497 // ---header 64 bytes ---
|
mas01mc@292
|
498 // [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0]
|
mas01mc@292
|
499 //
|
mas01mc@292
|
500 // ---random projections L x k x d float ---
|
mas01mc@292
|
501 // A[0][0][0] A[0][0][1] ... A[0][0][d-1]
|
mas01mc@292
|
502 // A[0][1][0] A[0][1][1] ... A[1][1][d-1]
|
mas01mc@292
|
503 // ...
|
mas01mc@292
|
504 // A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1]
|
mas01mc@292
|
505 // ...
|
mas01mc@292
|
506 // ...
|
mas01mc@292
|
507 // A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1]
|
mas01mc@292
|
508 // A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1]
|
mas01mc@292
|
509 // ...
|
mas01mc@292
|
510 // A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1]
|
mas01mc@292
|
511 //
|
mas01mc@292
|
512 // ---bias L x k float ---
|
mas01mc@292
|
513 // b[0][0] b[0][1] ... b[0][k-1]
|
mas01mc@292
|
514 // b[1][0] b[1][1] ... b[1][k-1]
|
mas01mc@292
|
515 // ...
|
mas01mc@292
|
516 // b[L-1][0] b[L-1][1] ... b[L-1][k-1]
|
mas01mc@292
|
517 //
|
mas01mc@292
|
518 // ---random r1 L x k float ---
|
mas01mc@292
|
519 // r1[0][0] r1[0][1] ... r1[0][k-1]
|
mas01mc@292
|
520 // r1[1][0] r1[1][1] ... r1[1][k-1]
|
mas01mc@292
|
521 // ...
|
mas01mc@292
|
522 // r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1]
|
mas01mc@292
|
523 //
|
mas01mc@292
|
524 // ---random r2 L x k float ---
|
mas01mc@292
|
525 // r2[0][0] r2[0][1] ... r2[0][k-1]
|
mas01mc@292
|
526 // r2[1][0] r2[1][1] ... r2[1][k-1]
|
mas01mc@292
|
527 // ...
|
mas01mc@292
|
528 // r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1]
|
mas01mc@292
|
529 //
|
mas01mc@293
|
530 // ******* HASHTABLES FORMAT1 (optimized for LSH_ON_DISK retrieval) *******
|
mas01mc@292
|
531 // ---hash table 0: N x C x 8 ---
|
mas01mc@292
|
532 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
533 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
534 // ...
|
mas01mc@292
|
535 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
536 //
|
mas01mc@292
|
537 // ---hash table 1: N x C x 8 ---
|
mas01mc@292
|
538 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
539 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
540 // ...
|
mas01mc@292
|
541 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
542 //
|
mas01mc@292
|
543 // ...
|
mas01mc@292
|
544 //
|
mas01mc@292
|
545 // ---hash table L-1: N x C x 8 ---
|
mas01mc@292
|
546 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
547 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
548 // ...
|
mas01mc@292
|
549 // [t2 pointID][t2 pointID]...[t2 pointID]
|
mas01mc@292
|
550 //
|
mas01mc@293
|
551 // ******* HASHTABLES FORMAT2 (optimized for LSH_IN_CORE retrieval) *******
|
mas01mc@293
|
552 //
|
mas01mc@293
|
553 // State machine controlled by regular expression.
|
mas01mc@293
|
554 // legend:
|
mas01mc@293
|
555 //
|
mas01mc@293
|
556 // O2_SERIAL_FLAGS_T1_BIT = 0x80000000U
|
mas01mc@293
|
557 // O2_SERIAL_FLAGS_T2_BIT = 0x40000000U
|
mas01mc@293
|
558 // O2_SERIAL_FLAGS_END_BIT = 0x20000000U
|
mas01mc@293
|
559 //
|
mas01mc@293
|
560 // T1(t1) - T1 hash token containing t1 hash key with O2_SERIAL_FLAGS_T1_BIT set (t1 range 0..2^29-1)
|
mas01mc@293
|
561 // T2 - T2 hash token with O2_SERIAL_FLAGS_T2_BIT set
|
mas01mc@293
|
562 // t2 - t2 hash key (range 1..2^32-6)
|
mas01mc@293
|
563 // p - point identifier (range 0..2^32-1)
|
mas01mc@293
|
564 // E - end hash table token with O2_SERIAL_FLAGS_END_BIT set
|
mas01mc@293
|
565 // {...} required arguments
|
mas01mc@293
|
566 // [...] optional arguments
|
mas01mc@293
|
567 // * - match zero or more occurences
|
mas01mc@293
|
568 // + - match one or more occurences
|
mas01mc@293
|
569 // {...}^L - repeat argument L times
|
mas01mc@293
|
570 //
|
mas01mc@293
|
571 // FORMAT2 Regular expression:
|
mas01mc@293
|
572 // { [T1(t1) T2 t2 p+ [T2 t2 p+]* ]* E. }^L
|
mas01mc@293
|
573 //
|
mas01mc@292
|
574
|
mas01mc@292
|
575 // Serial header constructors
|
mas01mc@292
|
576 SerialHeader::SerialHeader(){;}
|
mas01mc@292
|
577 SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT):
|
mas01mc@292
|
578 lshMagic(O2_SERIAL_MAGIC),
|
mas01mc@292
|
579 binWidth(W),
|
mas01mc@292
|
580 numTables(L),
|
mas01mc@292
|
581 numRows(N),
|
mas01mc@292
|
582 numCols(C),
|
mas01mc@292
|
583 elementSize(O2_SERIAL_ELEMENT_SIZE),
|
mas01mc@292
|
584 version(O2_SERIAL_VERSION),
|
mas01mc@292
|
585 size(L * align_up(N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables
|
mas01mc@292
|
586 + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions
|
mas01mc@292
|
587 L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())),
|
mas01mc@292
|
588 flags(FMT),
|
mas01mc@292
|
589 dataDim(d),
|
mas01mc@292
|
590 numFuns(k),
|
mas01mc@292
|
591 radius(r),
|
mas01mc@292
|
592 maxp(p){;} // header
|
mas01mc@292
|
593
|
mas01mc@292
|
594 float* G::get_serial_hashfunction_base(char* db){
|
mas01mc@292
|
595 if(db&&lshHeader)
|
mas01mc@292
|
596 return (float*)(db+O2_SERIAL_HEADER_SIZE);
|
mas01mc@292
|
597 else return NULL;
|
mas01mc@292
|
598 }
|
mas01mc@292
|
599
|
mas01mc@292
|
600 SerialElementT* G::get_serial_hashtable_base(char* db){
|
mas01mc@292
|
601 if(db&&lshHeader)
|
mas01mc@292
|
602 return (SerialElementT*)(db+get_serial_hashtable_offset());
|
mas01mc@292
|
603 else
|
mas01mc@292
|
604 return NULL;
|
mas01mc@292
|
605 }
|
mas01mc@292
|
606
|
mas01mc@292
|
607 Uns32T G::get_serial_hashtable_offset(){
|
mas01mc@292
|
608 if(lshHeader)
|
mas01mc@292
|
609 return align_up(O2_SERIAL_HEADER_SIZE +
|
mas01mc@292
|
610 L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn());
|
mas01mc@292
|
611 else
|
mas01mc@292
|
612 return 0;
|
mas01mc@292
|
613 }
|
mas01mc@292
|
614
|
mas01mc@292
|
615 void G::serialize(char* filename, Uns32T serialFormat){
|
mas01mc@292
|
616 int dbfid;
|
mas01mc@292
|
617 char* db;
|
mas01mc@292
|
618 int dbIsNew=0;
|
mas01mc@292
|
619
|
mas01mc@292
|
620 // Check requested serialFormat
|
mas01mc@292
|
621 if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2))
|
mas01mc@292
|
622 error("Unrecognized serial file format request: ", "serialize()");
|
mas01mc@292
|
623
|
mas01mc@292
|
624 // Test to see if file exists
|
mas01mc@292
|
625 if((dbfid = open (filename, O_RDONLY)) < 0)
|
mas01mc@292
|
626 // If it doesn't, then create the file (CREATE)
|
mas01mc@292
|
627 if(errno == ENOENT){
|
mas01mc@292
|
628 // Create the file
|
mas01mc@292
|
629 std::cout << "Creating new serialized LSH database:" << filename << "...";
|
mas01mc@292
|
630 std::cout.flush();
|
mas01mc@292
|
631 serial_create(filename, serialFormat);
|
mas01mc@292
|
632 dbIsNew=1;
|
mas01mc@292
|
633 }
|
mas01mc@292
|
634 else
|
mas01mc@292
|
635 // The file can't be opened
|
mas01mc@292
|
636 error("Can't open the file", filename, "open");
|
mas01mc@292
|
637
|
mas01mc@292
|
638 // Load the on-disk header into core
|
mas01mc@292
|
639 dbfid = serial_open(filename, 1); // open for write
|
mas01mc@292
|
640 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
|
mas01mc@292
|
641 serial_get_header(db); // read header
|
mas01mc@292
|
642 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
|
mas01mc@292
|
643
|
mas01mc@292
|
644 // Check compatibility of core and disk data structures
|
mas01mc@292
|
645 if( !serial_can_merge(serialFormat) )
|
mas01mc@292
|
646 error("Incompatible core and serial LSH, data structure dimensions mismatch.");
|
mas01mc@292
|
647
|
mas01mc@292
|
648 // For new LSH databases write the hashfunctions
|
mas01mc@292
|
649 if(dbIsNew)
|
mas01mc@292
|
650 serialize_lsh_hashfunctions(dbfid);
|
mas01mc@292
|
651 // Write the hashtables in the requested format
|
mas01mc@292
|
652 if(serialFormat == O2_SERIAL_FILEFORMAT1)
|
mas01mc@292
|
653 serialize_lsh_hashtables_format1(dbfid, !dbIsNew);
|
mas01mc@292
|
654 else
|
mas01mc@292
|
655 serialize_lsh_hashtables_format2(dbfid, !dbIsNew);
|
mas01mc@292
|
656
|
mas01mc@292
|
657 if(!dbIsNew){
|
mas01mc@292
|
658 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer
|
mas01mc@292
|
659 //serial_get_header(db); // read header
|
mas01mc@293
|
660 cout << "maxp = " << H::maxp << endl;
|
mas01mc@293
|
661 lshHeader->maxp=H::maxp;
|
mas01mc@292
|
662 // Default to FILEFORMAT1
|
mas01mc@292
|
663 if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2))
|
mas01mc@292
|
664 lshHeader->flags|=O2_SERIAL_FILEFORMAT2;
|
mas01mc@292
|
665 memcpy((char*)db, (char*)lshHeader, sizeof(SerialHeaderT));
|
mas01mc@292
|
666 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
|
mas01mc@292
|
667 }
|
mas01mc@292
|
668
|
mas01mc@292
|
669 serial_close(dbfid);
|
mas01mc@292
|
670 }
|
mas01mc@292
|
671
|
mas01mc@292
|
672 // Test to see if core structure and requested format is
|
mas01mc@292
|
673 // compatible with currently opened database
|
mas01mc@292
|
674 int G::serial_can_merge(Uns32T format){
|
mas01mc@292
|
675 SerialHeaderT* that = lshHeader;
|
mas01mc@292
|
676 if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2)
|
mas01mc@292
|
677 || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2)
|
mas01mc@292
|
678 || !( this->w == that->binWidth &&
|
mas01mc@292
|
679 this->L == that->numTables &&
|
mas01mc@292
|
680 this->N == that->numRows &&
|
mas01mc@292
|
681 this->k == that->numFuns &&
|
mas01mc@292
|
682 this->d == that->dataDim &&
|
mas01mc@292
|
683 sizeof(SerialElementT) == that->elementSize &&
|
mas01mc@292
|
684 this->radius == that->radius)){
|
mas01mc@292
|
685 serial_print_header(format);
|
mas01mc@292
|
686 return 0;
|
mas01mc@292
|
687 }
|
mas01mc@292
|
688 else
|
mas01mc@292
|
689 return 1;
|
mas01mc@292
|
690 }
|
mas01mc@292
|
691
|
mas01mc@292
|
692 // Used as an error message for serial_can_merge()
|
mas01mc@292
|
693 void G::serial_print_header(Uns32T format){
|
mas01mc@292
|
694 std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl;
|
mas01mc@292
|
695 std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl;
|
mas01mc@292
|
696 std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl;
|
mas01mc@292
|
697 std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl;
|
mas01mc@292
|
698 std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl;
|
mas01mc@292
|
699 std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl;
|
mas01mc@292
|
700 std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl;
|
mas01mc@292
|
701 std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl;
|
mas01mc@292
|
702 }
|
mas01mc@292
|
703
|
mas01mc@292
|
704 int G::serialize_lsh_hashfunctions(int fid){
|
mas01mc@292
|
705 float* pf;
|
mas01mc@292
|
706 Uns32T *pu;
|
mas01mc@292
|
707 Uns32T x,y,z;
|
mas01mc@292
|
708
|
mas01mc@293
|
709 char* db = serial_mmap(fid, get_serial_hashtable_offset(), 1);// get database pointer
|
mas01mc@292
|
710 pf = get_serial_hashfunction_base(db);
|
mas01mc@292
|
711
|
mas01mc@292
|
712 // HASH FUNCTIONS
|
mas01mc@292
|
713 // Write the random projectors A[][][]
|
mas01mc@292
|
714 #ifdef USE_U_FUNCTIONS
|
mas01mc@292
|
715 for( x = 0 ; x < H::m ; x++ )
|
mas01mc@292
|
716 for( y = 0 ; y < H::k/2 ; y++ )
|
mas01mc@292
|
717 #else
|
mas01mc@292
|
718 for( x = 0 ; x < H::L ; x++ )
|
mas01mc@292
|
719 for( y = 0 ; y < H::k ; y++ )
|
mas01mc@292
|
720 #endif
|
mas01mc@292
|
721 for( z = 0 ; z < d ; z++ )
|
mas01mc@293
|
722 *pf++ = H::A[x][y][z];
|
mas01mc@292
|
723
|
mas01mc@292
|
724 // Write the random biases b[][]
|
mas01mc@292
|
725 #ifdef USE_U_FUNCTIONS
|
mas01mc@292
|
726 for( x = 0 ; x < H::m ; x++ )
|
mas01mc@292
|
727 for( y = 0 ; y < H::k/2 ; y++ )
|
mas01mc@292
|
728 #else
|
mas01mc@292
|
729 for( x = 0 ; x < H::L ; x++ )
|
mas01mc@292
|
730 for( y = 0 ; y < H::k ; y++ )
|
mas01mc@292
|
731 #endif
|
mas01mc@293
|
732 *pf++ = H::b[x][y];
|
mas01mc@292
|
733
|
mas01mc@292
|
734 pu = (Uns32T*)pf;
|
mas01mc@292
|
735
|
mas01mc@292
|
736 // Write the Z projectors r1[][]
|
mas01mc@292
|
737 for( x = 0 ; x < H::L ; x++)
|
mas01mc@292
|
738 for( y = 0 ; y < H::k ; y++)
|
mas01mc@293
|
739 *pu++ = H::r1[x][y];
|
mas01mc@292
|
740
|
mas01mc@292
|
741 // Write the Z projectors r2[][]
|
mas01mc@292
|
742 for( x = 0 ; x < H::L ; x++)
|
mas01mc@292
|
743 for( y = 0; y < H::k ; y++)
|
mas01mc@293
|
744 *pu++ = H::r2[x][y];
|
mas01mc@292
|
745
|
mas01mc@292
|
746 serial_munmap(db, get_serial_hashtable_offset());
|
mas01mc@292
|
747 return 1;
|
mas01mc@292
|
748 }
|
mas01mc@292
|
749
|
mas01mc@292
|
750 int G::serialize_lsh_hashtables_format1(int fid, int merge){
|
mas01mc@292
|
751 SerialElementT *pe, *pt;
|
mas01mc@292
|
752 Uns32T x,y;
|
mas01mc@292
|
753
|
mas01mc@292
|
754 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) )
|
mas01mc@292
|
755 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
|
mas01mc@292
|
756
|
mas01mc@292
|
757 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
|
mas01mc@292
|
758 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount;
|
mas01mc@292
|
759 // Write the hash tables
|
mas01mc@292
|
760 for( x = 0 ; x < H::L ; x++ ){
|
mas01mc@292
|
761 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1...";
|
mas01mc@292
|
762 std::cout.flush();
|
mas01mc@292
|
763 // memory map a single hash table for sequential access
|
mas01mc@292
|
764 // Align each hash table to page boundary
|
mas01mc@292
|
765 char* dbtable = serial_mmap(fid, hashTableSize, 1,
|
mas01mc@292
|
766 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
|
mas01mc@292
|
767 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
|
mas01mc@292
|
768 error("could not advise hashtable memory","","madvise");
|
mas01mc@292
|
769
|
mas01mc@292
|
770 maxColCount=0;
|
mas01mc@292
|
771 minColCount=O2_SERIAL_MAX_COLS;
|
mas01mc@292
|
772 meanColCount=0;
|
mas01mc@292
|
773 colCountN=0;
|
mas01mc@292
|
774 pt=(SerialElementT*)dbtable;
|
mas01mc@292
|
775 for( y = 0 ; y < H::N ; y++ ){
|
mas01mc@292
|
776 // Move disk pointer to beginning of row
|
mas01mc@292
|
777 pe=pt+y*lshHeader->numCols;
|
mas01mc@292
|
778
|
mas01mc@292
|
779 colCount=0;
|
mas01mc@292
|
780 if(bucket* bPtr = h[x][y])
|
mas01mc@292
|
781 if(merge)
|
mas01mc@292
|
782 #ifdef LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
783 serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
|
mas01mc@292
|
784 else
|
mas01mc@292
|
785 serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket
|
mas01mc@292
|
786 #else
|
mas01mc@292
|
787 serial_merge_hashtable_row_format1(pe, bPtr, colCount);
|
mas01mc@292
|
788 else
|
mas01mc@292
|
789 serial_write_hashtable_row_format1(pe, bPtr, colCount);
|
mas01mc@292
|
790 #endif
|
mas01mc@292
|
791 if(colCount){
|
mas01mc@292
|
792 if(colCount<minColCount)
|
mas01mc@292
|
793 minColCount=colCount;
|
mas01mc@292
|
794 if(colCount>maxColCount)
|
mas01mc@292
|
795 maxColCount=colCount;
|
mas01mc@292
|
796 meanColCount+=colCount;
|
mas01mc@292
|
797 colCountN++;
|
mas01mc@292
|
798 }
|
mas01mc@292
|
799 }
|
mas01mc@292
|
800 if(colCountN)
|
mas01mc@292
|
801 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
|
mas01mc@292
|
802 << ", min = " << minColCount << ", max = " << maxColCount
|
mas01mc@292
|
803 << endl;
|
mas01mc@292
|
804 serial_munmap(dbtable, hashTableSize);
|
mas01mc@292
|
805 }
|
mas01mc@292
|
806
|
mas01mc@292
|
807 // We're done writing
|
mas01mc@292
|
808 return 1;
|
mas01mc@292
|
809 }
|
mas01mc@292
|
810
|
mas01mc@292
|
811 void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){
|
mas01mc@292
|
812 while(b && b->t2!=IFLAG){
|
mas01mc@292
|
813 SerialElementT*pe=pr; // reset disk pointer to beginning of row
|
mas01mc@292
|
814 serial_merge_element_format1(pe, b->snext, b->t2, colCount);
|
mas01mc@292
|
815 b=b->next;
|
mas01mc@292
|
816 }
|
mas01mc@292
|
817 }
|
mas01mc@292
|
818
|
mas01mc@292
|
819 void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
|
mas01mc@292
|
820 while(sb){
|
mas01mc@292
|
821 if(colCount==lshHeader->numCols){
|
mas01mc@292
|
822 std::cout << "!point-chain full " << endl;
|
mas01mc@292
|
823 return;
|
mas01mc@292
|
824 }
|
mas01mc@292
|
825 Uns32T c=0;
|
mas01mc@292
|
826 // Merge collision chains
|
mas01mc@292
|
827 while(c<lshHeader->numCols){
|
mas01mc@292
|
828 if( (pe+c)->hashValue==IFLAG){
|
mas01mc@292
|
829 (pe+c)->hashValue=t2;
|
mas01mc@292
|
830 (pe+c)->pointID=sb->pointID;
|
mas01mc@292
|
831 colCount=c+1;
|
mas01mc@292
|
832 if(c+1<lshHeader->numCols)
|
mas01mc@292
|
833 (pe+c+1)->hashValue=IFLAG;
|
mas01mc@292
|
834 break;
|
mas01mc@292
|
835 }
|
mas01mc@292
|
836 c++;
|
mas01mc@292
|
837 }
|
mas01mc@292
|
838 sb=sb->snext;
|
mas01mc@292
|
839 }
|
mas01mc@292
|
840 return;
|
mas01mc@292
|
841 }
|
mas01mc@292
|
842
|
mas01mc@292
|
843 void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){
|
mas01mc@292
|
844 pe->hashValue=IFLAG;
|
mas01mc@292
|
845 while(b && b->t2!=IFLAG){
|
mas01mc@292
|
846 serial_write_element_format1(pe, b->snext, b->t2, colCount);
|
mas01mc@292
|
847 b=b->next;
|
mas01mc@292
|
848 }
|
mas01mc@292
|
849 }
|
mas01mc@292
|
850
|
mas01mc@292
|
851 void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){
|
mas01mc@292
|
852 while(sb){
|
mas01mc@292
|
853 if(colCount==lshHeader->numCols){
|
mas01mc@292
|
854 std::cout << "!point-chain full " << endl;
|
mas01mc@292
|
855 return;
|
mas01mc@292
|
856 }
|
mas01mc@292
|
857 pe->hashValue=t2;
|
mas01mc@292
|
858 pe->pointID=sb->pointID;
|
mas01mc@292
|
859 pe++;
|
mas01mc@292
|
860 colCount++;
|
mas01mc@292
|
861 sb=sb->snext;
|
mas01mc@292
|
862 }
|
mas01mc@292
|
863 pe->hashValue=IFLAG;
|
mas01mc@292
|
864 return;
|
mas01mc@292
|
865 }
|
mas01mc@292
|
866
|
mas01mc@292
|
867 int G::serialize_lsh_hashtables_format2(int fid, int merge){
|
mas01mc@292
|
868 Uns32T x,y;
|
mas01mc@292
|
869
|
mas01mc@292
|
870 if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) )
|
mas01mc@292
|
871 error("Cannot merge core and serial LSH, data structure dimensions mismatch.");
|
mas01mc@292
|
872
|
mas01mc@292
|
873 // We must pereform FORMAT1 merges in core
|
mas01mc@292
|
874 if(merge)
|
mas01mc@292
|
875 unserialize_lsh_hashtables_format2(fid);
|
mas01mc@292
|
876
|
mas01mc@292
|
877 Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1;
|
mas01mc@292
|
878 lseek(fid, get_serial_hashtable_offset(), SEEK_SET);
|
mas01mc@292
|
879
|
mas01mc@292
|
880 // Write the hash tables
|
mas01mc@292
|
881 for( x = 0 ; x < H::L ; x++ ){
|
mas01mc@292
|
882 std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2...";
|
mas01mc@292
|
883 std::cout.flush();
|
mas01mc@292
|
884 maxColCount=0;
|
mas01mc@292
|
885 minColCount=O2_SERIAL_MAX_COLS;
|
mas01mc@292
|
886 meanColCount=0;
|
mas01mc@292
|
887 colCountN=0;
|
mas01mc@292
|
888 for( y = 0 ; y < H::N ; y++ ){
|
mas01mc@292
|
889 colCount=0;
|
mas01mc@292
|
890 if(bucket* bPtr = h[x][y]){
|
mas01mc@292
|
891 t1 = y | O2_SERIAL_FLAGS_T1_BIT;
|
mas01mc@292
|
892 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){
|
mas01mc@292
|
893 close(fid);
|
mas01mc@292
|
894 error("write error in serial_write_hashtable_format2() [t1]");
|
mas01mc@292
|
895 }
|
mas01mc@292
|
896 #ifdef LSH_BLOCK_FULL_ROWS
|
mas01mc@292
|
897 serial_write_hashtable_row_format2(fid, bPtr->next, colCount); // skip collision counter bucket
|
mas01mc@292
|
898 #else
|
mas01mc@292
|
899 serial_write_hashtable_row_format2(fid, bPtr, colCount);
|
mas01mc@292
|
900 #endif
|
mas01mc@292
|
901 }
|
mas01mc@292
|
902 if(colCount){
|
mas01mc@292
|
903 if(colCount<minColCount)
|
mas01mc@292
|
904 minColCount=colCount;
|
mas01mc@292
|
905 if(colCount>maxColCount)
|
mas01mc@292
|
906 maxColCount=colCount;
|
mas01mc@292
|
907 meanColCount+=colCount;
|
mas01mc@292
|
908 colCountN++;
|
mas01mc@292
|
909 }
|
mas01mc@292
|
910 }
|
mas01mc@292
|
911 // Write END of table marker
|
mas01mc@292
|
912 t1 = O2_SERIAL_FLAGS_END_BIT;
|
mas01mc@292
|
913 if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){
|
mas01mc@292
|
914 close(fid);
|
mas01mc@292
|
915 error("write error in serial_write_hashtable_format2() [end]");
|
mas01mc@292
|
916 }
|
mas01mc@292
|
917
|
mas01mc@292
|
918 if(colCountN)
|
mas01mc@292
|
919 std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN
|
mas01mc@292
|
920 << ", min = " << minColCount << ", max = " << maxColCount
|
mas01mc@292
|
921 << endl;
|
mas01mc@292
|
922 }
|
mas01mc@292
|
923
|
mas01mc@292
|
924 // We're done writing
|
mas01mc@292
|
925 return 1;
|
mas01mc@292
|
926 }
|
mas01mc@292
|
927
|
mas01mc@292
|
928 void G::serial_write_hashtable_row_format2(int fid, bucket* b, Uns32T& colCount){
|
mas01mc@292
|
929 while(b && b->t2!=IFLAG){
|
mas01mc@292
|
930 t2 = O2_SERIAL_FLAGS_T2_BIT;
|
mas01mc@292
|
931 if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){
|
mas01mc@292
|
932 close(fid);
|
mas01mc@292
|
933 error("write error in serial_write_hashtable_row_format2()");
|
mas01mc@292
|
934 }
|
mas01mc@292
|
935 t2 = b->t2;
|
mas01mc@292
|
936 if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){
|
mas01mc@292
|
937 close(fid);
|
mas01mc@292
|
938 error("write error in serial_write_hashtable_row_format2()");
|
mas01mc@292
|
939 }
|
mas01mc@292
|
940 serial_write_element_format2(fid, b->snext, colCount);
|
mas01mc@292
|
941 b=b->next;
|
mas01mc@292
|
942 }
|
mas01mc@292
|
943 }
|
mas01mc@292
|
944
|
mas01mc@292
|
945 void G::serial_write_element_format2(int fid, sbucket* sb, Uns32T& colCount){
|
mas01mc@292
|
946 while(sb){
|
mas01mc@292
|
947 if(write(fid, &sb->pointID, sizeof(Uns32T))!=sizeof(Uns32T)){
|
mas01mc@292
|
948 close(fid);
|
mas01mc@292
|
949 error("Write error in serial_write_element_format2()");
|
mas01mc@292
|
950 }
|
mas01mc@292
|
951 colCount++;
|
mas01mc@292
|
952 sb=sb->snext;
|
mas01mc@292
|
953 }
|
mas01mc@292
|
954 }
|
mas01mc@292
|
955
|
mas01mc@292
|
956
|
mas01mc@292
|
957 int G::serial_create(char* filename, Uns32T FMT){
|
mas01mc@292
|
958 return serial_create(filename, w, L, N, C, k, d, FMT);
|
mas01mc@292
|
959 }
|
mas01mc@292
|
960
|
mas01mc@292
|
961
|
mas01mc@292
|
962 int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols,
|
mas01mc@292
|
963 Uns32T numFuns, Uns32T dim, Uns32T FMT){
|
mas01mc@292
|
964
|
mas01mc@292
|
965 if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS
|
mas01mc@292
|
966 || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS
|
mas01mc@292
|
967 || dim>O2_SERIAL_MAX_DIM){
|
mas01mc@292
|
968 error("LSH parameters out of bounds for serialization");
|
mas01mc@292
|
969 }
|
mas01mc@292
|
970
|
mas01mc@292
|
971 int dbfid;
|
mas01mc@292
|
972 if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
|
mas01mc@292
|
973 error("Can't create serial file", filename, "open");
|
mas01mc@292
|
974 get_lock(dbfid, 1);
|
mas01mc@292
|
975
|
mas01mc@292
|
976 // Make header first to get size of serialized database
|
mas01mc@292
|
977 lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT);
|
mas01mc@292
|
978
|
mas01mc@292
|
979 // go to the location corresponding to the last byte
|
mas01mc@292
|
980 if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1)
|
mas01mc@292
|
981 error("lseek error in db file", "", "lseek");
|
mas01mc@292
|
982
|
mas01mc@292
|
983 // write a dummy byte at the last location
|
mas01mc@292
|
984 if (write (dbfid, "", 1) != 1)
|
mas01mc@292
|
985 error("write error", "", "write");
|
mas01mc@292
|
986
|
mas01mc@293
|
987 char* db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);
|
mas01mc@292
|
988
|
mas01mc@292
|
989 memcpy (db, lshHeader, O2_SERIAL_HEADER_SIZE);
|
mas01mc@292
|
990
|
mas01mc@292
|
991 serial_munmap(db, O2_SERIAL_HEADER_SIZE);
|
mas01mc@292
|
992
|
mas01mc@292
|
993 close(dbfid);
|
mas01mc@292
|
994
|
mas01mc@292
|
995 std::cout << "done initializing tables." << endl;
|
mas01mc@292
|
996
|
mas01mc@292
|
997 return 1;
|
mas01mc@292
|
998 }
|
mas01mc@292
|
999
|
mas01mc@292
|
1000 char* G::serial_mmap(int dbfid, Uns32T memSize, Uns32T forWrite, off_t offset){
|
mas01mc@293
|
1001 char* db;
|
mas01mc@292
|
1002 if(forWrite){
|
mas01mc@292
|
1003 if ((db = (char*) mmap(0, memSize, PROT_READ | PROT_WRITE,
|
mas01mc@292
|
1004 MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
|
mas01mc@292
|
1005 error("mmap error in request for writable serialized database", "", "mmap");
|
mas01mc@292
|
1006 }
|
mas01mc@292
|
1007 else if ((db = (char*) mmap(0, memSize, PROT_READ, MAP_SHARED, dbfid, offset)) == (caddr_t) -1)
|
mas01mc@292
|
1008 error("mmap error in read-only serialized database", "", "mmap");
|
mas01mc@292
|
1009
|
mas01mc@292
|
1010 return db;
|
mas01mc@292
|
1011 }
|
mas01mc@292
|
1012
|
mas01mc@292
|
1013 SerialHeaderT* G::serial_get_header(char* db){
|
mas01mc@292
|
1014 lshHeader = new SerialHeaderT();
|
mas01mc@292
|
1015 memcpy((char*)lshHeader, db, sizeof(SerialHeaderT));
|
mas01mc@292
|
1016
|
mas01mc@292
|
1017 if(lshHeader->lshMagic!=O2_SERIAL_MAGIC)
|
mas01mc@292
|
1018 error("Not an LSH database file");
|
mas01mc@292
|
1019
|
mas01mc@292
|
1020 return lshHeader;
|
mas01mc@292
|
1021 }
|
mas01mc@292
|
1022
|
mas01mc@292
|
1023 void G::serial_munmap(char* db, Uns32T N){
|
mas01mc@292
|
1024 munmap(db, N);
|
mas01mc@292
|
1025 }
|
mas01mc@292
|
1026
|
mas01mc@292
|
1027 int G::serial_open(char* filename, int writeFlag){
|
mas01mc@292
|
1028 int dbfid;
|
mas01mc@292
|
1029 if(writeFlag){
|
mas01mc@292
|
1030 if ((dbfid = open (filename, O_RDWR)) < 0)
|
mas01mc@292
|
1031 error("Can't open serial file for read/write", filename, "open");
|
mas01mc@292
|
1032 get_lock(dbfid, writeFlag);
|
mas01mc@292
|
1033 }
|
mas01mc@292
|
1034 else{
|
mas01mc@292
|
1035 if ((dbfid = open (filename, O_RDONLY)) < 0)
|
mas01mc@292
|
1036 error("Can't open serial file for read", filename, "open");
|
mas01mc@292
|
1037 get_lock(dbfid, 0);
|
mas01mc@292
|
1038 }
|
mas01mc@292
|
1039
|
mas01mc@292
|
1040 return dbfid;
|
mas01mc@292
|
1041 }
|
mas01mc@292
|
1042
|
mas01mc@292
|
1043 void G::serial_close(int dbfid){
|
mas01mc@292
|
1044
|
mas01mc@292
|
1045 release_lock(dbfid);
|
mas01mc@292
|
1046 close(dbfid);
|
mas01mc@292
|
1047 }
|
mas01mc@292
|
1048
|
mas01mc@292
|
1049 int G::unserialize_lsh_header(char* filename){
|
mas01mc@292
|
1050
|
mas01mc@292
|
1051 int dbfid;
|
mas01mc@292
|
1052 char* db;
|
mas01mc@292
|
1053 // Test to see if file exists
|
mas01mc@292
|
1054 if((dbfid = open (filename, O_RDONLY)) < 0)
|
mas01mc@292
|
1055 error("Can't open the file", filename, "open");
|
mas01mc@292
|
1056 close(dbfid);
|
mas01mc@292
|
1057 dbfid = serial_open(filename, 0); // open for read
|
mas01mc@292
|
1058 db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
|
mas01mc@292
|
1059 serial_get_header(db); // read header
|
mas01mc@292
|
1060 serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap
|
mas01mc@292
|
1061
|
mas01mc@292
|
1062 // Unserialize header parameters
|
mas01mc@292
|
1063 H::L = lshHeader->numTables;
|
mas01mc@292
|
1064 H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0);
|
mas01mc@292
|
1065 H::N = lshHeader->numRows;
|
mas01mc@292
|
1066 H::C = lshHeader->numCols;
|
mas01mc@292
|
1067 H::k = lshHeader->numFuns;
|
mas01mc@292
|
1068 H::d = lshHeader->dataDim;
|
mas01mc@293
|
1069 H::w = lshHeader->binWidth;
|
mas01mc@293
|
1070 H::radius = lshHeader->radius;
|
mas01mc@293
|
1071 H::maxp = lshHeader->maxp;
|
mas01mc@292
|
1072
|
mas01mc@292
|
1073 return dbfid;
|
mas01mc@292
|
1074 }
|
mas01mc@292
|
1075
|
mas01mc@292
|
1076 // unserialize the LSH parameters
|
mas01mc@292
|
1077 // we leave the LSH tree on disk as a flat file
|
mas01mc@292
|
1078 // it is this flat file that we search by memory mapping
|
mas01mc@292
|
1079 void G::unserialize_lsh_functions(int dbfid){
|
mas01mc@292
|
1080 Uns32T j, kk;
|
mas01mc@292
|
1081 float* pf;
|
mas01mc@292
|
1082 Uns32T* pu;
|
mas01mc@292
|
1083
|
mas01mc@292
|
1084 // Load the hash functions into core
|
mas01mc@292
|
1085 char* db = serial_mmap(dbfid, get_serial_hashtable_offset(), 0);// get database pointer again
|
mas01mc@292
|
1086
|
mas01mc@292
|
1087 pf = get_serial_hashfunction_base(db);
|
mas01mc@292
|
1088
|
mas01mc@292
|
1089 #ifdef USE_U_FUNCTIONS
|
mas01mc@292
|
1090 for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v)
|
mas01mc@292
|
1091 for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions
|
mas01mc@292
|
1092 #else
|
mas01mc@292
|
1093 for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v)
|
mas01mc@292
|
1094 for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions
|
mas01mc@292
|
1095 #endif
|
mas01mc@292
|
1096 for(Uns32T i = 0 ; i < H::d ; i++ )
|
mas01mc@293
|
1097 H::A[j][kk][i] = *pf++; // Normally distributed random vectors
|
mas01mc@292
|
1098 }
|
mas01mc@292
|
1099 }
|
mas01mc@292
|
1100 #ifdef USE_U_FUNCTIONS
|
mas01mc@292
|
1101 for( j = 0 ; j < H::m ; j++ ) // biases b
|
mas01mc@292
|
1102 for( kk = 0 ; kk < H::k/2 ; kk++ )
|
mas01mc@292
|
1103 #else
|
mas01mc@292
|
1104 for( j = 0 ; j < H::L ; j++ ) // biases b
|
mas01mc@292
|
1105 for( kk = 0 ; kk < H::k ; kk++ )
|
mas01mc@292
|
1106 #endif
|
mas01mc@293
|
1107 H::b[j][kk] = *pf++;
|
mas01mc@292
|
1108
|
mas01mc@292
|
1109 pu = (Uns32T*)pf;
|
mas01mc@292
|
1110 for( j = 0 ; j < H::L ; j++ ) // Z projectors r1
|
mas01mc@292
|
1111 for( kk = 0 ; kk < H::k ; kk++ )
|
mas01mc@292
|
1112 H::r1[j][kk] = *pu++;
|
mas01mc@292
|
1113
|
mas01mc@292
|
1114 for( j = 0 ; j < H::L ; j++ ) // Z projectors r2
|
mas01mc@292
|
1115 for( kk = 0 ; kk < H::k ; kk++ )
|
mas01mc@292
|
1116 H::r2[j][kk] = *pu++;
|
mas01mc@292
|
1117
|
mas01mc@293
|
1118 serial_munmap(db, get_serial_hashtable_offset());
|
mas01mc@292
|
1119 }
|
mas01mc@292
|
1120
|
mas01mc@292
|
1121 void G::unserialize_lsh_hashtables_format1(int fid){
|
mas01mc@292
|
1122 SerialElementT *pe, *pt;
|
mas01mc@292
|
1123 Uns32T x,y;
|
mas01mc@292
|
1124 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
|
mas01mc@292
|
1125 // Read the hash tables into core
|
mas01mc@292
|
1126 for( x = 0 ; x < H::L ; x++ ){
|
mas01mc@292
|
1127 // memory map a single hash table
|
mas01mc@292
|
1128 // Align each hash table to page boundary
|
mas01mc@292
|
1129 char* dbtable = serial_mmap(fid, hashTableSize, 0,
|
mas01mc@292
|
1130 align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn()));
|
mas01mc@292
|
1131 if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0)
|
mas01mc@292
|
1132 error("could not advise hashtable memory","","madvise");
|
mas01mc@292
|
1133 pt=(SerialElementT*)dbtable;
|
mas01mc@292
|
1134 for( y = 0 ; y < H::N ; y++ ){
|
mas01mc@292
|
1135 // Move disk pointer to beginning of row
|
mas01mc@292
|
1136 pe=pt+y*lshHeader->numCols;
|
mas01mc@292
|
1137 unserialize_hashtable_row_format1(pe, h[x]+y);
|
mas01mc@292
|
1138 #ifdef __LSH_DUMP_CORE_TABLES__
|
mas01mc@292
|
1139 printf("S[%d,%d]", x, y);
|
mas01mc@292
|
1140 serial_bucket_dump(pe);
|
mas01mc@292
|
1141 printf("C[%d,%d]", x, y);
|
mas01mc@292
|
1142 dump_hashtable_row(h[x][y]);
|
mas01mc@292
|
1143 #endif
|
mas01mc@292
|
1144 }
|
mas01mc@292
|
1145 serial_munmap(dbtable, hashTableSize);
|
mas01mc@292
|
1146 }
|
mas01mc@292
|
1147 }
|
mas01mc@292
|
1148
|
mas01mc@292
|
1149 void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){
|
mas01mc@292
|
1150 Uns32T colCount = 0;
|
mas01mc@292
|
1151 while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){
|
mas01mc@292
|
1152 H::p = pe->pointID; // current point ID
|
mas01mc@292
|
1153 t2 = pe->hashValue;
|
mas01mc@292
|
1154 bucket_insert_point(b);
|
mas01mc@292
|
1155 pe++;
|
mas01mc@292
|
1156 colCount++;
|
mas01mc@292
|
1157 }
|
mas01mc@292
|
1158 }
|
mas01mc@292
|
1159
|
mas01mc@292
|
1160 void G::unserialize_lsh_hashtables_format2(int fid){
|
mas01mc@292
|
1161 Uns32T x=0,y=0;
|
mas01mc@292
|
1162
|
mas01mc@292
|
1163 // Seek to hashtable base offset
|
mas01mc@292
|
1164 if(lseek(fid, get_serial_hashtable_offset(), SEEK_SET)!=get_serial_hashtable_offset()){
|
mas01mc@292
|
1165 close(fid);
|
mas01mc@292
|
1166 error("Seek error in unserialize_lsh_hashtables_format2");
|
mas01mc@292
|
1167 }
|
mas01mc@292
|
1168
|
mas01mc@292
|
1169 // Read the hash tables into core (structure is given in header)
|
mas01mc@292
|
1170 while( x < H::L){
|
mas01mc@292
|
1171 if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){
|
mas01mc@292
|
1172 close(fid);
|
mas01mc@292
|
1173 error("Read error","unserialize_lsh_hashtables_format2()");
|
mas01mc@292
|
1174 }
|
mas01mc@292
|
1175 if(H::t1&O2_SERIAL_FLAGS_END_BIT)
|
mas01mc@292
|
1176 x++; // End of table
|
mas01mc@292
|
1177 else
|
mas01mc@292
|
1178 while(y < H::N){
|
mas01mc@292
|
1179 // Read a row and move file pointer to beginning of next row or table
|
mas01mc@292
|
1180 if(!(H::t1&O2_SERIAL_FLAGS_T1_BIT)){
|
mas01mc@292
|
1181 close(fid);
|
mas01mc@292
|
1182 error("State matchine error t1","unserialize_lsh_hashtables_format2()");
|
mas01mc@292
|
1183 }
|
mas01mc@292
|
1184 y = H::t1 ^ O2_SERIAL_FLAGS_T1_BIT;
|
mas01mc@292
|
1185 if(y>=H::N){
|
mas01mc@292
|
1186 close(fid);
|
mas01mc@292
|
1187 error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()");
|
mas01mc@292
|
1188 }
|
mas01mc@292
|
1189 Uns32T token = unserialize_hashtable_row_format2(fid, h[x]+y);
|
mas01mc@292
|
1190
|
mas01mc@292
|
1191 #ifdef __LSH_DUMP_CORE_TABLES__
|
mas01mc@292
|
1192 printf("C[%d,%d]", x, y);
|
mas01mc@292
|
1193 dump_hashtable_row(h[x][y]);
|
mas01mc@292
|
1194 #endif
|
mas01mc@292
|
1195 // Check that token is valid
|
mas01mc@292
|
1196 if( !(token&O2_SERIAL_FLAGS_T1_BIT || token&O2_SERIAL_FLAGS_END_BIT) ){
|
mas01mc@292
|
1197 close(fid);
|
mas01mc@292
|
1198 error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()");
|
mas01mc@292
|
1199 }
|
mas01mc@292
|
1200 // Check for end of table flag
|
mas01mc@292
|
1201 if(token&O2_SERIAL_FLAGS_END_BIT){
|
mas01mc@292
|
1202 x++;
|
mas01mc@292
|
1203 break;
|
mas01mc@292
|
1204 }
|
mas01mc@292
|
1205 // Check for new row flag
|
mas01mc@292
|
1206 if(token&O2_SERIAL_FLAGS_T1_BIT)
|
mas01mc@292
|
1207 H::t1 = token;
|
mas01mc@292
|
1208 }
|
mas01mc@292
|
1209 }
|
mas01mc@292
|
1210 }
|
mas01mc@292
|
1211
|
mas01mc@292
|
1212 Uns32T G::unserialize_hashtable_row_format2(int fid, bucket** b){
|
mas01mc@292
|
1213 bool pointFound = false;
|
mas01mc@292
|
1214 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){
|
mas01mc@292
|
1215 close(fid);
|
mas01mc@292
|
1216 error("Read error T2 token","unserialize_hashtable_row_format2");
|
mas01mc@292
|
1217 }
|
mas01mc@292
|
1218 if( !(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2==O2_SERIAL_FLAGS_T2_BIT)){
|
mas01mc@292
|
1219 close(fid);
|
mas01mc@292
|
1220 error("State machine error: expected E or T2");
|
mas01mc@292
|
1221 }
|
mas01mc@292
|
1222 while(!(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2&O2_SERIAL_FLAGS_T1_BIT)){
|
mas01mc@292
|
1223 pointFound=false;
|
mas01mc@292
|
1224 // Check for T2 token
|
mas01mc@292
|
1225 if(H::t2!=O2_SERIAL_FLAGS_T2_BIT)
|
mas01mc@292
|
1226 error("State machine error T2 token", "unserialize_hashtable_row_format2()");
|
mas01mc@292
|
1227 // Read t2 value
|
mas01mc@292
|
1228 if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){
|
mas01mc@292
|
1229 close(fid);
|
mas01mc@292
|
1230 error("Read error t2","unserialize_hashtable_row_format2");
|
mas01mc@292
|
1231 }
|
mas01mc@292
|
1232 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){
|
mas01mc@292
|
1233 close(fid);
|
mas01mc@292
|
1234 error("Read error H::p","unserialize_hashtable_row_format2");
|
mas01mc@292
|
1235 }
|
mas01mc@292
|
1236 while(!(H::p==O2_SERIAL_FLAGS_END_BIT || H::p&O2_SERIAL_FLAGS_T1_BIT || H::p==O2_SERIAL_FLAGS_T2_BIT )){
|
mas01mc@292
|
1237 pointFound=true;
|
mas01mc@292
|
1238 bucket_insert_point(b);
|
mas01mc@292
|
1239 if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){
|
mas01mc@292
|
1240 close(fid);
|
mas01mc@292
|
1241 error("Read error H::p","unserialize_hashtable_row_format2");
|
mas01mc@292
|
1242 }
|
mas01mc@292
|
1243 }
|
mas01mc@292
|
1244 H::t2 = H::p; // Copy last found token to t2
|
mas01mc@292
|
1245 if(!pointFound)
|
mas01mc@292
|
1246 error("State machine error: point", "unserialize_hashtable_row_format2()");
|
mas01mc@292
|
1247 }
|
mas01mc@292
|
1248 return H::t2; // holds current token
|
mas01mc@292
|
1249 }
|
mas01mc@292
|
1250
|
mas01mc@292
|
1251 void G::dump_hashtable_row(bucket* p){
|
mas01mc@292
|
1252 while(p && p->t2!=IFLAG){
|
mas01mc@292
|
1253 sbucket* sbp = p->snext;
|
mas01mc@292
|
1254 while(sbp){
|
mas01mc@292
|
1255 printf("(%0X,%u)", p->t2, sbp->pointID);
|
mas01mc@292
|
1256 fflush(stdout);
|
mas01mc@292
|
1257 sbp=sbp->snext;
|
mas01mc@292
|
1258 }
|
mas01mc@292
|
1259 p=p->next;
|
mas01mc@292
|
1260 }
|
mas01mc@292
|
1261 printf("\n");
|
mas01mc@292
|
1262 }
|
mas01mc@292
|
1263
|
mas01mc@292
|
1264
|
mas01mc@292
|
1265 // G::serial_retrieve_point( ... )
|
mas01mc@292
|
1266 // retrieves (pointID) from a serialized LSH database
|
mas01mc@292
|
1267 //
|
mas01mc@292
|
1268 // inputs:
|
mas01mc@292
|
1269 // filename - file name of serialized LSH database
|
mas01mc@292
|
1270 // vv - query point set
|
mas01mc@292
|
1271 //
|
mas01mc@292
|
1272 // outputs:
|
mas01mc@292
|
1273 // inserts retrieved points into add_point() callback method
|
mas01mc@292
|
1274 void G::serial_retrieve_point_set(char* filename, vector<vector<float> >& vv, ReporterCallbackPtr add_point, void* caller)
|
mas01mc@292
|
1275 {
|
mas01mc@292
|
1276 int dbfid = serial_open(filename, 0); // open for read
|
mas01mc@292
|
1277 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
|
mas01mc@292
|
1278 serial_get_header(dbheader); // read header
|
mas01mc@292
|
1279 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
|
mas01mc@292
|
1280
|
mas01mc@292
|
1281 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
|
mas01mc@292
|
1282 close(dbfid);
|
mas01mc@292
|
1283 error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only");
|
mas01mc@292
|
1284 }
|
mas01mc@292
|
1285
|
mas01mc@292
|
1286 // size of each hash table
|
mas01mc@292
|
1287 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
|
mas01mc@292
|
1288 calling_instance = caller; // class instance variable used in ...bucket_chain_point()
|
mas01mc@292
|
1289 add_point_callback = add_point;
|
mas01mc@292
|
1290
|
mas01mc@292
|
1291 for(Uns32T j=0; j<L; j++){
|
mas01mc@292
|
1292 // memory map a single hash table for random access
|
mas01mc@292
|
1293 char* db = serial_mmap(dbfid, hashTableSize, 0,
|
mas01mc@292
|
1294 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
|
mas01mc@292
|
1295 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
|
mas01mc@292
|
1296 error("could not advise local hashtable memory","","madvise");
|
mas01mc@292
|
1297 SerialElementT* pe = (SerialElementT*)db ;
|
mas01mc@292
|
1298 for(Uns32T qpos=0; qpos<vv.size(); qpos++){
|
mas01mc@293
|
1299 H::compute_hash_functions(vv[qpos]);
|
mas01mc@293
|
1300 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
|
mas01mc@292
|
1301 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
|
mas01mc@292
|
1302 }
|
mas01mc@292
|
1303 serial_munmap(db, hashTableSize); // drop hashtable mmap
|
mas01mc@292
|
1304 }
|
mas01mc@292
|
1305 serial_close(dbfid);
|
mas01mc@292
|
1306 }
|
mas01mc@292
|
1307
|
mas01mc@292
|
1308 void G::serial_retrieve_point(char* filename, vector<float>& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){
|
mas01mc@292
|
1309 int dbfid = serial_open(filename, 0); // open for read
|
mas01mc@292
|
1310 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
|
mas01mc@292
|
1311 serial_get_header(dbheader); // read header
|
mas01mc@292
|
1312 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
|
mas01mc@292
|
1313
|
mas01mc@292
|
1314 if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){
|
mas01mc@292
|
1315 close(dbfid);
|
mas01mc@292
|
1316 error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only");
|
mas01mc@292
|
1317 }
|
mas01mc@292
|
1318
|
mas01mc@292
|
1319 // size of each hash table
|
mas01mc@292
|
1320 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
|
mas01mc@292
|
1321 calling_instance = caller;
|
mas01mc@292
|
1322 add_point_callback = add_point;
|
mas01mc@293
|
1323 H::compute_hash_functions(v);
|
mas01mc@292
|
1324 for(Uns32T j=0; j<L; j++){
|
mas01mc@292
|
1325 // memory map a single hash table for random access
|
mas01mc@292
|
1326 char* db = serial_mmap(dbfid, hashTableSize, 0,
|
mas01mc@292
|
1327 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
|
mas01mc@292
|
1328 if(madvise(db, hashTableSize, MADV_RANDOM)<0)
|
mas01mc@292
|
1329 error("could not advise local hashtable memory","","madvise");
|
mas01mc@292
|
1330 SerialElementT* pe = (SerialElementT*)db ;
|
mas01mc@293
|
1331 H::generate_hash_keys(*(g+j),*(r1+j),*(r2+j));
|
mas01mc@292
|
1332 serial_bucket_chain_point(pe+t1*lshHeader->numCols, qpos); // Point to correct row
|
mas01mc@292
|
1333 serial_munmap(db, hashTableSize); // drop hashtable mmap
|
mas01mc@292
|
1334 }
|
mas01mc@292
|
1335 serial_close(dbfid);
|
mas01mc@292
|
1336 }
|
mas01mc@292
|
1337
|
mas01mc@292
|
1338 void G::serial_dump_tables(char* filename){
|
mas01mc@292
|
1339 int dbfid = serial_open(filename, 0); // open for read
|
mas01mc@292
|
1340 char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer
|
mas01mc@292
|
1341 serial_get_header(dbheader); // read header
|
mas01mc@292
|
1342 serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap
|
mas01mc@292
|
1343 Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols;
|
mas01mc@292
|
1344 for(Uns32T j=0; j<L; j++){
|
mas01mc@292
|
1345 // memory map a single hash table for random access
|
mas01mc@292
|
1346 char* db = serial_mmap(dbfid, hashTableSize, 0,
|
mas01mc@292
|
1347 align_up(get_serial_hashtable_offset()+j*hashTableSize,get_page_logn()));
|
mas01mc@292
|
1348 if(madvise(db, hashTableSize, MADV_SEQUENTIAL)<0)
|
mas01mc@292
|
1349 error("could not advise local hashtable memory","","madvise");
|
mas01mc@292
|
1350 SerialElementT* pe = (SerialElementT*)db ;
|
mas01mc@292
|
1351 printf("*********** TABLE %d ***************\n", j);
|
mas01mc@292
|
1352 fflush(stdout);
|
mas01mc@292
|
1353 int count=0;
|
mas01mc@292
|
1354 do{
|
mas01mc@292
|
1355 printf("[%d,%d]", j, count++);
|
mas01mc@292
|
1356 fflush(stdout);
|
mas01mc@292
|
1357 serial_bucket_dump(pe);
|
mas01mc@292
|
1358 pe+=lshHeader->numCols;
|
mas01mc@292
|
1359 }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols);
|
mas01mc@292
|
1360 }
|
mas01mc@292
|
1361
|
mas01mc@292
|
1362 }
|
mas01mc@292
|
1363
|
mas01mc@292
|
1364 void G::serial_bucket_dump(SerialElementT* pe){
|
mas01mc@292
|
1365 SerialElementT* pend = pe+lshHeader->numCols;
|
mas01mc@292
|
1366 while( !(pe->hashValue==IFLAG || pe==pend ) ){
|
mas01mc@292
|
1367 printf("(%0X,%u)",pe->hashValue,pe->pointID);
|
mas01mc@292
|
1368 pe++;
|
mas01mc@292
|
1369 }
|
mas01mc@292
|
1370 printf("\n");
|
mas01mc@292
|
1371 fflush(stdout);
|
mas01mc@292
|
1372 }
|
mas01mc@292
|
1373
|
mas01mc@292
|
1374 void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){
|
mas01mc@292
|
1375 SerialElementT* pend = pe+lshHeader->numCols;
|
mas01mc@292
|
1376 while( !(pe->hashValue==IFLAG || pe==pend ) ){
|
mas01mc@292
|
1377 if(pe->hashValue==t2){ // new match
|
mas01mc@292
|
1378 add_point_callback(calling_instance, pe->pointID, qpos, radius);
|
mas01mc@292
|
1379 }
|
mas01mc@292
|
1380 pe++;
|
mas01mc@292
|
1381 }
|
mas01mc@292
|
1382 }
|
mas01mc@292
|
1383
|
mas01mc@292
|
1384 void G::bucket_chain_point(bucket* p, Uns32T qpos){
|
mas01mc@292
|
1385 if(!p || p->t2==IFLAG)
|
mas01mc@292
|
1386 return;
|
mas01mc@292
|
1387 if(p->t2==t2){ // match
|
mas01mc@292
|
1388 sbucket_chain_point(p->snext, qpos); // add to reporter
|
mas01mc@292
|
1389 }
|
mas01mc@292
|
1390 if(p->next){
|
mas01mc@292
|
1391 bucket_chain_point(p->next, qpos); // recurse
|
mas01mc@292
|
1392 }
|
mas01mc@292
|
1393 }
|
mas01mc@292
|
1394
|
mas01mc@292
|
1395 void G::sbucket_chain_point(sbucket* p, Uns32T qpos){
|
mas01mc@292
|
1396 add_point_callback(calling_instance, p->pointID, qpos, radius);
|
mas01mc@292
|
1397 if(p->snext){
|
mas01mc@292
|
1398 sbucket_chain_point(p->snext, qpos);
|
mas01mc@292
|
1399 }
|
mas01mc@292
|
1400 }
|
mas01mc@292
|
1401
|
mas01mc@292
|
1402 void G::get_lock(int fd, bool exclusive) {
|
mas01mc@292
|
1403 struct flock lock;
|
mas01mc@292
|
1404 int status;
|
mas01mc@292
|
1405 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
|
mas01mc@292
|
1406 lock.l_whence = SEEK_SET;
|
mas01mc@292
|
1407 lock.l_start = 0;
|
mas01mc@292
|
1408 lock.l_len = 0; /* "the whole file" */
|
mas01mc@292
|
1409 retry:
|
mas01mc@292
|
1410 do {
|
mas01mc@292
|
1411 status = fcntl(fd, F_SETLKW, &lock);
|
mas01mc@292
|
1412 } while (status != 0 && errno == EINTR);
|
mas01mc@292
|
1413 if (status) {
|
mas01mc@292
|
1414 if (errno == EAGAIN) {
|
mas01mc@292
|
1415 sleep(1);
|
mas01mc@292
|
1416 goto retry;
|
mas01mc@292
|
1417 } else {
|
mas01mc@292
|
1418 error("fcntl lock error", "", "fcntl");
|
mas01mc@292
|
1419 }
|
mas01mc@292
|
1420 }
|
mas01mc@292
|
1421 }
|
mas01mc@292
|
1422
|
mas01mc@292
|
1423 void G::release_lock(int fd) {
|
mas01mc@292
|
1424 struct flock lock;
|
mas01mc@292
|
1425 int status;
|
mas01mc@292
|
1426
|
mas01mc@292
|
1427 lock.l_type = F_UNLCK;
|
mas01mc@292
|
1428 lock.l_whence = SEEK_SET;
|
mas01mc@292
|
1429 lock.l_start = 0;
|
mas01mc@292
|
1430 lock.l_len = 0;
|
mas01mc@292
|
1431
|
mas01mc@292
|
1432 status = fcntl(fd, F_SETLKW, &lock);
|
mas01mc@292
|
1433
|
mas01mc@292
|
1434 if (status)
|
mas01mc@292
|
1435 error("fcntl unlock error", "", "fcntl");
|
mas01mc@292
|
1436 }
|