Mercurial > hg > audiodb
diff lshlib.h @ 754:9bd13c7819ae mkc_lsh_update
Adding mkc_lsh_update branch, trunk candidate with improved LSH: merged trunk 1095 and branch multiprobe_lsh
author | mas01mc |
---|---|
date | Thu, 25 Nov 2010 13:42:40 +0000 |
parents | 4b79043f90ba |
children |
line wrap: on
line diff
--- a/lshlib.h Sat Nov 20 15:32:58 2010 +0000 +++ b/lshlib.h Thu Nov 25 13:42:40 2010 +0000 @@ -11,13 +11,37 @@ #ifndef __LSHLIB_H #define __LSHLIB_H -using namespace std; +#include <vector> +#include <queue> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <iostream> +#include <fstream> +#include <math.h> +#include <sys/time.h> +#include <assert.h> +#include <float.h> +#include <signal.h> +#include <time.h> +#include <limits.h> +#include <errno.h> +#ifdef MT19937 +#include "mt19937/mt19937ar.h" +#endif +#include "multiprobe.h" #define IntT int #define LongUns64T long long unsigned #define Uns32T unsigned #define Int32T int #define BooleanT int +#define TRUE 1 +#define FALSE 0 // A big number (>> max # of points) #define INDEX_START_EMPTY 1000000000U @@ -55,7 +79,7 @@ #define O2_INDEX_MAXSTR (256) -#define align_up(x,w) (((x) + ((1<<w)-1)) & ~((1<<w)-1)) +unsigned align_up(unsigned x, unsigned w); #define O2_SERIAL_FUNCTIONS_SIZE (align_up(sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS * O2_SERIAL_MAX_DIM \ + sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS + \ @@ -71,6 +95,7 @@ fclose(dbFile);error("write error in serial_write_format2",TOKENSTR);} //#define LSH_DUMP_CORE_TABLES // set to dump hashtables on load +//#define _LSH_DEBUG_ // turn on debugging information //#define USE_U_FUNCTIONS // set to use partial hashfunction re-use // Backward-compatible CORE ARRAY lsh index @@ -84,8 +109,14 @@ #define LSH_CORE_ARRAY_BIT (0x80000000) // LSH_CORE_ARRAY test bit for list head +#ifndef LSH_MULTI_PROBE_COUNT +#define LSH_MULTI_PROBE_COUNT 1 // How many adjacent hash-buckets to probe in LSH retrieval +#endif + Uns32T get_page_logn(); +using namespace std; + // Disk table entry typedef class SerialElement SerialElementT; class SerialElement { @@ -202,11 +233,13 @@ Uns32T bucketCount; // count of number of point buckets allocated Uns32T pointCount; // count of number of points inserted Uns32T collisionCount; // number of points collided in a hash-table row + Uns32T tablesPointCount; // count of points per hash table on load Uns32T t1; // first hash table key Uns32T t2; // second hash table key Uns32T P; // hash table prime number + Uns32T N; // num rows per table Uns32T C; // num collision per row Uns32T k; // num projections per hash function @@ -217,6 +250,9 @@ float w; // width of hash slots (relative to normalized feature space) float radius;// scaling coefficient for data (1./radius) + MultiProbe* multiProbePtr; // Utility class for handling multi-probe queries + float ** boundaryDistances; // Array of query bucket-boundary-distances per hashtable + void initialize_data_structures(); void initialize_lsh_functions(); void initialize_partial_functions(); @@ -250,6 +286,7 @@ // Interface to hash functions void compute_hash_functions(vector<float>& v); void generate_hash_keys(Uns32T* g, Uns32T* r1, Uns32T* r2); + void generate_multiprobe_keys(Uns32T*g, Uns32T* r1, Uns32T* r2); Uns32T get_t1(){return t1;} // hash-key t1 Uns32T get_t2(){return t2;} // hash-key t2 }; @@ -267,6 +304,8 @@ void release_lock(int fd); int serial_create(char* filename, Uns32T FMT); int serial_create(char* filename, float binWidth, Uns32T nTables, Uns32T nRows, Uns32T nCols, Uns32T k, Uns32T d, Uns32T FMT); + char* serial_mmap(int dbfid, Uns32T sz, Uns32T w, off_t offset = 0); + void serial_munmap(char* db, Uns32T N); int serial_open(char* filename,int writeFlag); void serial_close(int dbfid); @@ -306,10 +345,7 @@ float* get_serial_hashfunction_base(char* db); SerialElementT* get_serial_hashtable_base(char* db); Uns32T get_serial_hashtable_offset(); // Size of SerialHeader + HashFunctions - SerialHeaderT* serial_get_header(int fd); - void serial_write_header(int fd, SerialHeaderT *header); - void serial_get_table(int, int, void *, size_t); - void serial_write_table(int, int, void *, size_t); + SerialHeaderT* serial_get_header(char* db); SerialHeaderT* lshHeader; // Core Retrieval/Inspections Functions @@ -354,7 +390,8 @@ float get_mean_collision_rate(){ return (float) pointCount / bucketCount ; } char* get_indexName(){return indexName;} void dump_hashtables(); - + void dump_core_row(Uns32T n); + void dump_disk_row(char*, Uns32T n); }; typedef class G LSH;