# HG changeset patch # User mas01mc # Date 1217368877 0 # Node ID d9a88cfd4ab614800533e501ccfa9b522baba194 # Parent 63ae0dfc176752f344481ed51a98b9267fc5fb1d Completed merge of lshlib back to current version of the trunk. diff -r 63ae0dfc1767 -r d9a88cfd4ab6 Makefile --- a/Makefile Tue Jul 22 20:09:31 2008 +0000 +++ b/Makefile Tue Jul 29 22:01:17 2008 +0000 @@ -36,10 +36,10 @@ soapServer.cpp soapClient.cpp soapC.cpp adb.nsmap: audioDBws.h ${SOAPCPP2} audioDBws.h -%.o: %.cpp audioDB.h adb.nsmap cmdline.h reporter.h +%.o: %.cpp audioDB.h adb.nsmap cmdline.h reporter.h ReporterBase.h lshlib.h g++ -c ${CFLAGS} ${GSOAP_INCLUDE} ${GSL_INCLUDE} -Wall $< -OBJS=insert.o create.o common.o dump.o query.o soap.o sample.o audioDB.o +OBJS=insert.o create.o common.o dump.o query.o soap.o sample.o audioDB.o index.o lshlib.o ${EXECUTABLE}: ${OBJS} soapServer.cpp soapClient.cpp soapC.cpp cmdline.c g++ -o ${EXECUTABLE} ${CFLAGS} ${GSL_INCLUDE} ${LIBGSL} ${GSOAP_INCLUDE} $^ ${GSOAP_CPP} diff -r 63ae0dfc1767 -r d9a88cfd4ab6 MakefileLSH --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MakefileLSH Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,34 @@ +CFLAGS= -O3 -g +#CFLAGS= -ggdb --save-temps +MTSOURCES=mt19937/mt19937ar.c +CFLAGS+=-DMT19937 +GENGETOPT=gengetopt +SOAPCPP2=soapcpp2 +GSOAP_CPP=-lgsoap++ +GSOAP_INCLUDE= + +TARGET=TEST_LSH_LIB +TARGET2=UNIT_TEST_LSH +LSHOBS=TEST_LSH_LIB.cpp lshlib.cpp +LSHOBS2=UNIT_TEST_LSH.cpp lshlib.cpp +LSHHDRS=TEST_LSH_LIB.h lshlib.h reporter.h audioDB.h ReporterBase.h +LSHHDRS2=lshlib.h + +ifeq ($(shell uname),Linux) +override CFLAGS+=-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +endif + +all: ${TARGET} + +unit: ${TARGET2} + +${TARGET}: ${LSHOBS} ${LSHHDRS} ${MTSOURCES} MakefileLSH + ${GENGETOPT} -e +#include "lshlib.h" +#include "reporter.h" + +#define LSH_IN_CORE + + +#define N_POINT_BITS 14 +#define POINT_BIT_MASK 0x00003FFF + +// Callback method for LSH point retrieval +void add_point(void* reporter, Uns32T pointID, Uns32T qpos, float dist) +{ + ReporterBase* pr = (ReporterBase*)reporter; + pr->add_point(pointID>>N_POINT_BITS, qpos, pointID&POINT_BIT_MASK, dist); +} + +int main(int argc, char* argv[]){ + + int nT = 100; // num tracks + int nP = 1000; // num points-per-track + float w = 4.0;// LSH bucket width + int k = 10; + int m = 2; + int d = 10; + int N = 100000; + int C = 200; + + float radius = 0.001; + char FILENAME[] = "foo.lsh"; + + assert(nP>=nT); + + int fid = open(FILENAME,O_RDONLY); + LSH* lsh; + bool serialized = false; + Uns32T trackBase = 0; + + if(fid< 0){ // Make a new serial LSH file + lsh = new LSH(w,k,m,d,N,C,radius); + assert(lsh); + cout << "NEW LSH:" << endl; + } + else{ + close(fid); // Load LSH structures from disk + lsh = new LSH(FILENAME); + assert(lsh); + cout << "MERGE WITH EXISTING LSH:" << FILENAME << endl; + serialized=true; + trackBase = (lsh->get_maxp()>>N_POINT_BITS)+1; // Our encoding of tracks and points + } + cout << "k:" << lsh->k << " "; + cout << "m:" << lsh->m << "(L:" << lsh->L << ") "; + cout << "d:" << lsh->d << " "; + cout << "N:" << lsh->N << " "; + cout << "C:" << lsh->C << " "; + cout << "R:" << lsh->get_radius() << endl; + cout << "p:" << lsh->p << endl; + cout.flush(); + + cout << endl << "Constructing " << nT << " tracks with " << nP << " vectors of dimension " << d << endl; + cout.flush(); + // Construct sets of database vectors, use one point from each set for testing + vector< vector > vv = vector< vector >(nP); // track vectors + vector< vector > qq = vector< vector >(nP);// query vectors + for(int i=0; i< nP ; i++){ + vv[i]=vector(d); // allocate vector + qq[i]=vector(d); // allocate vector + } + + for(int k = 0 ; k < nT ; k ++){ + cout << "[" << k << "]"; + cout.flush(); + for(int i = 0 ; i< nP ; i++) + for(int j=0; j< d ; j++) + vv[i][j] = genrand_real2() / radius; // MT_19937 random numbers + lsh->insert_point_set(vv, (trackBase+k)<serialize(FILENAME); + + // TEST LSH RETRIEVAL IN CORE + printf("\n********** In-core LSH retrieval from %d track%c **********\n", + (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>0?'s':' '); + fflush(stdout); + for(int i = 0; i < nT ; i++ ){ + trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); + lsh->retrieve_point(qq[i], i, &add_point, (void*)pr); // LSH point retrieval from core + printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2()); + fflush(stdout); + pr->report(0,0); + delete pr; + } + delete lsh; + + cout << "Loading Serialized LSH functions from disk ..." << endl; + cout.flush(); + lsh = new LSH(FILENAME); + assert(lsh); + // lsh->serial_dump_tables(FILENAME); + printf("\n********** Serialized LSH retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' '); + fflush(stdout); + for(int i= 0; i < nT ; i++ ){ + trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); + lsh->serial_retrieve_point(FILENAME, qq[i], i, &add_point, (void*) pr); // LSH serialized point retrieval method + printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2()); + fflush(stdout); + pr->report(0,0); + delete pr; + } + delete lsh; + +#ifdef LSH_IN_CORE + cout << "Loading Serialized LSH functions and tables from disk ..." << endl; + cout.flush(); + // Unserialize entire lsh tree to core + lsh = new LSH(FILENAME,1); + + // TEST UNSERIALIZED LSH RETRIEVAL IN CORE + printf("\n********** Unserialized LSH in-core retrieval from %d track%c **********\n", (lsh->get_maxp()>>N_POINT_BITS)+1,(lsh->get_maxp()>>N_POINT_BITS)>1?'s':' '); + fflush(stdout); + for(int i = 0; i < nT ; i++ ){ + trackSequenceQueryRadNNReporter* pr = new trackSequenceQueryRadNNReporter(nP,nT,(lsh->get_maxp()>>N_POINT_BITS)+1); + lsh->retrieve_point(qq[i], i, &add_point, (void*) pr); // LSH point retrieval from core + printf("query vector %d] t1:%u t2:%0X\n", i, lsh->get_t1(), lsh->get_t2()); + fflush(stdout); + pr->report(0,0); + delete pr; + } + delete lsh; +#endif + +} diff -r 63ae0dfc1767 -r d9a88cfd4ab6 audioDB.cpp --- a/audioDB.cpp Tue Jul 22 20:09:31 2008 +0000 +++ b/audioDB.cpp Tue Jul 29 22:01:17 2008 +0000 @@ -1,5 +1,23 @@ #include "audioDB.h" +PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){}; + +bool operator<(const PointPair& a, const PointPair& b){ + return ( (a.qpos(const PointPair& a, const PointPair& b){ + return ( (a.qpos>b.qpos) || + ((a.qpos==b.qpos) && + ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) ); +} + +bool operator==(const PointPair& a, const PointPair& b){ + return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) ); +} + audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS { if(processArgs(argc, argv)<0){ @@ -49,6 +67,9 @@ else if(O2_ACTION(COM_DUMP)) dump(dbName); + + else if(O2_ACTION(COM_INDEX)) + index_index_db(dbName); else error("Unrecognized command",command); @@ -96,10 +117,16 @@ munmap(timesTable, timesTableLength); if(l2normTable) munmap(l2normTable, l2normTableLength); - + if(trackOffsetTable) + delete trackOffsetTable; + if(reporter) + delete reporter; + if(exact_evaluation_queue) + delete exact_evaluation_queue; if(rng) gsl_rng_free(rng); - + if(vv) + delete vv; if(dbfid>0) close(dbfid); if(infid>0) @@ -179,6 +206,27 @@ } } + sequenceLength = args_info.sequencelength_arg; + if(sequenceLength < 1 || sequenceLength > 1000) { + error("seqlen out of range: 1 <= seqlen <= 1000"); + } + sequenceHop = args_info.sequencehop_arg; + if(sequenceHop < 1 || sequenceHop > 1000) { + error("seqhop out of range: 1 <= seqhop <= 1000"); + } + + if (args_info.absolute_threshold_given) { + if (args_info.absolute_threshold_arg >= 0) { + error("absolute threshold out of range: should be negative"); + } + use_absolute_threshold = true; + absolute_threshold = args_info.absolute_threshold_arg; + } + if (args_info.relative_threshold_given) { + use_relative_threshold = true; + relative_threshold = args_info.relative_threshold_arg; + } + if(args_info.SERVER_given){ command=COM_SERVER; port=args_info.SERVER_arg; @@ -251,7 +299,10 @@ dbName=args_info.database_arg; inFile=args_info.features_arg; if(args_info.key_given) - key=args_info.key_arg; + if(!args_info.features_given) + error("INSERT: '-k key' argument depends on '-f features'"); + else + key=args_info.key_arg; if(args_info.times_given){ timesFileName=args_info.times_arg; if(strlen(timesFileName)>0){ @@ -277,7 +328,10 @@ dbName=args_info.database_arg; inFile=args_info.featureList_arg; if(args_info.keyList_given) - key=args_info.keyList_arg; // INCONSISTENT NO CHECK + if(!args_info.features_given) + error("INSERT: '-k key' argument depends on '-f features'"); + else + key=args_info.key_arg; // INCONSISTENT NO CHECK /* TO DO: REPLACE WITH if(args_info.keyList_given){ @@ -306,13 +360,64 @@ } return 0; } - + + // Set no_unit_norm flag + no_unit_norming = args_info.no_unit_norming_flag; + lsh_use_u_functions = args_info.lsh_use_u_functions_flag; + + // LSH Index Command + if(args_info.INDEX_given){ + if(radius <= 0 ) + error("INDEXing requires a Radius argument"); + if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN)) + error("INDEXing requires 1 <= sequenceLength <= 1000"); + command=COM_INDEX; + dbName=args_info.database_arg; + + // Whether to store LSH hash tables for query in core (FORMAT2) + lsh_in_core = args_info.lsh_inCore_flag; + + lsh_param_w = args_info.lsh_w_arg; + if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH)) + error("Indexing parameter w out of range (0.0 < w <= 100.0)"); + + lsh_param_k = args_info.lsh_k_arg; + if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS)) + error("Indexing parameter k out of range (1 <= k <= 100)"); + + lsh_param_m = args_info.lsh_m_arg; + if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0)) + error("Indexing parameter m out of range (1 <= m <= 20)"); + + lsh_param_N = args_info.lsh_N_arg; + if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS)) + error("Indexing parameter N out of range (1 <= N <= 1000000)"); + + lsh_param_b = args_info.lsh_b_arg; + if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH)) + error("Indexing parameter b out of range (1 <= b <= 10000)"); + + lsh_param_ncols = args_info.lsh_ncols_arg; + if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS)) + error("Indexing parameter ncols out of range (1 <= ncols <= 1000"); + + return 0; + } + // Query command and arguments if(args_info.QUERY_given){ command=COM_QUERY; dbName=args_info.database_arg; - inFile=args_info.features_arg; - + // XOR features and key search + if(!args_info.features_given && !args_info.key_given || (args_info.features_given && args_info.key_given)) + error("QUERY requires exactly one of either -f features or -k key"); + if(args_info.features_given) + inFile=args_info.features_arg; // query from file + else{ + query_from_key = true; + key=args_info.key_arg; // query from key + } + if(args_info.keyList_given){ trackFileName=args_info.keyList_arg; if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) @@ -358,7 +463,13 @@ if(queryPoint<0 || queryPoint >10000) error("queryPoint out of range: 0 <= queryPoint <= 10000"); } - + + // Whether to pre-load LSH hash tables for query + lsh_in_core = args_info.lsh_inCore_flag; + + // Whether to perform exact evaluation of points returned by LSH + lsh_exact = args_info.lsh_exact_flag; + pointNN = args_info.pointnn_arg; if(pointNN < 1 || pointNN > O2_MAXNN) { error("pointNN out of range: 1 <= pointNN <= 1000000"); @@ -367,26 +478,6 @@ if(trackNN < 1 || trackNN > O2_MAXNN) { error("resultlength out of range: 1 <= resultlength <= 1000000"); } - sequenceLength = args_info.sequencelength_arg; - if(sequenceLength < 1 || sequenceLength > 1000) { - error("seqlen out of range: 1 <= seqlen <= 1000"); - } - sequenceHop = args_info.sequencehop_arg; - if(sequenceHop < 1 || sequenceHop > 1000) { - error("seqhop out of range: 1 <= seqhop <= 1000"); - } - - if (args_info.absolute_threshold_given) { - if (args_info.absolute_threshold_arg >= 0) { - error("absolute threshold out of range: should be negative"); - } - use_absolute_threshold = true; - absolute_threshold = args_info.absolute_threshold_arg; - } - if (args_info.relative_threshold_given) { - use_relative_threshold = true; - relative_threshold = args_info.relative_threshold_arg; - } return 0; } return -1; // no command found diff -r 63ae0dfc1767 -r d9a88cfd4ab6 audioDB.h --- a/audioDB.h Tue Jul 22 20:09:31 2008 +0000 +++ b/audioDB.h Tue Jul 29 22:01:17 2008 +0000 @@ -1,3 +1,6 @@ +#ifndef __AUDIODB_H_ +#define __AUDIODB_H_ + #include #include #include @@ -14,6 +17,10 @@ #include #include +// includes for LSH indexing +#include "ReporterBase.h" +#include "lshlib.h" + // includes for web services #include "soapH.h" #include "cmdline.h" @@ -30,6 +37,7 @@ #define COM_POWER "--POWER" #define COM_DUMP "--DUMP" #define COM_SERVER "--SERVER" +#define COM_INDEX "--INDEX" #define COM_SAMPLE "--SAMPLE" // parameters @@ -62,15 +70,21 @@ #define O2_DEFAULT_DATASIZE (1355U) // in MB #define O2_DEFAULT_NTRACKS (20000U) #define O2_DEFAULT_DATADIM (9U) - +#define O2_REALTYPE (double) #define O2_MAXFILES (20000U) #define O2_MAXFILESTR (256U) #define O2_FILETABLE_ENTRY_SIZE (O2_MAXFILESTR) #define O2_TRACKTABLE_ENTRY_SIZE (sizeof(unsigned)) #define O2_HEADERSIZE (sizeof(dbTableHeaderT)) #define O2_MEANNUMVECTORS (1000U) -#define O2_MAXDIM (1000U) +#define O2_MAXDIM (2000U) #define O2_MAXNN (1000000U) +#define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence +#define O2_MAXTRACKS (10000U) // maximum number of tracks +#define O2_MAXTRACKLEN (1000000U) // maximum shingles in a track +#define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB +#define O2_DISTANCE_TOLERANCE (1e-6) +#define O2_SERIAL_MAX_TRACKBATCH (10000) // Flags #define O2_FLAG_L2NORM (0x1U) @@ -85,6 +99,10 @@ #define O2_N_SEQUENCE_QUERY (0x20U) #define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U) +// Bit masks for packing (trackID,pointID) into 32-bit unsigned int +#define LSH_N_POINT_BITS 14 +#define LSH_TRACK_MASK 0xFFFFC000 // 2^18 = 262144 tracks +#define LSH_POINT_MASK 0x00003FFF // 2^14 = 16384 points per track // Error Codes #define O2_ERR_KEYNOTFOUND (0xFFFFFF00) @@ -131,7 +149,16 @@ off_t dbSize; } dbTableHeaderT, *dbTableHeaderPtr; -class Reporter; + +class PointPair{ + public: + Uns32T trackID; + Uns32T qpos; + Uns32T spos; + PointPair(Uns32T a, Uns32T b, Uns32T c); +}; + +bool operator<(const PointPair& a, const PointPair& b); class audioDB{ @@ -153,6 +180,7 @@ int powerfd; int dbfid; + int lshfid; bool forWrite; int infid; char* db; @@ -164,6 +192,7 @@ char *fileTable; unsigned* trackTable; + off_t *trackOffsetTable; double* dataBuf; double* inBuf; double* l2normTable; @@ -193,6 +222,7 @@ unsigned sequenceLength; unsigned sequenceHop; bool normalizedDistance; + bool no_unit_norming; unsigned queryPoint; unsigned usingQueryPoint; unsigned usingTimes; @@ -202,13 +232,16 @@ unsigned port; double timesTol; double radius; - + bool query_from_key; + Uns32T query_from_key_index; bool use_absolute_threshold; double absolute_threshold; bool use_relative_threshold; double relative_threshold; - + ReporterBase* reporter; // track/point reporter + priority_queue, std::less >* exact_evaluation_queue; + // Timers struct timeval tv1; struct timeval tv2; @@ -223,20 +256,22 @@ void delete_arrays(int track, unsigned int numVectors, double **D, double **DD); void read_data(int track, double **data_buffer_p, size_t *data_buffer_size_p); void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp); + void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex); void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp); - void query_loop(const char* dbName, const char* inFile, Reporter *reporter); - + void query_loop(const char* dbName, Uns32T queryIndex); + void query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors); + double dot_product_points(double* q, double* p, Uns32T L); void initRNG(); void initDBHeader(const char *dbName); void initInputFile(const char *inFile); - void initTables(const char* dbName, const char* inFile); + void initTables(const char* dbName, const char* inFile = 0); + void initTablesFromKey(const char* dbName, const Uns32T queryIndex); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void insertPowerData(unsigned n, int powerfd, double *powerdata); unsigned getKeyPos(char* key); public: - audioDB(const unsigned argc, char* const argv[]); audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse); audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse); @@ -263,64 +298,119 @@ bool powers_acceptable(double p1, double p2); void dump(const char* dbName); - // web services + // LSH indexing parameters and data structures + LSH* lsh; + bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false) + bool lsh_use_u_functions; + bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH + double lsh_param_w; // Width of LSH hash-function bins + Uns32T lsh_param_k; // Number of independent hash functions + Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables + Uns32T lsh_param_N; // Number of rows per hash table + Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration + Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row + + // LSH vector<> containers for one in-core copy of a set of feature vectors + vector::iterator vi; // feature vector iterator + vector > *vv; // one-track's worth data + + // LSH indexing and retrieval methods + void index_index_db(const char* dbName); + void index_initialize(double**,double**,double**,double**,unsigned int*); + void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); + int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp); + Uns32T index_insert_shingles(vector >*, Uns32T trackID, double* spp); + void index_make_shingle(vector >*, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen); + int index_norm_shingles(vector >*, double* snp, double* spp); + int index_query_loop(const char* dbName, Uns32T queryIndex); + vector >* index_initialize_shingles(Uns32T sz); + int index_init_query(const char* dbName); + int index_exists(const char* dbName, double radius, Uns32T sequenceLength); + char* index_get_name(const char*dbName, double radius, Uns32T sequenceLength); + static void index_add_point_approximate(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method + static void index_add_point_exact(void* instance, Uns32T pointID, Uns32T qpos, float dist); // static point reporter callback method + static Uns32T index_to_trackID(Uns32T lshID); // Convert lsh point index to audioDB trackID + static Uns32T index_to_trackPos(Uns32T lshID); // Convert lsh point index to audioDB trackPos (spos) + static Uns32T index_from_trackInfo(Uns32T, Uns32T); // Convert audioDB trackID and trackPos to an lsh point index + void initialize_exact_evalutation_queue(); + void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos); + + // Web Services void startServer(); }; -#define O2_AUDIODB_INITIALIZERS \ - dim(0), \ - dbName(0), \ - inFile(0), \ - key(0), \ - trackFileName(0), \ - trackFile(0), \ - command(0), \ - output(0), \ - timesFileName(0), \ - timesFile(0), \ - powerFileName(0), \ - powerFile(0), \ - powerfd(0), \ - dbfid(0), \ - forWrite(false), \ - infid(0), \ - db(0), \ - indata(0), \ - dbH(0), \ - rng(0), \ - fileTable(0), \ - trackTable(0), \ - dataBuf(0), \ - l2normTable(0), \ - timesTable(0), \ - fileTableLength(0), \ - trackTableLength(0), \ - dataBufLength(0), \ - timesTableLength(0), \ - powerTableLength(0), \ - l2normTableLength(0), \ - verbosity(1), \ - nsamples(2000), \ - datasize(O2_DEFAULT_DATASIZE), \ - ntracks(O2_DEFAULT_NTRACKS), \ - datadim(O2_DEFAULT_DATADIM), \ - queryType(O2_POINT_QUERY), \ - pointNN(O2_DEFAULT_POINTNN), \ - trackNN(O2_DEFAULT_TRACKNN), \ - sequenceLength(16), \ - sequenceHop(1), \ - normalizedDistance(true), \ - queryPoint(0), \ - usingQueryPoint(0), \ - usingTimes(0), \ - usingPower(0), \ - isClient(0), \ - isServer(0), \ - port(0), \ - timesTol(0.1), \ - radius(0), \ - use_absolute_threshold(false), \ - absolute_threshold(0.0), \ - use_relative_threshold(false), \ - relative_threshold(0.0) +#define O2_AUDIODB_INITIALIZERS \ + dim(0), \ + dbName(0), \ + inFile(0), \ + key(0), \ + trackFileName(0), \ + trackFile(0), \ + command(0), \ + output(0), \ + timesFileName(0), \ + timesFile(0), \ + powerFileName(0), \ + powerFile(0), \ + powerfd(0), \ + dbfid(0), \ + lshfid(0), \ + forWrite(false), \ + infid(0), \ + db(0), \ + indata(0), \ + dbH(0), \ + rng(0), \ + fileTable(0), \ + trackTable(0), \ + trackOffsetTable(0), \ + dataBuf(0), \ + l2normTable(0), \ + timesTable(0), \ + fileTableLength(0), \ + trackTableLength(0), \ + dataBufLength(0), \ + timesTableLength(0), \ + powerTableLength(0), \ + l2normTableLength(0), \ + verbosity(1), \ + nsamples(2000), \ + datasize(O2_DEFAULT_DATASIZE), \ + ntracks(O2_DEFAULT_NTRACKS), \ + datadim(O2_DEFAULT_DATADIM), \ + queryType(O2_POINT_QUERY), \ + pointNN(O2_DEFAULT_POINTNN), \ + trackNN(O2_DEFAULT_TRACKNN), \ + sequenceLength(16), \ + sequenceHop(1), \ + normalizedDistance(true), \ + no_unit_norming(false), \ + queryPoint(0), \ + usingQueryPoint(0), \ + usingTimes(0), \ + usingPower(0), \ + isClient(0), \ + isServer(0), \ + port(0), \ + timesTol(0.1), \ + radius(0), \ + query_from_key(false), \ + query_from_key_index(O2_ERR_KEYNOTFOUND), \ + use_absolute_threshold(false), \ + absolute_threshold(0.0), \ + use_relative_threshold(false), \ + relative_threshold(0.0), \ + reporter(0), \ + exact_evaluation_queue(0), \ + lsh(0), \ + lsh_in_core(false), \ + lsh_use_u_functions(false), \ + lsh_exact(false), \ + lsh_param_k(0), \ + lsh_param_m(0), \ + lsh_param_N(0), \ + lsh_param_b(0), \ + lsh_param_ncols(0), \ + vv(0) +#endif diff -r 63ae0dfc1767 -r d9a88cfd4ab6 common.cpp --- a/common.cpp Tue Jul 22 20:09:31 2008 +0000 +++ b/common.cpp Tue Jul 29 22:01:17 2008 +0000 @@ -147,6 +147,14 @@ CHECKED_MMAP(double *, powerTable, dbH->powerTableOffset, powerTableLength); CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength); } + + // build track offset table + trackOffsetTable = new off_t[dbH->numFiles]; + Uns32T cumTrack=0; + for(Uns32T k = 0; k < dbH->numFiles; k++){ + trackOffsetTable[k] = cumTrack; + cumTrack += trackTable[k] * dbH->dim; + } } void audioDB::initInputFile (const char *inFile) { @@ -187,7 +195,7 @@ } } -void audioDB::initTables(const char* dbName, const char* inFile = 0) { +void audioDB::initTables(const char* dbName, const char* inFile) { /* FIXME: initRNG() really logically belongs in the audioDB contructor. However, there are of the order of four constructors at the moment, and more to come from API implementation. Given @@ -196,5 +204,7 @@ database will need an RNG. -- CSR, 2008-07-02 */ initRNG(); initDBHeader(dbName); - initInputFile(inFile); + if(inFile) + initInputFile(inFile); } + diff -r 63ae0dfc1767 -r d9a88cfd4ab6 gengetopt.in --- a/gengetopt.in Tue Jul 22 20:09:31 2008 +0000 +++ b/gengetopt.in Tue Jul 29 22:01:17 2008 +0000 @@ -23,7 +23,7 @@ option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional option "POWER" P "turn on power flag for database." dependon="database" optional - +option "INDEX" X "build an index for -d database at -R radius" dependon="database" dependon="radius" optional section "Database Information" sectiondesc="Information about databases." option "STATUS" S "output database information to stdout." dependon="database" optional @@ -37,7 +37,7 @@ option "features" f "binary series of vectors file {int sz:ieee double[][sz]:eof}." string typestr="filename" dependon="database" optional option "times" t "list of time points (ascii) for feature vectors." string typestr="filename" dependon="features" optional option "power" w "binary power feature file." string typestr="filename" dependon="database" optional -option "key" k "unique identifier associated with features." string typestr="identifier" dependon="features" optional +option "key" k "unique identifier associated with features." string typestr="identifier" optional text "" option "BATCHINSERT" B "add feature vectors named in a --featureList file (with optional keys in a --keyList file) to the named database." dependon="featureList" optional option "featureList" F "text file containing list of binary feature vector files to process, one per track" string typestr="filename" dependon="database" optional @@ -45,12 +45,9 @@ option "powerList" W "text file containing list of binary power feature file." string typestr="filename" dependon="database" optional option "keyList" K "text file containing list of unique identifiers associated with --features." string typestr="filename" optional -option "sequencelength" l "length of sequences for sequence search." int typestr="length" default="16" optional +section "Database Search" sectiondesc="These commands control the retrieval behaviour.\n" - -section "Database Search" sectiondesc="Thse commands control the retrieval behaviour.\n" - -option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence","nsequence","onetoonensequence" typestr="searchtype" dependon="database" dependon="features" optional +option "QUERY" Q "content-based search on --database using --features as a query. Optionally restrict the search to those tracks identified in a --keyList." values="point","track","sequence","nsequence","onetoonensequence" typestr="searchtype" dependon="database" optional option "qpoint" p "ordinal position of query start point in --features file." int typestr="position" default="0" optional option "exhaustive" e "exhaustive search: iterate through all query vectors in search. Overrides --qpoint." flag off optional hidden option "pointnn" n "number of point nearest neighbours to use in retrieval." int typestr="numpoints" default="10" optional @@ -58,15 +55,33 @@ option "expandfactor" x "time compress/expand factor of result length to query length [1.0 .. 100.0]." double default="1.1" optional hidden option "rotate" o "rotate query vectors for rotationally invariant search." flag off optional hidden option "resultlength" r "maximum length of the result list." int typestr="length" default="10" optional +option "sequencelength" l "length of sequences for sequence search." int typestr="length" default="16" optional option "sequencehop" h "hop size of sequence window for sequence search." int typestr="hop" default="1" dependon="QUERY" optional -option "absolute-threshold" - "absolute power threshold for consideration of query or target sequence (in Bels)" double dependon="QUERY" optional +option "absolute-threshold" - "absolute power threshold for consideration of query or target sequence (in Bels)" double optional option "relative-threshold" - "relative power threshold between query and target sequence (in Bels)" double dependon="QUERY" optional +section "Locality-sensitive hashing (LSH) parameters" sectiondesc="These parameters control LSH indexing and retrieval\n" + +option "lsh_w" - "width of LSH hash-function bins. " double default="4.0" dependon="INDEX" optional hidden +option "lsh_k" - "even number of independent hash functions to employ with LSH" int typestr="size" default="8" dependon="INDEX" optional +option "lsh_m" - "number of hash tables is m(m-1)/2" int typestr="size" default="5" dependon="INDEX" optional +option "lsh_N" - "number of rows per hash tables" int typestr="size" default="100000" dependon="INDEX" optional +option "lsh_b" - "number of tracks per indexing iteration" int typestr="size" default="500" dependon="INDEX" optional +option "lsh_ncols" - "number of columns (collisions) to allocate in LSH serialization" int typestr="size" default="250" dependon="INDEX" optional +option "lsh_exact" - "use exact evaluation of points retrieved by LSH." flag off dependon="QUERY" optional +option "lsh_inCore" - "LSH hash tables resident in core (INDEX/QUERY)" flag on optional +option "lsh_use_u_functions" - "use m independent hash functions combinatorically to approximate L independent hash functions." flag off optional + +section "Normalization control parameters" sectiondesc="These parameters control the normalization of feaures at query time\n" + +option "no_unit_norming" - "do not unit norm features when querying an L2Norm databases." flag off optional + section "Web Services" sectiondesc="These commands enable the database process to establish a connection via the internet and operate as separate client and server processes.\n" + option "SERVER" s "run as standalone web service on named port." int typestr="port" default="14475" optional option "client" c "run as a client using named host service." string typestr="hostname:port" optional text " Copyright (c) 2007 Michael Casey, Christophe Rhodes - Goldsmiths, University of London" + Goldsmiths, University of London" diff -r 63ae0dfc1767 -r d9a88cfd4ab6 index.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/index.cpp Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,504 @@ +// LSH indexing +// +// Construct a persistent LSH table structure +// Store at the same location as dbName +// Naming convention: +// dbName.lsh.${radius}.${sequenceLength} +// +// +// Author: Michael Casey +// Date: 23 June 2008 + +#include "audioDB.h" +#include "ReporterBase.h" + + +/************************* LSH point index to audioDB conversion *****************/ +Uns32T audioDB::index_to_trackID(Uns32T lshID){ + return lshID>>LSH_N_POINT_BITS; +} + +Uns32T audioDB::index_to_trackPos(Uns32T lshID){ + return lshID&LSH_POINT_MASK; +} + +Uns32T audioDB::index_from_trackInfo(Uns32T trackID, Uns32T spos){ + return (trackID << LSH_N_POINT_BITS) | spos; +} + +/************************* LSH indexing and query initialization *****************/ + +char* audioDB::index_get_name(const char*dbName, double radius, Uns32T sequenceLength){ + char* indexName = new char[MAXSTR]; + // Attempt to make new file + if(strlen(dbName) > (MAXSTR - 32)) + error("dbName is too long for LSH index filename appendages"); + strncpy(indexName, dbName, MAXSTR); + sprintf(indexName+strlen(dbName), ".lsh.%019.9f.%d", radius, sequenceLength); + return indexName; +} + +// return true if index exists else return false +int audioDB::index_exists(const char* dbName, double radius, Uns32T sequenceLength){ + // Test to see if file exists + char* indexName = index_get_name(dbName, radius, sequenceLength); + lshfid = open (indexName, O_RDONLY); + delete[] indexName; + close(lshfid); + + if(lshfid<0) + return false; + else + return true; +} + +vector >* audioDB::index_initialize_shingles(Uns32T sz){ + if(vv) + delete vv; + vv = new vector >(sz); + for(Uns32T i=0 ; i < sz ; i++) + (*vv)[i]=vector(dbH->dim*sequenceLength); // allocate shingle storage + return vv; +} + +/******************** LSH indexing audioDB database access forall s \in {S} ***********************/ + +// Prepare the AudioDB database for read access and allocate auxillary memory +void audioDB::index_initialize(double **snp, double **vsnp, double **spp, double **vspp, Uns32T *dvp) { + *dvp = dbH->length / (dbH->dim * sizeof(double)); // number of database vectors + *snp = new double[*dvp]; // songs norm pointer: L2 norm table for each vector + + double *snpp = *snp, *sppp = 0; + memcpy(*snp, l2normTable, *dvp * sizeof(double)); + + if (!(dbH->flags & O2_FLAG_POWER)) { + error("database not power-enabled", dbName); + } + *spp = new double[*dvp]; // song powertable pointer + sppp = *spp; + memcpy(*spp, powerTable, *dvp * sizeof(double)); + + for(Uns32T i = 0; i < dbH->numFiles; i++){ + if(trackTable[i] >= sequenceLength) { + sequence_sum(snpp, trackTable[i], sequenceLength); + sequence_sqrt(snpp, trackTable[i], sequenceLength); + + sequence_sum(sppp, trackTable[i], sequenceLength); + sequence_average(sppp, trackTable[i], sequenceLength); + } + snpp += trackTable[i]; + sppp += trackTable[i]; + } + + *vsnp = *snp; + *vspp = *spp; + + // Move the feature vector read pointer to start of fetures in database + lseek(dbfid, dbH->dataOffset, SEEK_SET); +} + + +/************************ LSH indexing ***********************************/ +void audioDB::index_index_db(const char* dbName){ + + char* newIndexName; + double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0; + Uns32T dbVectors = 0; + + printf("INDEX: initializing header\n"); + // Check if audioDB exists, initialize header and open database for read + forWrite = false; + initDBHeader(dbName); + + newIndexName = index_get_name(dbName, radius, sequenceLength); + + // Set unit norming flag override + audioDB::normalizedDistance = !audioDB::no_unit_norming; + + printf("INDEX: dim %d\n", dbH->dim); + printf("INDEX: R %f\n", radius); + printf("INDEX: seqlen %d\n", sequenceLength); + printf("INDEX: lsh_w %f\n", lsh_param_w); + printf("INDEX: lsh_k %d\n", lsh_param_k); + printf("INDEX: lsh_m %d\n", lsh_param_m); + printf("INDEX: lsh_N %d\n", lsh_param_N); + printf("INDEX: lsh_b %d\n", lsh_param_b); + printf("INDEX: normalized? %s\n", normalizedDistance?"true":"false"); + fflush(stdout); + + + index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors); + + if((lshfid = open(newIndexName,O_RDONLY))<0){ + printf("INDEX: constructing new LSH index\n"); + printf("INDEX: making index file %s\n", newIndexName); + fflush(stdout); + // Construct new LSH index + lsh = new LSH((float)lsh_param_w, lsh_param_k, + lsh_param_m, + (Uns32T)(sequenceLength*dbH->dim), + lsh_param_N, + lsh_param_ncols, + (float)radius); + assert(lsh); + + Uns32T endTrack = lsh_param_b; + if( endTrack > dbH->numFiles) + endTrack = dbH->numFiles; + // Insert up to lsh_param_b tracks + index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); + lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); + + // Clean up + delete lsh; + close(lshfid); + } + + // Attempt to open LSH file + if((lshfid = open(newIndexName,O_RDONLY))>0){ + printf("INDEX: merging with existing LSH index\n"); + fflush(stdout); + + // Get the lsh header info and find how many tracks are inserted already + lsh = new LSH(newIndexName, false); // lshInCore=false to avoid loading hashTables here + assert(lsh); + Uns32T maxs = index_to_trackID(lsh->get_maxp())+1; + delete lsh; + + // This allows for updating index after more tracks are inserted into audioDB + for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){ + + Uns32T endTrack = startTrack + lsh_param_b; + if( endTrack > dbH->numFiles) + endTrack = dbH->numFiles; + printf("Indexing track range: %d - %d\n", startTrack, endTrack); + fflush(stdout); + lsh = new LSH(newIndexName, lsh_in_core); // Initialize core memory for LSH tables + assert(lsh); + + // Insert up to lsh_param_b database tracks + index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr); + + // Serialize to file + lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk + delete lsh; + } + + close(lshfid); + printf("INDEX: done constructing LSH index.\n"); + fflush(stdout); + + } + else{ + error("Something's wrong with LSH index file"); + exit(1); + } + + + delete[] newIndexName; + if(sNorm) + delete[] sNorm; + if(sPower) + delete[] sPower; + + +} + +void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track, + double** fvpp, double** sNormpp,double** snPtrp, + double** sPowerp, double** spPtrp){ + size_t nfv = 0; + double* fvp = 0; // Keep pointer for memory allocation and free() for track data + Uns32T trackID = 0; + + VERB_LOG(1, "indexing tracks..."); + + + for(trackID = start_track ; trackID < end_track ; trackID++ ){ + read_data(trackID, &fvp, &nfv); // over-writes fvp and nfv + *fvpp = fvp; // Protect memory allocation and free() for track data + if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp)) + break; + } + std::cout << "finished inserting." << endl; +} + +int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){ + // Loop over the current input track's vectors + Uns32T numVecs = (trackTable[trackID]>O2_MAXTRACKLEN?O2_MAXTRACKLEN:trackTable[trackID]) - sequenceLength + 1 ; + vv = index_initialize_shingles(numVecs); + + for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ ) + index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength); + + Uns32T numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp); + Uns32T collisionCount = index_insert_shingles(vv, trackID, *sppp); + float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0; + + /* index_norm_shingles() only goes as far as the end of the + sequence, which is right, but the space allocated is for the + whole track. */ + + /* But numVecs will be O2_MAXTRACKLEN + * So let's be certain the pointers are in the correct place + */ + + *snpp += trackTable[trackID]; + *sppp += trackTable[trackID]; + *fvpp += trackTable[trackID] * dbH->dim; + + std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl; + std::cout.flush(); + return true; +} + +Uns32T audioDB::index_insert_shingles(vector >* vv, Uns32T trackID, double* spp){ + Uns32T collisionCount = 0; + cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE; + for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID++) + if(!use_absolute_threshold || (use_absolute_threshold && (*spp++ >= absolute_threshold))) + collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID)); + return collisionCount; +} + +/********************* LSH shingle construction ***************************/ + +// Construct shingles out of a feature matrix +// inputs: +// idx is vector index in feature matrix +// fvp is base feature matrix pointer double* [numVecs x dbH->dim] +// +// pre-conditions: +// dbH->dim +// sequenceLength +// idx < numVectors - sequenceLength + 1 +// +// post-conditions: +// (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values + +void audioDB::index_make_shingle(vector >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){ + assert(idx<(*vv).size()); + vector::iterator ve = (*vv)[idx].end(); + vi=(*vv)[idx].begin(); // shingle iterator + // First feature vector in shingle + if(idx==0){ + while(vi!=ve) + *vi++ = (float)(*fvp++); + } + // Not first feature vector in shingle + else{ + vector::iterator ui=(*vv)[idx-1].begin() + dim; // previous shingle iterator + // Previous seqLen-1 dim-vectors + while(vi!=ve-dim) + *vi++=*ui++; + // Move data pointer to next feature vector + fvp += ( seqLen + idx - 1 ) * dim ; + // New d-vector + while(vi!=ve) + *vi++ = (float)(*fvp++); + } +} + +// norm shingles +// in-place norming, no deletions +// If using power, return number of shingles above power threshold +int audioDB::index_norm_shingles(vector >* vv, double* snp, double* spp){ + int z = 0; // number of above-threshold shingles + float l2norm; + double power; + float oneOverRadius = 1./(float)sqrt(radius); // Passed radius is really radius^2 + float oneOverSqrtl2NormDivRad = oneOverRadius; + if(!spp) + error("LSH indexing and query requires a power feature using -w or -W"); + Uns32T shingleSize = sequenceLength*dbH->dim; + for(Uns32T a=0; a<(*vv).size(); a++){ + l2norm = (float)(*snp++); + if(audioDB::normalizedDistance) + oneOverSqrtl2NormDivRad = (1./l2norm)*oneOverRadius; + + for(Uns32T b=0; b < shingleSize ; b++) + (*vv)[a][b]*=oneOverSqrtl2NormDivRad; + + power = *spp++; + if(use_absolute_threshold){ + if ( power >= absolute_threshold ) + z++; + } + else + z++; + } + return z; +} + + +/*********************** LSH retrieval ****************************/ + + +// return true if indexed query performed else return false +int audioDB::index_init_query(const char* dbName){ + + if(!(index_exists(dbName, radius, sequenceLength))) + return false; + + char* indexName = index_get_name(dbName, radius, sequenceLength); + + // Test to see if file exists + if((lshfid = open (indexName, O_RDONLY)) < 0){ + delete[] indexName; + return false; + } + + printf("INDEX: initializing header\n"); + + lsh = new LSH(indexName, false); // Get the header only here + assert(lsh); + sequenceLength = lsh->get_lshHeader()->dataDim / dbH->dim; // shingleDim / vectorDim + + + if( fabs(radius - lsh->get_radius())>fabs(O2_DISTANCE_TOLERANCE)) + printf("*** Warning: adb_radius (%f) != lsh_radius (%f) ***\n", radius, lsh->get_radius()); + + printf("INDEX: dim %d\n", dbH->dim); + printf("INDEX: R %f\n", lsh->get_radius()); + printf("INDEX: seqlen %d\n", sequenceLength); + printf("INDEX: w %f\n", lsh->get_lshHeader()->get_binWidth()); + printf("INDEX: k %d\n", lsh->get_lshHeader()->get_numFuns()); + printf("INDEX: L (m*(m-1))/2 %d\n", lsh->get_lshHeader()->get_numTables()); + printf("INDEX: N %d\n", lsh->get_lshHeader()->get_numRows()); + printf("INDEX: s %d\n", index_to_trackID(lsh->get_maxp())); + printf("INDEX: Opened LSH index file %s\n", indexName); + fflush(stdout); + + // Check to see if we are loading hash tables into core, and do so if true + if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core){ + printf("INDEX: loading hash tables into core %s\n", (lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2)?"FORMAT2":"FORMAT1"); + delete lsh; + lsh = new LSH(indexName, true); + } + + delete[] indexName; + return true; +} + +// *Static* approximate NN point reporter callback method for lshlib +void audioDB::index_add_point_approximate(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ + assert(instancePtr); // We need an instance for this callback + audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance + Uns32T trackID = index_to_trackID(pointID); + Uns32T spos = index_to_trackPos(pointID); + // Skip identity in query_from_key + if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) + myself->reporter->add_point(trackID, qpos, spos, dist); +} + +// *Static* exact NN point reporter callback method for lshlib +// Maintain a queue of points to pass to query_points() for exact evaluation +void audioDB::index_add_point_exact(void* instancePtr, Uns32T pointID, Uns32T qpos, float dist){ + assert(instancePtr); // We need an instance for this callback + audioDB* myself = (audioDB*) instancePtr; // Use explicit cast to recover "this" instance + Uns32T trackID = index_to_trackID(pointID); + Uns32T spos = index_to_trackPos(pointID); + // Skip identity in query_from_key + if( !myself->query_from_key || (myself->query_from_key && ( trackID != myself->query_from_key_index )) ) + myself->index_insert_exact_evaluation_queue(trackID, qpos, spos); +} + +void audioDB::initialize_exact_evalutation_queue(){ + if(exact_evaluation_queue) + delete exact_evaluation_queue; + exact_evaluation_queue = new priority_queue, std::less >; +} + +void audioDB::index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos){ + PointPair p(trackID, qpos, spos); + exact_evaluation_queue->push(p); +} + +// return 0: if index does not exist +// return nqv: if index exists +int audioDB::index_query_loop(const char* dbName, Uns32T queryIndex) { + + unsigned int numVectors; + double *query, *query_data; + double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; + double meanQdur; + void (*add_point_func)(void*,Uns32T,Uns32T,float); + + // Set the point-reporter callback based on the value of lsh_exact + if(lsh_exact){ + initialize_exact_evalutation_queue(); + add_point_func = &index_add_point_exact; + } + else + add_point_func = &index_add_point_approximate; + + if(!index_init_query(dbName)) // sets-up LSH index structures for querying + return 0; + + char* database = index_get_name(dbName, radius, sequenceLength); + + if(query_from_key) + set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); + else + set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); // get query vectors + + VERB_LOG(1, "retrieving tracks..."); + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + gettimeofday(&tv1, NULL); + // query vector index + Uns32T Nq = (numVectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:numVectors) - sequenceLength + 1; + vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles + cout << "Nq=" << Nq; cout.flush(); + // Construct shingles from query features + for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ ) + index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength); + + // Normalize query vectors + Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qnPtr, qpPtr ); + cout << " Nq'=" << numVecsAboveThreshold << endl; cout.flush(); + + // Nq contains number of inspected points in query file, + // numVecsAboveThreshold is number of points with power >= absolute_threshold + double* qpp = qpPtr; // Keep original qpPtr for possible exact evaluation + if(usingQueryPoint && numVecsAboveThreshold){ + if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) + lsh->retrieve_point((*vv)[0], queryPoint, add_point_func, (void*)this); + else + lsh->serial_retrieve_point(database, (*vv)[0], queryPoint, add_point_func, (void*)this); + } + else if(numVecsAboveThreshold) + for( Uns32T pointID = 0 ; pointID < Nq; pointID++ ) + if(!use_absolute_threshold || (use_absolute_threshold && (*qpp++ >= absolute_threshold))) + if((lsh->get_lshHeader()->flags&O2_SERIAL_FILEFORMAT2) || lsh_in_core) + lsh->retrieve_point((*vv)[pointID], pointID, add_point_func, (void*)this); + else + lsh->serial_retrieve_point(database, (*vv)[pointID], pointID, add_point_func, (void*)this); + + if(lsh_exact) + // Perform exact distance computation on point pairs in exact_evaluation_queue + query_loop_points(query, qnPtr, qpPtr, meanQdur, numVectors); + + gettimeofday(&tv2,NULL); + VERB_LOG(1,"elapsed time: %ld msec\n", + (tv2.tv_sec*1000 + tv2.tv_usec/1000) - + (tv1.tv_sec*1000 + tv1.tv_usec/1000)) + + // Close the index file + close(lshfid); + + // Clean up + if(query_data) + delete[] query_data; + if(qNorm) + delete[] qNorm; + if(qPower) + delete[] qPower; + if(database) + delete[] database; + + return Nq; +} + diff -r 63ae0dfc1767 -r d9a88cfd4ab6 lshlib.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lshlib.cpp Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,1429 @@ +#include "lshlib.h" + +//#define __LSH_DUMP_CORE_TABLES__ +//#define USE_U_FUNCTIONS +//#define LSH_BLOCK_FULL_ROWS + +void err(char*s){cout << s << endl;exit(2);} + +Uns32T get_page_logn(){ + int pagesz = (int)sysconf(_SC_PAGESIZE); + return (Uns32T)log2((double)pagesz); +} + +unsigned align_up(unsigned x, unsigned w){ return ((x) + ((1<4000000000UL) + error("Maximum size of LSH file exceded: 12*L*N*C > 4000MB"); + else if(((unsigned long long)N*C*sizeof(SerialElementT))>1000000000UL) + cout << "warning: hash tables exceed 1000MB." << endl; + + if(m<2){ + m=2; + L=1; // check value of L + cout << "warning: setting m=2, L=1" << endl; + } + if(use_u_functions && k%2){ + k++; // make sure k is even + cout << "warning: setting k even" << endl; + } + __initialize_data_structures(); + for(j=0; j> 32); + if (h >= UH_PRIME_DEFAULT) { + h = h - UH_PRIME_DEFAULT; + } + CR_ASSERT(h < UH_PRIME_DEFAULT); + } + return h; +} + +Uns32T H::bucket_insert_point(bucket **pp){ + Uns32T collisionCount = 0; + if(!*pp){ + *pp = new bucket(); +#ifdef LSH_BLOCK_FULL_ROWS + (*pp)->t2 = 0; // Use t2 as a collision counter for the row + (*pp)->next = new bucket(); +#endif + } +#ifdef LSH_BLOCK_FULL_ROWS + collisionCount = (*pp)->t2; + if(collisionCount < H::C){ // Block if row is full + (*pp)->t2++; // Increment collision counter + pointCount++; + collisionCount++; + __bucket_insert_point((*pp)->next); // First bucket holds collision count + } +#else + pointCount++; + __bucket_insert_point(*pp); // No collision count storage +#endif + return collisionCount; +} + +void H::__bucket_insert_point(bucket* p){ + if(p->t2 == IFLAG){ // initialization flag, is it in the domain of t2? + p->t2 = H::t2; + bucketCount++; // Record start of new point-locale collision chain + p->snext = new sbucket(); + __sbucket_insert_point(p->snext); + return; + } + + if(p->t2 == H::t2){ + __sbucket_insert_point(p->snext); + return; + } + + if(p->next){ + __bucket_insert_point(p->next); + } + + else{ + p->next = new bucket(); + __bucket_insert_point(p->next); + } + +} + +void H::__sbucket_insert_point(sbucket* p){ + if(p->pointID==IFLAG){ + p->pointID = H::p; + return; + } + + // Search for pointID + if(p->snext){ + __sbucket_insert_point(p->snext); + } + else{ + // Make new point collision bucket at end of list + p->snext = new sbucket(); + __sbucket_insert_point(p->snext); + } +} + +inline bucket** H::__get_bucket(int j){ + return *(h+j); +} + +// hash functions G +G::G(float ww, Uns32T kk,Uns32T mm, Uns32T dd, Uns32T NN, Uns32T CC, float r): + H(kk,mm,dd,NN,CC), + w(ww), + radius(r), + maxp(0), + calling_instance(0), + add_point_callback(0), + lshHeader(0) +{ + Uns32T j; +#ifdef USE_U_FUNCTIONS + G::A = new float**[ H::m ]; // m x k x d random projectors + G::b = new float*[ H::m ]; // m x k random biases +#else + G::A = new float**[ H::L ]; // m x k x d random projectors + G::b = new float*[ H::L ]; // m x k random biases +#endif + G::g = new Uns32T*[ H::L ]; // L x k random projections + assert( G::g && G::A && G::b ); // failure +#ifdef USE_U_FUNCTIONS + // Use m \times u_i functions \in R^{(k/2) \times (d)} + // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d} + for( j = 0; j < H::m ; j++ ){ // m functions u_i(v) + G::A[j] = new float*[ H::k/2 ]; // k/2 x d 2-stable distribution coefficients + G::b[j] = new float[ H::k/2 ]; // bias + assert( G::A[j] && G::b[j] ); // failure + for( kk = 0; kk < H::k/2 ; kk++ ){ + G::A[j][kk] = new float[ H::d ]; + assert( G::A[j][kk] ); // failure + for(Uns32T i = 0 ; i < H::d ; i++ ) + G::A[j][kk][i] = randn(); // Normal + G::b[j][kk] = ranf()*G::w; // Uniform + } + } +#else + // Use m \times u_i functions \in R^{k \times (d)} + // Combine to make L=m(m-1)/2 hash functions \in R^{k \times d} + for( j = 0; j < H::L ; j++ ){ // m functions u_i(v) + G::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients + G::b[j] = new float[ H::k ]; // bias + assert( G::A[j] && G::b[j] ); // failure + for( kk = 0; kk < H::k ; kk++ ){ + G::A[j][kk] = new float[ H::d ]; + assert( G::A[j][kk] ); // failure + for(Uns32T i = 0 ; i < H::d ; i++ ) + G::A[j][kk][i] = randn(); // Normal + G::b[j][kk] = ranf()*G::w; // Uniform + } + } +#endif + + for( j = 0 ; j < H::L ; j++ ){ // L functions g_j(u_a, u_b) a,b \in nchoosek(m,2) + G::g[j] = new Uns32T[ H::k ]; // k x 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z + assert( G::g[j] ); + } + + initialize_partial_functions(); // m partially evaluated hash functions +} + +// Serialize from file LSH constructor +// Read parameters from database file +// Load the hash functions, close the database +// Optionally load the LSH tables into head-allocated lists in core +G::G(char* filename, bool lshInCoreFlag): + calling_instance(0), + add_point_callback(0) +{ + int dbfid = unserialize_lsh_header(filename); + unserialize_lsh_functions(dbfid); + initialize_partial_functions(); + + // Format1 only needs unserializing if specifically requested + if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2) && lshInCoreFlag){ + unserialize_lsh_hashtables_format1(dbfid); + } + + // Format2 always needs unserializing + if(lshHeader->flags&O2_SERIAL_FILEFORMAT2 && lshInCoreFlag){ + unserialize_lsh_hashtables_format2(dbfid); + } + + close(dbfid); +} + +void G::initialize_partial_functions(){ + +#ifdef USE_U_FUNCTIONS + uu = vector >(H::m); + for( Uns32T aa=0 ; aa < H::m ; aa++ ) + uu[aa] = vector( H::k/2 ); +#else + uu = vector >(H::L); + for( Uns32T aa=0 ; aa < H::L ; aa++ ) + uu[aa] = vector( H::k ); +#endif +} + + +// Generate z ~ N(0,1) +float G::randn(){ +// Box-Muller + float x1, x2; + do{ + x1 = ranf(); + } while (x1 == 0); // cannot take log of 0 + x2 = ranf(); + float z; + z = sqrtf(-2.0 * logf(x1)) * cosf(2.0 * M_PI * x2); + return z; +} + +float G::ranf(){ +#ifdef MT19937 + return (float) genrand_real2(); +#else + return (float)( (double)rand() / ((double)(RAND_MAX)+(double)(1)) ); +#endif +} + +// range is 1..2^29 +/* FIXME: that looks like an ... odd range. Still. */ +Uns32T H::__randr(){ +#ifdef MT19937 + return (Uns32T)((genrand_int32() >> 3) + 1); +#else + return (Uns32T) ((rand() >> 2) + 1); +#endif +} + +G::~G(){ + Uns32T j,kk; +#ifdef USE_U_FUNCTIONS + for( j = 0 ; j < H::m ; j++ ){ + for( kk = 0 ; kk < H::k/2 ; kk++ ) + delete[] A[j][kk]; + delete[] A[j]; + } + delete[] A; + for( j = 0 ; j < H::m ; j++ ) + delete[] b[j]; + delete[] b; +#else + for( j = 0 ; j < H::L ; j++ ){ + for( kk = 0 ; kk < H::k ; kk++ ) + delete[] A[j][kk]; + delete[] A[j]; + } + delete[] A; + for( j = 0 ; j < H::L ; j++ ) + delete[] b[j]; + delete[] b; +#endif + + for( j = 0 ; j < H::L ; j++ ) + delete[] g[j]; + delete[] g; + delete lshHeader; +} + +// Compute all hash functions for vector v +// #ifdef USE_U_FUNCTIONS use Combination of m \times h_i \in R^{(k/2) \times d} +// to make L \times g_j functions \in Z^k +void G::compute_hash_functions(vector& v){ // v \in R^d + float iw = 1. / G::w; // hash bucket width + Uns32T aa, kk; + if( v.size() != H::d ) + error("v.size != H::d","","compute_hash_functions"); // check input vector dimensionality + double tmp = 0; + float *pA, *pb; + Uns32T *pg; + int dd; + vector::iterator vi; + vector::iterator ui; + +#ifdef USE_U_FUNCTIONS + Uns32T bb; + // Store m dot products to expand + for( aa=0; aa < H::m ; aa++ ){ + ui = uu[aa].begin(); + for( kk = 0 ; kk < H::k/2 ; kk++ ){ + pb = *( G::b + aa ) + kk; + pA = * ( * ( G::A + aa ) + kk ); + dd = H::d; + tmp = 0.; + vi = v.begin(); + while( dd-- ) + tmp += *pA++ * *vi++; // project + tmp += *pb; // translate + tmp *= iw; // scale + *ui++ = (Uns32T) floor(tmp); // floor + } + } + // Binomial combinations of functions u_{a,b} \in Z^{(k/2) \times d} + Uns32T j; + for( aa=0, j=0 ; aa < H::m-1 ; aa++ ) + for( bb = aa + 1 ; bb < H::m ; bb++, j++ ){ + pg= *( G::g + j ); // L \times functions g_j(v) \in Z^k + // u_1 \in Z^{(k/2) \times d} + ui = uu[aa].begin(); + kk=H::k/2; + while( kk-- ) + *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... x(k/2)]; xk \in Z + // u_2 \in Z^{(k/2) \times d} + ui = uu[bb].begin(); + kk=H::k/2; + while( kk--) + *pg++ = *ui++; // hash function g_j(v)=[x(k/2+1) x(k/2+2) ... xk]; xk \in Z + } +#else + for( aa=0; aa < H::L ; aa++ ){ + ui = uu[aa].begin(); + for( kk = 0 ; kk < H::k ; kk++ ){ + pb = *( G::b + aa ) + kk; + pA = * ( * ( G::A + aa ) + kk ); + dd = H::d; + tmp = 0.; + vi = v.begin(); + while( dd-- ) + tmp += *pA++ * *vi++; // project + tmp += *pb; // translate + tmp *= iw; // scale + *ui++ = (Uns32T) (floor(tmp)); // floor + } + } + // Compute hash functions + for( aa=0 ; aa < H::L ; aa++ ){ + pg= *( G::g + aa ); // L \times functions g_j(v) \in Z^k + // u_1 \in Z^{k \times d} + ui = uu[aa].begin(); + kk=H::k; + while( kk-- ) + *pg++ = *ui++; // hash function g_j(v)=[x1 x2 ... xk]; xk \in Z + } +#endif + +} + + +// single point insertion; inserted values are hash value and pointID +Uns32T G::insert_point(vector& v, Uns32T pp){ + Uns32T collisionCount = 0; + H::p = pp; + if(pp>G::maxp) + G::maxp=pp; // Store highest pointID in database + compute_hash_functions( v ); + for(Uns32T j = 0 ; j < H::L ; j++ ){ // insertion + __generate_hash_keys( *( G::g + j ), *( H::r1 + j ), *( H::r2 + j ) ); + collisionCount += bucket_insert_point( *(h + j) + t1 ); + } + return collisionCount; +} + + +// batch insert for a point set +// inserted values are vector hash value and pointID starting at basePointID +void G::insert_point_set(vector >& vv, Uns32T basePointID){ + for(Uns32T point=0; point& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){ + calling_instance = caller; + add_point_callback = add_point; + compute_hash_functions( v ); + for(Uns32T j = 0 ; j < H::L ; j++ ){ + __generate_hash_keys( *( G::g + j ), *( H::r1 + j ), *( H::r2 + j ) ); + if( bucket* bPtr = *(__get_bucket(j) + get_t1()) ) +#ifdef LSH_BLOCK_FULL_ROWS + bucket_chain_point( bPtr->next, qpos); +#else + bucket_chain_point( bPtr , qpos); +#endif + } +} + +void G::retrieve_point_set(vector >& vv, ReporterCallbackPtr add_point, void* caller){ + for(Uns32T qpos = 0 ; qpos < vv.size() ; qpos++ ) + retrieve_point(vv[qpos], qpos, add_point, caller); +} + +// export lsh tables to table structure on disk +// +// LSH TABLE STRUCTURE +// ---header 64 bytes --- +// [magic #tables #rows #cols elementSize databaseSize version flags dim #funs 0 0 0 0 0 0] +// +// ---random projections L x k x d float --- +// A[0][0][0] A[0][0][1] ... A[0][0][d-1] +// A[0][1][0] A[0][1][1] ... A[1][1][d-1] +// ... +// A[0][K-1][0] A[0][1][1] ... A[0][k-1][d-1] +// ... +// ... +// A[L-1][0][0] A[M-1][0][1] ... A[L-1][0][d-1] +// A[L-1][1][0] A[M-1][1][1] ... A[L-1][1][d-1] +// ... +// A[L-1][k-1][0] A[M-1][1][1] ... A[L-1][k-1][d-1] +// +// ---bias L x k float --- +// b[0][0] b[0][1] ... b[0][k-1] +// b[1][0] b[1][1] ... b[1][k-1] +// ... +// b[L-1][0] b[L-1][1] ... b[L-1][k-1] +// +// ---random r1 L x k float --- +// r1[0][0] r1[0][1] ... r1[0][k-1] +// r1[1][0] r1[1][1] ... r1[1][k-1] +// ... +// r1[L-1][0] r1[L-1][1] ... r1[L-1][k-1] +// +// ---random r2 L x k float --- +// r2[0][0] r2[0][1] ... r2[0][k-1] +// r2[1][0] r2[1][1] ... r2[1][k-1] +// ... +// r2[L-1][0] r2[L-1][1] ... r2[L-1][k-1] +// +// ---hash table 0: N x C x 8 --- +// [t2 pointID][t2 pointID]...[t2 pointID] +// [t2 pointID][t2 pointID]...[t2 pointID] +// ... +// [t2 pointID][t2 pointID]...[t2 pointID] +// +// ---hash table 1: N x C x 8 --- +// [t2 pointID][t2 pointID]...[t2 pointID] +// [t2 pointID][t2 pointID]...[t2 pointID] +// ... +// [t2 pointID][t2 pointID]...[t2 pointID] +// +// ... +// +// ---hash table L-1: N x C x 8 --- +// [t2 pointID][t2 pointID]...[t2 pointID] +// [t2 pointID][t2 pointID]...[t2 pointID] +// ... +// [t2 pointID][t2 pointID]...[t2 pointID] +// + +// Serial header constructors +SerialHeader::SerialHeader(){;} +SerialHeader::SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float r, Uns32T p, Uns32T FMT): + lshMagic(O2_SERIAL_MAGIC), + binWidth(W), + numTables(L), + numRows(N), + numCols(C), + elementSize(O2_SERIAL_ELEMENT_SIZE), + version(O2_SERIAL_VERSION), + size(L * align_up(N * C * O2_SERIAL_ELEMENT_SIZE, get_page_logn()) // hash tables + + align_up(O2_SERIAL_HEADER_SIZE + // header + hash functions + L*k*( sizeof(float)*d+2*sizeof(Uns32T)+sizeof(float)),get_page_logn())), + flags(FMT), + dataDim(d), + numFuns(k), + radius(r), + maxp(p){;} // header + +float* G::get_serial_hashfunction_base(char* db){ + if(db&&lshHeader) + return (float*)(db+O2_SERIAL_HEADER_SIZE); + else return NULL; +} + +SerialElementT* G::get_serial_hashtable_base(char* db){ + if(db&&lshHeader) + return (SerialElementT*)(db+get_serial_hashtable_offset()); + else + return NULL; +} + +Uns32T G::get_serial_hashtable_offset(){ + if(lshHeader) + return align_up(O2_SERIAL_HEADER_SIZE + + L*lshHeader->numFuns*( sizeof(float)*lshHeader->dataDim+2*sizeof(Uns32T)+sizeof(float)),get_page_logn()); + else + return 0; +} + +void G::serialize(char* filename, Uns32T serialFormat){ + int dbfid; + char* db; + int dbIsNew=0; + + // Check requested serialFormat + if(!(serialFormat==O2_SERIAL_FILEFORMAT1 || serialFormat==O2_SERIAL_FILEFORMAT2)) + error("Unrecognized serial file format request: ", "serialize()"); + + // Test to see if file exists + if((dbfid = open (filename, O_RDONLY)) < 0) + // If it doesn't, then create the file (CREATE) + if(errno == ENOENT){ + // Create the file + std::cout << "Creating new serialized LSH database:" << filename << "..."; + std::cout.flush(); + serial_create(filename, serialFormat); + dbIsNew=1; + } + else + // The file can't be opened + error("Can't open the file", filename, "open"); + + // Load the on-disk header into core + dbfid = serial_open(filename, 1); // open for write + db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer + serial_get_header(db); // read header + serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap + + // Check compatibility of core and disk data structures + if( !serial_can_merge(serialFormat) ) + error("Incompatible core and serial LSH, data structure dimensions mismatch."); + + // For new LSH databases write the hashfunctions + if(dbIsNew) + serialize_lsh_hashfunctions(dbfid); + // Write the hashtables in the requested format + if(serialFormat == O2_SERIAL_FILEFORMAT1) + serialize_lsh_hashtables_format1(dbfid, !dbIsNew); + else + serialize_lsh_hashtables_format2(dbfid, !dbIsNew); + + if(!dbIsNew){ + db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1);// get database pointer + //serial_get_header(db); // read header + cout << "maxp = " << G::maxp << endl; + lshHeader->maxp=G::maxp; + // Default to FILEFORMAT1 + if(!(lshHeader->flags&O2_SERIAL_FILEFORMAT2)) + lshHeader->flags|=O2_SERIAL_FILEFORMAT2; + memcpy((char*)db, (char*)lshHeader, sizeof(SerialHeaderT)); + serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap + } + + serial_close(dbfid); +} + +// Test to see if core structure and requested format is +// compatible with currently opened database +int G::serial_can_merge(Uns32T format){ + SerialHeaderT* that = lshHeader; + if( (format==O2_SERIAL_FILEFORMAT2 && !that->flags&O2_SERIAL_FILEFORMAT2) + || (format!=O2_SERIAL_FILEFORMAT2 && that->flags&O2_SERIAL_FILEFORMAT2) + || !( this->w == that->binWidth && + this->L == that->numTables && + this->N == that->numRows && + this->k == that->numFuns && + this->d == that->dataDim && + sizeof(SerialElementT) == that->elementSize && + this->radius == that->radius)){ + serial_print_header(format); + return 0; + } + else + return 1; +} + +// Used as an error message for serial_can_merge() +void G::serial_print_header(Uns32T format){ + std::cout << "Fc:" << format << " Fs:" << lshHeader->flags << endl; + std::cout << "Wc:" << w << " Ls:" << lshHeader->binWidth << endl; + std::cout << "Lc:" << L << " Ls:" << lshHeader->numTables << endl; + std::cout << "Nc:" << N << " Ns:" << lshHeader->numRows << endl; + std::cout << "kc:" << k << " ks:" << lshHeader->numFuns << endl; + std::cout << "dc:" << d << " ds:" << lshHeader->dataDim << endl; + std::cout << "sc:" << sizeof(SerialElementT) << " ss:" << lshHeader->elementSize << endl; + std::cout << "rc:" << this->radius << " rs:" << lshHeader->radius << endl; +} + +int G::serialize_lsh_hashfunctions(int fid){ + float* pf; + Uns32T *pu; + Uns32T x,y,z; + + db = serial_mmap(fid, get_serial_hashtable_offset(), 1);// get database pointer + pf = get_serial_hashfunction_base(db); + + // HASH FUNCTIONS + // Write the random projectors A[][][] +#ifdef USE_U_FUNCTIONS + for( x = 0 ; x < H::m ; x++ ) + for( y = 0 ; y < H::k/2 ; y++ ) +#else + for( x = 0 ; x < H::L ; x++ ) + for( y = 0 ; y < H::k ; y++ ) +#endif + for( z = 0 ; z < d ; z++ ) + *pf++ = A[x][y][z]; + + // Write the random biases b[][] +#ifdef USE_U_FUNCTIONS + for( x = 0 ; x < H::m ; x++ ) + for( y = 0 ; y < H::k/2 ; y++ ) +#else + for( x = 0 ; x < H::L ; x++ ) + for( y = 0 ; y < H::k ; y++ ) +#endif + *pf++=b[x][y]; + + pu = (Uns32T*)pf; + + // Write the Z projectors r1[][] + for( x = 0 ; x < H::L ; x++) + for( y = 0 ; y < H::k ; y++) + *pu++ = r1[x][y]; + + // Write the Z projectors r2[][] + for( x = 0 ; x < H::L ; x++) + for( y = 0; y < H::k ; y++) + *pu++ = r2[x][y]; + + serial_munmap(db, get_serial_hashtable_offset()); + return 1; +} + +int G::serialize_lsh_hashtables_format1(int fid, int merge){ + SerialElementT *pe, *pt; + Uns32T x,y; + + if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT1) ) + error("Cannot merge core and serial LSH, data structure dimensions mismatch."); + + Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; + Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount; + // Write the hash tables + for( x = 0 ; x < H::L ; x++ ){ + std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT1..."; + std::cout.flush(); + // memory map a single hash table for sequential access + // Align each hash table to page boundary + char* dbtable = serial_mmap(fid, hashTableSize, 1, + align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn())); + if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0) + error("could not advise hashtable memory","","madvise"); + + maxColCount=0; + minColCount=O2_SERIAL_MAX_COLS; + meanColCount=0; + colCountN=0; + pt=(SerialElementT*)dbtable; + for( y = 0 ; y < H::N ; y++ ){ + // Move disk pointer to beginning of row + pe=pt+y*lshHeader->numCols; + + colCount=0; + if(bucket* bPtr = h[x][y]) + if(merge) +#ifdef LSH_BLOCK_FULL_ROWS + serial_merge_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket + else + serial_write_hashtable_row_format1(pe, bPtr->next, colCount); // skip collision counter bucket +#else + serial_merge_hashtable_row_format1(pe, bPtr, colCount); + else + serial_write_hashtable_row_format1(pe, bPtr, colCount); +#endif + if(colCount){ + if(colCountmaxColCount) + maxColCount=colCount; + meanColCount+=colCount; + colCountN++; + } + } + if(colCountN) + std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN + << ", min = " << minColCount << ", max = " << maxColCount + << endl; + serial_munmap(dbtable, hashTableSize); + } + + // We're done writing + return 1; +} + +void G::serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* b, Uns32T& colCount){ + while(b && b->t2!=IFLAG){ + SerialElementT*pe=pr; // reset disk pointer to beginning of row + serial_merge_element_format1(pe, b->snext, b->t2, colCount); + b=b->next; + } +} + +void G::serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount){ + while(sb){ + if(colCount==lshHeader->numCols){ + std::cout << "!point-chain full " << endl; + return; + } + Uns32T c=0; + // Merge collision chains + while(cnumCols){ + if( (pe+c)->hashValue==IFLAG){ + (pe+c)->hashValue=t2; + (pe+c)->pointID=sb->pointID; + colCount=c+1; + if(c+1numCols) + (pe+c+1)->hashValue=IFLAG; + break; + } + c++; + } + sb=sb->snext; + } + return; +} + +void G::serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* b, Uns32T& colCount){ + pe->hashValue=IFLAG; + while(b && b->t2!=IFLAG){ + serial_write_element_format1(pe, b->snext, b->t2, colCount); + b=b->next; + } +} + +void G::serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount){ + while(sb){ + if(colCount==lshHeader->numCols){ + std::cout << "!point-chain full " << endl; + return; + } + pe->hashValue=t2; + pe->pointID=sb->pointID; + pe++; + colCount++; + sb=sb->snext; + } + pe->hashValue=IFLAG; + return; +} + +int G::serialize_lsh_hashtables_format2(int fid, int merge){ + Uns32T x,y; + + if( merge && !serial_can_merge(O2_SERIAL_FILEFORMAT2) ) + error("Cannot merge core and serial LSH, data structure dimensions mismatch."); + + // We must pereform FORMAT1 merges in core + if(merge) + unserialize_lsh_hashtables_format2(fid); + + Uns32T colCount, meanColCount, colCountN, maxColCount, minColCount, t1; + lseek(fid, get_serial_hashtable_offset(), SEEK_SET); + + // Write the hash tables + for( x = 0 ; x < H::L ; x++ ){ + std::cout << (merge ? "merging":"writing") << " hash table " << x << " FORMAT2..."; + std::cout.flush(); + maxColCount=0; + minColCount=O2_SERIAL_MAX_COLS; + meanColCount=0; + colCountN=0; + for( y = 0 ; y < H::N ; y++ ){ + colCount=0; + if(bucket* bPtr = h[x][y]){ + t1 = y | O2_SERIAL_FLAGS_T1_BIT; + if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ + close(fid); + error("write error in serial_write_hashtable_format2() [t1]"); + } +#ifdef LSH_BLOCK_FULL_ROWS + serial_write_hashtable_row_format2(fid, bPtr->next, colCount); // skip collision counter bucket +#else + serial_write_hashtable_row_format2(fid, bPtr, colCount); +#endif + } + if(colCount){ + if(colCountmaxColCount) + maxColCount=colCount; + meanColCount+=colCount; + colCountN++; + } + } + // Write END of table marker + t1 = O2_SERIAL_FLAGS_END_BIT; + if( write(fid, &t1, sizeof(Uns32T)) != sizeof(Uns32T) ){ + close(fid); + error("write error in serial_write_hashtable_format2() [end]"); + } + + if(colCountN) + std::cout << "#rows with collisions =" << colCountN << ", mean = " << meanColCount/(float)colCountN + << ", min = " << minColCount << ", max = " << maxColCount + << endl; + } + + // We're done writing + return 1; +} + +void G::serial_write_hashtable_row_format2(int fid, bucket* b, Uns32T& colCount){ + while(b && b->t2!=IFLAG){ + t2 = O2_SERIAL_FLAGS_T2_BIT; + if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){ + close(fid); + error("write error in serial_write_hashtable_row_format2()"); + } + t2 = b->t2; + if( write(fid, &t2, sizeof(Uns32T)) != sizeof(Uns32T) ){ + close(fid); + error("write error in serial_write_hashtable_row_format2()"); + } + serial_write_element_format2(fid, b->snext, colCount); + b=b->next; + } +} + +void G::serial_write_element_format2(int fid, sbucket* sb, Uns32T& colCount){ + while(sb){ + if(write(fid, &sb->pointID, sizeof(Uns32T))!=sizeof(Uns32T)){ + close(fid); + error("Write error in serial_write_element_format2()"); + } + colCount++; + sb=sb->snext; + } +} + + +int G::serial_create(char* filename, Uns32T FMT){ + return serial_create(filename, w, L, N, C, k, d, FMT); +} + + +int G::serial_create(char* filename, float binWidth, Uns32T numTables, Uns32T numRows, Uns32T numCols, + Uns32T numFuns, Uns32T dim, Uns32T FMT){ + + if(numTables > O2_SERIAL_MAX_TABLES || numRows > O2_SERIAL_MAX_ROWS + || numCols > O2_SERIAL_MAX_COLS || numFuns > O2_SERIAL_MAX_FUNS + || dim>O2_SERIAL_MAX_DIM){ + error("LSH parameters out of bounds for serialization"); + } + + int dbfid; + if ((dbfid = open (filename, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) + error("Can't create serial file", filename, "open"); + get_lock(dbfid, 1); + + // Make header first to get size of serialized database + lshHeader = new SerialHeaderT(binWidth, numTables, numRows, numCols, numFuns, dim, radius, maxp, FMT); + + // go to the location corresponding to the last byte + if (lseek (dbfid, lshHeader->get_size() - 1, SEEK_SET) == -1) + error("lseek error in db file", "", "lseek"); + + // write a dummy byte at the last location + if (write (dbfid, "", 1) != 1) + error("write error", "", "write"); + + db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 1); + + memcpy (db, lshHeader, O2_SERIAL_HEADER_SIZE); + + serial_munmap(db, O2_SERIAL_HEADER_SIZE); + + close(dbfid); + + std::cout << "done initializing tables." << endl; + + return 1; +} + +char* G::serial_mmap(int dbfid, Uns32T memSize, Uns32T forWrite, off_t offset){ + if(forWrite){ + if ((db = (char*) mmap(0, memSize, PROT_READ | PROT_WRITE, + MAP_SHARED, dbfid, offset)) == (caddr_t) -1) + error("mmap error in request for writable serialized database", "", "mmap"); + } + else if ((db = (char*) mmap(0, memSize, PROT_READ, MAP_SHARED, dbfid, offset)) == (caddr_t) -1) + error("mmap error in read-only serialized database", "", "mmap"); + + return db; +} + +SerialHeaderT* G::serial_get_header(char* db){ + lshHeader = new SerialHeaderT(); + memcpy((char*)lshHeader, db, sizeof(SerialHeaderT)); + + if(lshHeader->lshMagic!=O2_SERIAL_MAGIC) + error("Not an LSH database file"); + + return lshHeader; +} + +void G::serial_munmap(char* db, Uns32T N){ + munmap(db, N); +} + +int G::serial_open(char* filename, int writeFlag){ + int dbfid; + if(writeFlag){ + if ((dbfid = open (filename, O_RDWR)) < 0) + error("Can't open serial file for read/write", filename, "open"); + get_lock(dbfid, writeFlag); + } + else{ + if ((dbfid = open (filename, O_RDONLY)) < 0) + error("Can't open serial file for read", filename, "open"); + get_lock(dbfid, 0); + } + + return dbfid; +} + +void G::serial_close(int dbfid){ + + release_lock(dbfid); + close(dbfid); +} + +int G::unserialize_lsh_header(char* filename){ + + int dbfid; + char* db; + // Test to see if file exists + if((dbfid = open (filename, O_RDONLY)) < 0) + error("Can't open the file", filename, "open"); + close(dbfid); + dbfid = serial_open(filename, 0); // open for read + db = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer + serial_get_header(db); // read header + serial_munmap(db, O2_SERIAL_HEADER_SIZE); // drop mmap + + // Unserialize header parameters + H::L = lshHeader->numTables; + H::m = (Uns32T)( (1.0 + sqrt(1 + 8.0*(int)H::L)) / 2.0); + H::N = lshHeader->numRows; + H::C = lshHeader->numCols; + H::k = lshHeader->numFuns; + H::d = lshHeader->dataDim; + G::w = lshHeader->binWidth; + G::radius = lshHeader->radius; + G::maxp = lshHeader->maxp; + + return dbfid; +} + +// unserialize the LSH parameters +// we leave the LSH tree on disk as a flat file +// it is this flat file that we search by memory mapping +void G::unserialize_lsh_functions(int dbfid){ + Uns32T j, kk; + float* pf; + Uns32T* pu; + + // Load the hash functions into core + char* db = serial_mmap(dbfid, get_serial_hashtable_offset(), 0);// get database pointer again + +#ifdef USE_U_FUNCTIONS + G::A = new float**[ H::m ]; // m x k x d random projectors + G::b = new float*[ H::m ]; // m x k random biases +#else + G::A = new float**[ H::L ]; // m x k x d random projectors + G::b = new float*[ H::L ]; // m x k random biases +#endif + G::g = new Uns32T*[ H::L ]; // L x k random projections + assert(g&&A&&b); // failure + + pf = get_serial_hashfunction_base(db); + +#ifdef USE_U_FUNCTIONS + for( j = 0 ; j < H::m ; j++ ){ // L functions gj(v) + G::A[j] = new float*[ H::k/2 ]; // k x d 2-stable distribution coefficients + G::b[j] = new float[ H::k/2 ]; // bias + assert( G::A[j] && G::b[j] ); // failure + for( kk = 0 ; kk < H::k/2 ; kk++ ){ // Normally distributed hash functions +#else + for( j = 0 ; j < H::L ; j++ ){ // L functions gj(v) + G::A[j] = new float*[ H::k ]; // k x d 2-stable distribution coefficients + G::b[j] = new float[ H::k ]; // bias + assert( G::A[j] && G::b[j] ); // failure + for( kk = 0 ; kk < H::k ; kk++ ){ // Normally distributed hash functions +#endif + G::A[j][kk] = new float[ H::d ]; + assert( G::A[j][kk] ); // failure + for(Uns32T i = 0 ; i < H::d ; i++ ) + G::A[j][kk][i] = *pf++; // Normally distributed random vectors + } + } +#ifdef USE_U_FUNCTIONS + for( j = 0 ; j < H::m ; j++ ) // biases b + for( kk = 0 ; kk < H::k/2 ; kk++ ) +#else + for( j = 0 ; j < H::L ; j++ ) // biases b + for( kk = 0 ; kk < H::k ; kk++ ) +#endif + G::b[j][kk] = *pf++; + + for( j = 0 ; j < H::L ; j++){ // 32-bit hash values, gj(v)=[x0 x1 ... xk-1] xk \in Z + G::g[j] = new Uns32T[ H::k ]; + assert( G::g[j] ); + } + + + H::__initialize_data_structures(); + + pu = (Uns32T*)pf; + for( j = 0 ; j < H::L ; j++ ) // Z projectors r1 + for( kk = 0 ; kk < H::k ; kk++ ) + H::r1[j][kk] = *pu++; + + for( j = 0 ; j < H::L ; j++ ) // Z projectors r2 + for( kk = 0 ; kk < H::k ; kk++ ) + H::r2[j][kk] = *pu++; + + serial_munmap(db, get_serial_hashtable_offset()); +} + +void G::unserialize_lsh_hashtables_format1(int fid){ + SerialElementT *pe, *pt; + Uns32T x,y; + Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; + // Read the hash tables into core + for( x = 0 ; x < H::L ; x++ ){ + // memory map a single hash table + // Align each hash table to page boundary + char* dbtable = serial_mmap(fid, hashTableSize, 0, + align_up(get_serial_hashtable_offset()+x*hashTableSize, get_page_logn())); + if(madvise(dbtable, hashTableSize, MADV_SEQUENTIAL)<0) + error("could not advise hashtable memory","","madvise"); + pt=(SerialElementT*)dbtable; + for( y = 0 ; y < H::N ; y++ ){ + // Move disk pointer to beginning of row + pe=pt+y*lshHeader->numCols; + unserialize_hashtable_row_format1(pe, h[x]+y); +#ifdef __LSH_DUMP_CORE_TABLES__ + printf("S[%d,%d]", x, y); + serial_bucket_dump(pe); + printf("C[%d,%d]", x, y); + dump_hashtable_row(h[x][y]); +#endif + } + serial_munmap(dbtable, hashTableSize); + } +} + +void G::unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b){ + Uns32T colCount = 0; + while(colCount!=lshHeader->numCols && pe->hashValue !=IFLAG){ + H::p = pe->pointID; // current point ID + t2 = pe->hashValue; + bucket_insert_point(b); + pe++; + colCount++; + } +} + +void G::unserialize_lsh_hashtables_format2(int fid){ + Uns32T x=0,y=0; + + // Seek to hashtable base offset + if(lseek(fid, get_serial_hashtable_offset(), SEEK_SET)!=get_serial_hashtable_offset()){ + close(fid); + error("Seek error in unserialize_lsh_hashtables_format2"); + } + + // Read the hash tables into core (structure is given in header) + while( x < H::L){ + if(read(fid, &(H::t1), sizeof(Uns32T))!=sizeof(Uns32T)){ + close(fid); + error("Read error","unserialize_lsh_hashtables_format2()"); + } + if(H::t1&O2_SERIAL_FLAGS_END_BIT) + x++; // End of table + else + while(y < H::N){ + // Read a row and move file pointer to beginning of next row or table + if(!(H::t1&O2_SERIAL_FLAGS_T1_BIT)){ + close(fid); + error("State matchine error t1","unserialize_lsh_hashtables_format2()"); + } + y = H::t1 ^ O2_SERIAL_FLAGS_T1_BIT; + if(y>=H::N){ + close(fid); + error("Unserialized hashtable row pointer out of range","unserialize_lsh_hashtables_format2()"); + } + Uns32T token = unserialize_hashtable_row_format2(fid, h[x]+y); + +#ifdef __LSH_DUMP_CORE_TABLES__ + printf("C[%d,%d]", x, y); + dump_hashtable_row(h[x][y]); +#endif + // Check that token is valid + if( !(token&O2_SERIAL_FLAGS_T1_BIT || token&O2_SERIAL_FLAGS_END_BIT) ){ + close(fid); + error("State machine error end of row/table", "unserialize_lsh_hashtables_format2()"); + } + // Check for end of table flag + if(token&O2_SERIAL_FLAGS_END_BIT){ + x++; + break; + } + // Check for new row flag + if(token&O2_SERIAL_FLAGS_T1_BIT) + H::t1 = token; + } + } +} + +Uns32T G::unserialize_hashtable_row_format2(int fid, bucket** b){ + bool pointFound = false; + if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ + close(fid); + error("Read error T2 token","unserialize_hashtable_row_format2"); + } + if( !(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2==O2_SERIAL_FLAGS_T2_BIT)){ + close(fid); + error("State machine error: expected E or T2"); + } + while(!(H::t2==O2_SERIAL_FLAGS_END_BIT || H::t2&O2_SERIAL_FLAGS_T1_BIT)){ + pointFound=false; + // Check for T2 token + if(H::t2!=O2_SERIAL_FLAGS_T2_BIT) + error("State machine error T2 token", "unserialize_hashtable_row_format2()"); + // Read t2 value + if(read(fid, &(H::t2), sizeof(Uns32T)) != sizeof(Uns32T)){ + close(fid); + error("Read error t2","unserialize_hashtable_row_format2"); + } + if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ + close(fid); + error("Read error H::p","unserialize_hashtable_row_format2"); + } + while(!(H::p==O2_SERIAL_FLAGS_END_BIT || H::p&O2_SERIAL_FLAGS_T1_BIT || H::p==O2_SERIAL_FLAGS_T2_BIT )){ + pointFound=true; + bucket_insert_point(b); + if(read(fid, &(H::p), sizeof(Uns32T)) != sizeof(Uns32T)){ + close(fid); + error("Read error H::p","unserialize_hashtable_row_format2"); + } + } + H::t2 = H::p; // Copy last found token to t2 + if(!pointFound) + error("State machine error: point", "unserialize_hashtable_row_format2()"); + } + return H::t2; // holds current token +} + +void G::dump_hashtable_row(bucket* p){ + while(p && p->t2!=IFLAG){ + sbucket* sbp = p->snext; + while(sbp){ + printf("(%0X,%u)", p->t2, sbp->pointID); + fflush(stdout); + sbp=sbp->snext; + } + p=p->next; + } + printf("\n"); +} + + +// G::serial_retrieve_point( ... ) +// retrieves (pointID) from a serialized LSH database +// +// inputs: +// filename - file name of serialized LSH database +// vv - query point set +// +// outputs: +// inserts retrieved points into add_point() callback method +void G::serial_retrieve_point_set(char* filename, vector >& vv, ReporterCallbackPtr add_point, void* caller) +{ + int dbfid = serial_open(filename, 0); // open for read + char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer + serial_get_header(dbheader); // read header + serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap + + if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){ + close(dbfid); + error("serial_retrieve_point_set is for SERIAL_FILEFORMAT1 only"); + } + + // size of each hash table + Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; + calling_instance = caller; // class instance variable used in ...bucket_chain_point() + add_point_callback = add_point; + + for(Uns32T j=0; jnumCols, qpos); // Point to correct row + } + serial_munmap(db, hashTableSize); // drop hashtable mmap + } + serial_close(dbfid); +} + +void G::serial_retrieve_point(char* filename, vector& v, Uns32T qpos, ReporterCallbackPtr add_point, void* caller){ + int dbfid = serial_open(filename, 0); // open for read + char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer + serial_get_header(dbheader); // read header + serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap + + if((lshHeader->flags & O2_SERIAL_FILEFORMAT2)){ + close(dbfid); + error("serial_retrieve_point is for SERIAL_FILEFORMAT1 only"); + } + + // size of each hash table + Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; + calling_instance = caller; + add_point_callback = add_point; + compute_hash_functions(v); + for(Uns32T j=0; jnumCols, qpos); // Point to correct row + serial_munmap(db, hashTableSize); // drop hashtable mmap + } + serial_close(dbfid); +} + +void G::serial_dump_tables(char* filename){ + int dbfid = serial_open(filename, 0); // open for read + char* dbheader = serial_mmap(dbfid, O2_SERIAL_HEADER_SIZE, 0);// get database pointer + serial_get_header(dbheader); // read header + serial_munmap(dbheader, O2_SERIAL_HEADER_SIZE); // drop header mmap + Uns32T hashTableSize=sizeof(SerialElementT)*lshHeader->numRows*lshHeader->numCols; + for(Uns32T j=0; jnumCols; + }while(pe<(SerialElementT*)db+lshHeader->numRows*lshHeader->numCols); + } + +} + +void G::serial_bucket_dump(SerialElementT* pe){ + SerialElementT* pend = pe+lshHeader->numCols; + while( !(pe->hashValue==IFLAG || pe==pend ) ){ + printf("(%0X,%u)",pe->hashValue,pe->pointID); + pe++; + } + printf("\n"); + fflush(stdout); +} + +void G::serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos){ + SerialElementT* pend = pe+lshHeader->numCols; + while( !(pe->hashValue==IFLAG || pe==pend ) ){ + if(pe->hashValue==t2){ // new match + add_point_callback(calling_instance, pe->pointID, qpos, radius); + } + pe++; + } +} + +void G::bucket_chain_point(bucket* p, Uns32T qpos){ + if(!p || p->t2==IFLAG) + return; + if(p->t2==t2){ // match + sbucket_chain_point(p->snext, qpos); // add to reporter + } + if(p->next){ + bucket_chain_point(p->next, qpos); // recurse + } +} + +void G::sbucket_chain_point(sbucket* p, Uns32T qpos){ + add_point_callback(calling_instance, p->pointID, qpos, radius); + if(p->snext){ + sbucket_chain_point(p->snext, qpos); + } +} + +void G::get_lock(int fd, bool exclusive) { + struct flock lock; + int status; + lock.l_type = exclusive ? F_WRLCK : F_RDLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; /* "the whole file" */ + retry: + do { + status = fcntl(fd, F_SETLKW, &lock); + } while (status != 0 && errno == EINTR); + if (status) { + if (errno == EAGAIN) { + sleep(1); + goto retry; + } else { + error("fcntl lock error", "", "fcntl"); + } + } +} + +void G::release_lock(int fd) { + struct flock lock; + int status; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + + status = fcntl(fd, F_SETLKW, &lock); + + if (status) + error("fcntl unlock error", "", "fcntl"); +} diff -r 63ae0dfc1767 -r d9a88cfd4ab6 lshlib.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lshlib.h Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,336 @@ +// lshlib.h - a library for locality sensitive hashtable insertion and retrieval +// +// Author: Michael Casey +// Copyright (c) 2008 Michael Casey, All Rights Reserved + +/* GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + See LICENSE.txt +*/ + +#ifndef __LSHLIB_H +#define __LSHLIB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef MT19937 +#include "mt19937/mt19937ar.h" +#endif + +#define IntT int +#define LongUns64T long long unsigned +#define Uns32T unsigned +#define Int32T int +#define BooleanT int +#define TRUE 1 +#define FALSE 0 + +// A big number (>> max # of points) +#define INDEX_START_EMPTY 1000000000U + +// 4294967291 = 2^32-5 +#define UH_PRIME_DEFAULT 4294967291U + +// 2^29 +#define MAX_HASH_RND 536870912U + +// 2^32-1 +#define TWO_TO_32_MINUS_1 4294967295U + +#define O2_SERIAL_VERSION 1 // Sync with SVN version +#define O2_SERIAL_HEADER_SIZE sizeof(SerialHeaderT) +#define O2_SERIAL_ELEMENT_SIZE sizeof(SerialElementT) +#define O2_SERIAL_MAX_TABLES (200) +#define O2_SERIAL_MAX_ROWS (1000000) +#define O2_SERIAL_MAX_COLS (1000) +#define O2_SERIAL_MAX_DIM (2000) +#define O2_SERIAL_MAX_FUNS (100) +#define O2_SERIAL_MAX_BINWIDTH (200) + +// Flags for Serial Header +#define O2_SERIAL_FILEFORMAT1 (0x1U) // Optimize for on-disk search +#define O2_SERIAL_FILEFORMAT2 (0x2U) // Optimize for in-core search + +// Flags for serialization fileformat2: use high 3 bits of Uns32T +#define O2_SERIAL_FLAGS_T1_BIT (0x80000000U) +#define O2_SERIAL_FLAGS_T2_BIT (0x40000000U) +#define O2_SERIAL_FLAGS_END_BIT (0x20000000U) + +unsigned align_up(unsigned x, unsigned w); + +#define O2_SERIAL_FUNCTIONS_SIZE (align_up(sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS * O2_SERIAL_MAX_DIM \ ++ sizeof(float) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS + \ ++ sizeof(Uns32T) * O2_SERIAL_MAX_TABLES * O2_SERIAL_MAX_FUNS * 2 \ ++ O2_SERIAL_HEADER_SIZE,get_page_logn())) + +#define O2_SERIAL_MAX_LSH_SIZE (O2_SERIAL_ELEMENT_SIZE * O2_SERIAL_MAX_TABLES \ + * O2_SERIAL_MAX_ROWS * O2_SERIAL_MAX_COLS + O2_SERIAL_FUNCTIONS_SIZE) + +#define O2_SERIAL_MAGIC ('o'|'2'<<8|'l'<<16|'s'<<24) + +using namespace std; + +Uns32T get_page_logn(); + +// Disk table entry +typedef class SerialElement SerialElementT; +class SerialElement { +public: + Uns32T hashValue; + Uns32T pointID; + + SerialElement(Uns32T h, Uns32T pID): + hashValue(h), + pointID(pID){} +}; + +// Disk header +typedef class SerialHeader SerialHeaderT; +class SerialHeader { +public: + Uns32T lshMagic; // unique identifier for file header + float binWidth; // hash-function bin width + Uns32T numTables; // number of hash tables in file + Uns32T numRows; // size of each hash table + Uns32T numCols; // max collisions in each hash table + Uns32T elementSize; // size of a hash bucket + Uns32T version; // version number of file format + Uns32T size; // total size of database (bytes) + Uns32T flags; // 32 bits of useful information + Uns32T dataDim; // vector dimensionality + Uns32T numFuns; // number of independent hash functions + float radius; // 32-bit floating point radius + Uns32T maxp; // number of unique IDs in the database + Uns32T value_14; // spare value + Uns32T value_15; // spare value + Uns32T value_16; // spare value + + SerialHeader(); + SerialHeader(float W, Uns32T L, Uns32T N, Uns32T C, Uns32T k, Uns32T d, float radius, Uns32T p, Uns32T FMT); + + float get_binWidth(){return binWidth;} + Uns32T get_numTables(){return numTables;} + Uns32T get_numRows(){return numRows;} + Uns32T get_numCols(){return numCols;} + Uns32T get_elementSize(){return elementSize;} + Uns32T get_version(){return version;} + Uns32T get_flags(){return flags;} + Uns32T get_size(){return size;} + Uns32T get_dataDim(){return dataDim;} + Uns32T get_numFuns(){return numFuns;} + Uns32T get_maxp(){return maxp;} +}; + +#define IFLAG 0xFFFFFFFF + +// Point-set collision bucket (sbucket). +// sbuckets form a collision chain that identifies PointIDs falling in the same locale. +// sbuckets are chained from a bucket containing the collision list's t2 identifier +class sbucket { + friend class bucket; + friend class H; + friend class G; + + public: + class sbucket* snext; + unsigned int pointID; + + sbucket(){ + snext=0; + pointID=IFLAG; + } + ~sbucket(){delete snext;} + sbucket* get_snext(){return snext;} +}; + +// bucket structure for a linked list of locales that collide with the same hash value t1 +// different buckets represent different locales, collisions within a locale are chained +// in sbuckets +class bucket { + friend class H; + friend class G; + bucket* next; + sbucket* snext; + public: + unsigned int t2; + bucket(){ + next=0; + snext=0; + t2=IFLAG; + } + ~bucket(){delete next;delete snext;} + bucket* get_next(){return next;} +}; + + +// The hash_functions for locality-sensitive hashing +class H{ + friend class G; + private: + bucket*** h; // hash functions + Uns32T** r1; // random ints for hashing + Uns32T** r2; // random ints for hashing + Uns32T t1; // first hash table key + Uns32T t2; // second hash table key + Uns32T P; // hash table prime number + bool use_u_functions; // flag to optimize computation of hashes + Uns32T bucketCount; // count of number of point buckets allocated + Uns32T pointCount; // count of number of points inserted + + void __initialize_data_structures(); + void __bucket_insert_point(bucket*); + void __sbucket_insert_point(sbucket*); + Uns32T __computeProductModDefaultPrime(Uns32T*,Uns32T*,IntT); + Uns32T __randr(); + bucket** __get_bucket(int j); + void __generate_hash_keys(Uns32T*,Uns32T*,Uns32T*); + void error(const char* a, const char* b = "", const char *sysFunc = 0); + + public: + Uns32T N; // num rows per table + Uns32T C; // num collision per row + Uns32T k; // num projections per hash function + Uns32T m; // ~sqrt num hash tables + Uns32T L; // L = m*(m-1)/2, conversely, m = (1 + sqrt(1 + 8.0*L)) / 2.0 + Uns32T d; // dimensions + Uns32T p; // current point + + H(){;} + H(Uns32T k, Uns32T m, Uns32T d, Uns32T N, Uns32T C); + ~H(); + + + Uns32T bucket_insert_point(bucket**); + + Uns32T get_t1(){return t1;} + Uns32T get_t2(){return t2;} + +}; + + +typedef void (*ReporterCallbackPtr)(void* objPtr, Uns32T pointID, Uns32T queryIndex, float squaredDistance); + +// Interface for indexing and retrieval +class G: public H{ + private: + float *** A; // m x k x d random projectors from R^d->R^k + float ** b; // m x k uniform additive constants + Uns32T ** g; // L x k random hash projections \in Z^k + float w; // width of hash slots (relative to normalized feature space) + float radius;// scaling coefficient for data (1./radius) + vector > uu; // Storage for m patial hash evaluations + Uns32T maxp; // highest pointID stored in database + void* calling_instance; // store calling object instance for member-function callback + void (*add_point_callback)(void*, Uns32T, Uns32T, float); + + void initialize_partial_functions(); + + void get_lock(int fd, bool exclusive); + void release_lock(int fd); + int serial_create(char* filename, Uns32T FMT); + int serial_create(char* filename, float binWidth, Uns32T nTables, Uns32T nRows, Uns32T nCols, Uns32T k, Uns32T d, Uns32T FMT); + char* serial_mmap(int dbfid, Uns32T sz, Uns32T w, off_t offset = 0); + void serial_munmap(char* db, Uns32T N); + int serial_open(char* filename,int writeFlag); + void serial_close(int dbfid); + + // Function to write hashfunctions to disk + int serialize_lsh_hashfunctions(int fid); + + // Functions to write hashtables to disk in format1 (optimized for on-disk retrieval) + int serialize_lsh_hashtables_format1(int fid, int merge); + void serial_write_hashtable_row_format1(SerialElementT*& pe, bucket* h, Uns32T& colCount); + void serial_write_element_format1(SerialElementT*& pe, sbucket* sb, Uns32T t2, Uns32T& colCount); + void serial_merge_hashtable_row_format1(SerialElementT* pr, bucket* h, Uns32T& colCount); + void serial_merge_element_format1(SerialElementT* pe, sbucket* sb, Uns32T t2, Uns32T& colCount); + int serial_can_merge(Uns32T requestedFormat); // Test to see whether core and on-disk structures are compatible + + // Functions to write hashtables to disk in format2 (optimized for in-core retrieval) + int serialize_lsh_hashtables_format2(int fid, int merge); + void serial_write_hashtable_row_format2(int fid, bucket* h, Uns32T& colCount); + void serial_write_element_format2(int fid, sbucket* sb, Uns32T& colCount); + + // Functions to read serial header and hash functions (format1 and format2) + int unserialize_lsh_header(char* filename); // read lsh header from disk into core + void unserialize_lsh_functions(int fid); // read the lsh hash functions into core + + // Functions to read hashtables in format1 + void unserialize_lsh_hashtables_format1(int fid); // read FORMAT1 hash tables into core (disk format) + void unserialize_hashtable_row_format1(SerialElementT* pe, bucket** b); // read lsh hash table row into core + + // Functions to read hashtables in format2 + void unserialize_lsh_hashtables_format2(int fid); // read FORMAT2 hash tables into core (core format) + Uns32T unserialize_hashtable_row_format2(int fid, bucket** b); // read lsh hash table row into core + + // Helper functions + void serial_print_header(Uns32T requestedFormat); + float* get_serial_hashfunction_base(char* db); + SerialElementT* get_serial_hashtable_base(char* db); + Uns32T get_serial_hashtable_offset(); // Size of SerialHeader + HashFunctions + SerialHeaderT* serial_get_header(char* db); + SerialHeaderT* lshHeader; + + // Core Retrieval/Inspections Functions + void bucket_chain_point(bucket* p, Uns32T qpos); + void sbucket_chain_point(sbucket* p, Uns32T qpos); + void dump_hashtable_row(bucket* p); + + // Serial (Format 1) Retrieval/Inspection Functions + void serial_bucket_chain_point(SerialElementT* pe, Uns32T qpos); + void serial_bucket_dump(SerialElementT* pe); + + // Hash functions + void compute_hash_functions(vector& v); + float randn(); + float ranf(); + + char* db; // pointer to serialized structure + + public: + G(char* lshFile, bool lshInCore = false); // unserialize constructor + G(float w, Uns32T k,Uns32T m, Uns32T d, Uns32T N, Uns32T C, float r); // core constructor + ~G(); + + Uns32T insert_point(vector&, Uns32T pointID); + void insert_point_set(vector >& vv, Uns32T basePointID); + + // point retrieval from core + void retrieve_point(vector& v, Uns32T qpos, ReporterCallbackPtr, void* me=NULL); + // point set retrieval from core + void retrieve_point_set(vector >& vv, ReporterCallbackPtr, void* me=NULL); + // serial point set retrieval + void serial_retrieve_point_set(char* filename, vector >& vv, ReporterCallbackPtr, void* me=NULL); + // serial point retrieval + void serial_retrieve_point(char* filename, vector& vv, Uns32T qpos, ReporterCallbackPtr, void* me=NULL); + + void serialize(char* filename, Uns32T serialFormat = O2_SERIAL_FILEFORMAT1); // write hashfunctions and hashtables to disk + + SerialHeaderT* get_lshHeader(){return lshHeader;} + float get_radius(){return radius;} + Uns32T get_maxp(){return maxp;} + void serial_dump_tables(char* filename); + float get_mean_collision_rate(){ return (float) pointCount / bucketCount ; } +}; + +typedef class G LSH; + + + +#endif diff -r 63ae0dfc1767 -r d9a88cfd4ab6 mt19937/mt19937ar.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mt19937/mt19937ar.c Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,175 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + Copyright (C) 2005, Mutsuo Saito, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +#include +#include "mt19937ar.h" + +/* Period parameters */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ + +static unsigned long mt[N]; /* the array for the state vector */ +static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ + +/* initializes mt[N] with a seed */ +void init_genrand(unsigned long s) +{ + mt[0]= s & 0xffffffffUL; + for (mti=1; mti> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +void init_by_array(unsigned long init_key[], int key_length) +{ + int i, j, k; + init_genrand(19650218UL); + i=1; j=0; + k = (N>key_length ? N : key_length); + for (; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) + + init_key[j] + j; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; j++; + if (i>=N) { mt[0] = mt[N-1]; i=1; } + if (j>=key_length) j=0; + } + for (k=N-1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) + - i; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i>=N) { mt[0] = mt[N-1]; i=1; } + } + + mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ +} + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned long genrand_int32(void) +{ + unsigned long y; + static unsigned long mag01[2]={0x0UL, MATRIX_A}; + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= N) { /* generate N words at one time */ + int kk; + + if (mti == N+1) /* if init_genrand() has not been called, */ + init_genrand(5489UL); /* a default initial seed is used */ + + for (kk=0;kk> 1) ^ mag01[y & 0x1UL]; + } + for (;kk> 1) ^ mag01[y & 0x1UL]; + } + y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); + mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* generates a random number on [0,0x7fffffff]-interval */ +long genrand_int31(void) +{ + return (long)(genrand_int32()>>1); +} + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1(void) +{ + return genrand_int32()*(1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2(void) +{ + return genrand_int32()*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3(void) +{ + return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53(void) +{ + unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; + return(a*67108864.0+b)*(1.0/9007199254740992.0); +} +/* These real versions are due to Isaku Wada, 2002/01/09 added */ diff -r 63ae0dfc1767 -r d9a88cfd4ab6 mt19937/mt19937ar.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mt19937/mt19937ar.h Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,72 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + Copyright (C) 2005, Mutsuo Saito + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +/* initializes mt[N] with a seed */ +void init_genrand(unsigned long s); + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +void init_by_array(unsigned long init_key[], int key_length); + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned long genrand_int32(void); + +/* generates a random number on [0,0x7fffffff]-interval */ +long genrand_int31(void); + +/* These real versions are due to Isaku Wada, 2002/01/09 added */ +/* generates a random number on [0,1]-real-interval */ +double genrand_real1(void); + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2(void); + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3(void); + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53(void); diff -r 63ae0dfc1767 -r d9a88cfd4ab6 mt19937/mt19937ar.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mt19937/mt19937ar.out Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,403 @@ +1000 outputs of genrand_int32() +1067595299 955945823 477289528 4107218783 4228976476 +3344332714 3355579695 227628506 810200273 2591290167 +2560260675 3242736208 646746669 1479517882 4245472273 +1143372638 3863670494 3221021970 1773610557 1138697238 +1421897700 1269916527 2859934041 1764463362 3874892047 +3965319921 72549643 2383988930 2600218693 3237492380 +2792901476 725331109 605841842 271258942 715137098 +3297999536 1322965544 4229579109 1395091102 3735697720 +2101727825 3730287744 2950434330 1661921839 2895579582 +2370511479 1004092106 2247096681 2111242379 3237345263 +4082424759 219785033 2454039889 3709582971 835606218 +2411949883 2735205030 756421180 2175209704 1873865952 +2762534237 4161807854 3351099340 181129879 3269891896 + 776029799 2218161979 3001745796 1866825872 2133627728 + 34862734 1191934573 3102311354 2916517763 1012402762 +2184831317 4257399449 2899497138 3818095062 3030756734 +1282161629 420003642 2326421477 2741455717 1278020671 +3744179621 271777016 2626330018 2560563991 3055977700 +4233527566 1228397661 3595579322 1077915006 2395931898 +1851927286 3013683506 1999971931 3006888962 1049781534 +1488758959 3491776230 104418065 2448267297 3075614115 +3872332600 891912190 3936547759 2269180963 2633455084 +1047636807 2604612377 2709305729 1952216715 207593580 +2849898034 670771757 2210471108 467711165 263046873 +3569667915 1042291111 3863517079 1464270005 2758321352 +3790799816 2301278724 3106281430 7974801 2792461636 + 555991332 621766759 1322453093 853629228 686962251 +1455120532 957753161 1802033300 1021534190 3486047311 +1902128914 3701138056 4176424663 1795608698 560858864 +3737752754 3141170998 1553553385 3367807274 711546358 +2475125503 262969859 251416325 2980076994 1806565895 + 969527843 3529327173 2736343040 2987196734 1649016367 +2206175811 3048174801 3662503553 3138851612 2660143804 +1663017612 1816683231 411916003 3887461314 2347044079 +1015311755 1203592432 2170947766 2569420716 813872093 +1105387678 1431142475 220570551 4243632715 4179591855 +2607469131 3090613241 282341803 1734241730 1391822177 +1001254810 827927915 1886687171 3935097347 2631788714 +3905163266 110554195 2447955646 3717202975 3304793075 +3739614479 3059127468 953919171 2590123714 1132511021 +3795593679 2788030429 982155079 3472349556 859942552 +2681007391 2299624053 647443547 233600422 608168955 +3689327453 1849778220 1608438222 3968158357 2692977776 +2851872572 246750393 3582818628 3329652309 4036366910 +1012970930 950780808 3959768744 2538550045 191422718 +2658142375 3276369011 2927737484 1234200027 1920815603 +3536074689 1535612501 2184142071 3276955054 428488088 +2378411984 4059769550 3913744741 2732139246 64369859 +3755670074 842839565 2819894466 2414718973 1010060670 +1839715346 2410311136 152774329 3485009480 4102101512 +2852724304 879944024 1785007662 2748284463 1354768064 +3267784736 2269127717 3001240761 3179796763 895723219 + 865924942 4291570937 89355264 1471026971 4114180745 +3201939751 2867476999 2460866060 3603874571 2238880432 +3308416168 2072246611 2755653839 3773737248 1709066580 +4282731467 2746170170 2832568330 433439009 3175778732 + 26248366 2551382801 183214346 3893339516 1928168445 +1337157619 3429096554 3275170900 1782047316 4264403756 +1876594403 4289659572 3223834894 1728705513 4068244734 +2867840287 1147798696 302879820 1730407747 1923824407 +1180597908 1569786639 198796327 560793173 2107345620 +2705990316 3448772106 3678374155 758635715 884524671 + 486356516 1774865603 3881226226 2635213607 1181121587 +1508809820 3178988241 1594193633 1235154121 326117244 +2304031425 937054774 2687415945 3192389340 2003740439 +1823766188 2759543402 10067710 1533252662 4132494984 + 82378136 420615890 3467563163 541562091 3535949864 +2277319197 3330822853 3215654174 4113831979 4204996991 +2162248333 3255093522 2219088909 2978279037 255818579 +2859348628 3097280311 2569721123 1861951120 2907080079 +2719467166 998319094 2521935127 2404125338 259456032 +2086860995 1839848496 1893547357 2527997525 1489393124 +2860855349 76448234 2264934035 744914583 2586791259 +1385380501 66529922 1819103258 1899300332 2098173828 +1793831094 276463159 360132945 4178212058 595015228 + 177071838 2800080290 1573557746 1548998935 378454223 +1460534296 1116274283 3112385063 3709761796 827999348 +3580042847 1913901014 614021289 4278528023 1905177404 + 45407939 3298183234 1184848810 3644926330 3923635459 +1627046213 3677876759 969772772 1160524753 1522441192 + 452369933 1527502551 832490847 1003299676 1071381111 +2891255476 973747308 4086897108 1847554542 3895651598 +2227820339 1621250941 2881344691 3583565821 3510404498 + 849362119 862871471 797858058 2867774932 2821282612 +3272403146 3997979905 209178708 1805135652 6783381 +2823361423 792580494 4263749770 776439581 3798193823 +2853444094 2729507474 1071873341 1329010206 1289336450 +3327680758 2011491779 80157208 922428856 1158943220 +1667230961 2461022820 2608845159 387516115 3345351910 +1495629111 4098154157 3156649613 3525698599 4134908037 + 446713264 2137537399 3617403512 813966752 1157943946 +3734692965 1680301658 3180398473 3509854711 2228114612 +1008102291 486805123 863791847 3189125290 1050308116 +3777341526 4291726501 844061465 1347461791 2826481581 + 745465012 2055805750 4260209475 2386693097 2980646741 + 447229436 2077782664 1232942813 4023002732 1399011509 +3140569849 2579909222 3794857471 900758066 2887199683 +1720257997 3367494931 2668921229 955539029 3818726432 +1105704962 3889207255 2277369307 2746484505 1761846513 +2413916784 2685127085 4240257943 1166726899 4215215715 +3082092067 3960461946 1663304043 2087473241 4162589986 +2507310778 1579665506 767234210 970676017 492207530 +1441679602 1314785090 3262202570 3417091742 1561989210 +3011406780 1146609202 3262321040 1374872171 1634688712 +1280458888 2230023982 419323804 3262899800 39783310 +1641619040 1700368658 2207946628 2571300939 2424079766 + 780290914 2715195096 3390957695 163151474 2309534542 +1860018424 555755123 280320104 1604831083 2713022383 +1728987441 3639955502 623065489 3828630947 4275479050 +3516347383 2343951195 2430677756 635534992 3868699749 + 808442435 3070644069 4282166003 2093181383 2023555632 +1568662086 3422372620 4134522350 3016979543 3259320234 +2888030729 3185253876 4258779643 1267304371 1022517473 + 815943045 929020012 2995251018 3371283296 3608029049 +2018485115 122123397 2810669150 1411365618 1238391329 +1186786476 3155969091 2242941310 1765554882 279121160 +4279838515 1641578514 3796324015 13351065 103516986 +1609694427 551411743 2493771609 1316337047 3932650856 +4189700203 463397996 2937735066 1855616529 2626847990 + 55091862 3823351211 753448970 4045045500 1274127772 +1124182256 92039808 2126345552 425973257 386287896 +2589870191 1987762798 4084826973 2172456685 3366583455 +3602966653 2378803535 2901764433 3716929006 3710159000 +2653449155 3469742630 3096444476 3932564653 2595257433 + 318974657 3146202484 853571438 144400272 3768408841 + 782634401 2161109003 570039522 1886241521 14249488 +2230804228 1604941699 3928713335 3921942509 2155806892 + 134366254 430507376 1924011722 276713377 196481886 +3614810992 1610021185 1785757066 851346168 3761148643 +2918835642 3364422385 3012284466 3735958851 2643153892 +3778608231 1164289832 205853021 2876112231 3503398282 +3078397001 3472037921 1748894853 2740861475 316056182 +1660426908 168885906 956005527 3984354789 566521563 +1001109523 1216710575 2952284757 3834433081 3842608301 +2467352408 3974441264 3256601745 1409353924 1329904859 +2307560293 3125217879 3622920184 3832785684 3882365951 +2308537115 2659155028 1450441945 3532257603 3186324194 +1225603425 1124246549 175808705 3009142319 2796710159 +3651990107 160762750 1902254979 1698648476 1134980669 + 497144426 3302689335 4057485630 3603530763 4087252587 + 427812652 286876201 823134128 1627554964 3745564327 +2589226092 4202024494 62878473 3275585894 3987124064 +2791777159 1916869511 2585861905 1375038919 1403421920 + 60249114 3811870450 3021498009 2612993202 528933105 +2757361321 3341402964 2621861700 273128190 4015252178 +3094781002 1621621288 2337611177 1796718448 1258965619 +4241913140 2138560392 3022190223 4174180924 450094611 +3274724580 617150026 2704660665 1469700689 1341616587 + 356715071 1188789960 2278869135 1766569160 2795896635 + 57824704 2893496380 1235723989 1630694347 3927960522 + 428891364 1814070806 2287999787 4125941184 3968103889 +3548724050 1025597707 1404281500 2002212197 92429143 +2313943944 2403086080 3006180634 3561981764 1671860914 +1768520622 1803542985 844848113 3006139921 1410888995 +1157749833 2125704913 1789979528 1799263423 741157179 +2405862309 767040434 2655241390 3663420179 2172009096 +2511931187 1680542666 231857466 1154981000 157168255 +1454112128 3505872099 1929775046 2309422350 2143329496 +2960716902 407610648 2938108129 2581749599 538837155 +2342628867 430543915 740188568 1937713272 3315215132 +2085587024 4030765687 766054429 3517641839 689721775 +1294158986 1753287754 4202601348 1974852792 33459103 +3568087535 3144677435 1686130825 4134943013 3005738435 +3599293386 426570142 754104406 3660892564 1964545167 + 829466833 821587464 1746693036 1006492428 1595312919 +1256599985 1024482560 1897312280 2902903201 691790057 +1037515867 3176831208 1968401055 2173506824 1089055278 +1748401123 2941380082 968412354 1818753861 2973200866 +3875951774 1119354008 3988604139 1647155589 2232450826 +3486058011 3655784043 3759258462 847163678 1082052057 + 989516446 2871541755 3196311070 3929963078 658187585 +3664944641 2175149170 2203709147 2756014689 2456473919 +3890267390 1293787864 2830347984 3059280931 4158802520 +1561677400 2586570938 783570352 1355506163 31495586 +3789437343 3340549429 2092501630 896419368 671715824 +3530450081 3603554138 1055991716 3442308219 1499434728 +3130288473 3639507000 17769680 2259741420 487032199 +4227143402 3693771256 1880482820 3924810796 381462353 +4017855991 2452034943 2736680833 2209866385 2128986379 + 437874044 595759426 641721026 1636065708 3899136933 + 629879088 3591174506 351984326 2638783544 2348444281 +2341604660 2123933692 143443325 1525942256 364660499 + 599149312 939093251 1523003209 106601097 376589484 +1346282236 1297387043 764598052 3741218111 933457002 +1886424424 3219631016 525405256 3014235619 323149677 +2038881721 4100129043 2851715101 2984028078 1888574695 +2014194741 3515193880 4180573530 3461824363 2641995497 +3179230245 2902294983 2217320456 4040852155 1784656905 +3311906931 87498458 2752971818 2635474297 2831215366 +3682231106 2920043893 3772929704 2816374944 309949752 +2383758854 154870719 385111597 1191604312 1840700563 + 872191186 2925548701 1310412747 2102066999 1504727249 +3574298750 1191230036 3330575266 3180292097 3539347721 + 681369118 3305125752 3648233597 950049240 4173257693 +1760124957 512151405 681175196 580563018 1169662867 +4015033554 2687781101 699691603 2673494188 1137221356 + 123599888 472658308 1053598179 1012713758 3481064843 +3759461013 3981457956 3830587662 1877191791 3650996736 + 988064871 3515461600 4089077232 2225147448 1249609188 +2643151863 3896204135 2416995901 1397735321 3460025646 + +1000 outputs of genrand_real2() +0.76275443 0.99000644 0.98670464 0.10143112 0.27933125 +0.69867227 0.94218740 0.03427201 0.78842173 0.28180608 +0.92179002 0.20785655 0.54534773 0.69644020 0.38107718 +0.23978165 0.65286910 0.07514568 0.22765211 0.94872929 +0.74557914 0.62664415 0.54708246 0.90959343 0.42043116 +0.86334511 0.19189126 0.14718544 0.70259889 0.63426346 +0.77408121 0.04531601 0.04605807 0.88595519 0.69398270 +0.05377184 0.61711170 0.05565708 0.10133577 0.41500776 +0.91810699 0.22320679 0.23353705 0.92871862 0.98897234 +0.19786706 0.80558809 0.06961067 0.55840445 0.90479405 +0.63288060 0.95009721 0.54948447 0.20645042 0.45000959 +0.87050869 0.70806991 0.19406895 0.79286390 0.49332866 +0.78483914 0.75145146 0.12341941 0.42030252 0.16728160 +0.59906494 0.37575460 0.97815160 0.39815952 0.43595080 +0.04952478 0.33917805 0.76509902 0.61034321 0.90654701 +0.92915732 0.85365931 0.18812377 0.65913428 0.28814566 +0.59476081 0.27835931 0.60722542 0.68310435 0.69387186 +0.03699800 0.65897714 0.17527003 0.02889304 0.86777366 +0.12352068 0.91439461 0.32022990 0.44445731 0.34903686 +0.74639273 0.65918367 0.92492794 0.31872642 0.77749724 +0.85413832 0.76385624 0.32744211 0.91326300 0.27458185 +0.22190155 0.19865383 0.31227402 0.85321225 0.84243342 +0.78544200 0.71854080 0.92503892 0.82703064 0.88306297 +0.47284073 0.70059042 0.48003761 0.38671694 0.60465770 +0.41747204 0.47163243 0.72750808 0.65830223 0.10955369 +0.64215401 0.23456345 0.95944940 0.72822249 0.40888451 +0.69980355 0.26677428 0.57333635 0.39791582 0.85377858 +0.76962816 0.72004885 0.90903087 0.51376506 0.37732665 +0.12691640 0.71249738 0.81217908 0.37037313 0.32772374 +0.14238259 0.05614811 0.74363008 0.39773267 0.94859135 +0.31452454 0.11730313 0.62962618 0.33334237 0.45547255 +0.10089665 0.56550662 0.60539371 0.16027624 0.13245301 +0.60959939 0.04671662 0.99356286 0.57660859 0.40269560 +0.45274629 0.06699735 0.85064246 0.87742744 0.54508392 +0.87242982 0.29321385 0.67660627 0.68230715 0.79052073 +0.48592054 0.25186266 0.93769755 0.28565487 0.47219067 +0.99054882 0.13155240 0.47110470 0.98556600 0.84397623 +0.12875246 0.90953202 0.49129015 0.23792727 0.79481194 +0.44337770 0.96564297 0.67749118 0.55684872 0.27286897 +0.79538393 0.61965356 0.22487929 0.02226018 0.49248200 +0.42247006 0.91797788 0.99250134 0.23449967 0.52531508 +0.10246337 0.78685622 0.34310922 0.89892996 0.40454552 +0.68608407 0.30752487 0.83601319 0.54956031 0.63777550 +0.82199797 0.24890696 0.48801123 0.48661910 0.51223987 +0.32969635 0.31075073 0.21393155 0.73453207 0.15565705 +0.58584522 0.28976728 0.97621478 0.61498701 0.23891470 +0.28518540 0.46809591 0.18371914 0.37597910 0.13492176 +0.66849449 0.82811466 0.56240330 0.37548956 0.27562998 +0.27521910 0.74096121 0.77176757 0.13748143 0.99747138 +0.92504502 0.09175241 0.21389176 0.21766512 0.31183245 +0.23271221 0.21207367 0.57903312 0.77523344 0.13242613 +0.31037988 0.01204835 0.71652949 0.84487594 0.14982178 +0.57423142 0.45677888 0.48420169 0.53465428 0.52667473 +0.46880526 0.49849733 0.05670710 0.79022476 0.03872047 +0.21697212 0.20443086 0.28949326 0.81678186 0.87629474 +0.92297064 0.27373097 0.84625273 0.51505586 0.00582792 +0.33295971 0.91848412 0.92537226 0.91760033 0.07541125 +0.71745848 0.61158698 0.00941650 0.03135554 0.71527471 +0.24821915 0.63636652 0.86159918 0.26450229 0.60160194 +0.35557725 0.24477500 0.07186456 0.51757096 0.62120362 +0.97981062 0.69954667 0.21065616 0.13382753 0.27693186 +0.59644095 0.71500764 0.04110751 0.95730081 0.91600724 +0.47704678 0.26183479 0.34706971 0.07545431 0.29398385 +0.93236070 0.60486023 0.48015011 0.08870451 0.45548581 +0.91872718 0.38142712 0.10668643 0.01397541 0.04520355 +0.93822273 0.18011940 0.57577277 0.91427606 0.30911399 +0.95853475 0.23611214 0.69619891 0.69601980 0.76765372 +0.58515930 0.49479057 0.11288752 0.97187699 0.32095365 +0.57563608 0.40760618 0.78703383 0.43261152 0.90877651 +0.84686346 0.10599030 0.72872803 0.19315490 0.66152912 +0.10210518 0.06257876 0.47950688 0.47062066 0.72701157 +0.48915116 0.66110261 0.60170685 0.24516994 0.12726050 +0.03451185 0.90864994 0.83494878 0.94800035 0.91035206 +0.14480751 0.88458997 0.53498312 0.15963215 0.55378627 +0.35171349 0.28719791 0.09097957 0.00667896 0.32309622 +0.87561479 0.42534520 0.91748977 0.73908457 0.41793223 +0.99279792 0.87908370 0.28458072 0.59132853 0.98672190 +0.28547393 0.09452165 0.89910674 0.53681109 0.37931425 +0.62683489 0.56609740 0.24801549 0.52948179 0.98328855 +0.66403523 0.55523786 0.75886666 0.84784685 0.86829981 +0.71448906 0.84670080 0.43922919 0.20771016 0.64157936 +0.25664246 0.73055695 0.86395782 0.65852932 0.99061803 +0.40280575 0.39146298 0.07291005 0.97200603 0.20555729 +0.59616495 0.08138254 0.45796388 0.33681125 0.33989127 +0.18717090 0.53545811 0.60550838 0.86520709 0.34290701 +0.72743276 0.73023855 0.34195926 0.65019733 0.02765254 +0.72575740 0.32709576 0.03420866 0.26061893 0.56997511 +0.28439072 0.84422744 0.77637570 0.55982168 0.06720327 +0.58449067 0.71657369 0.15819609 0.58042821 0.07947911 +0.40193792 0.11376012 0.88762938 0.67532159 0.71223735 +0.27829114 0.04806073 0.21144026 0.58830274 0.04140071 +0.43215628 0.12952729 0.94668759 0.87391019 0.98382450 +0.27750768 0.90849647 0.90962737 0.59269720 0.96102026 +0.49544979 0.32007095 0.62585546 0.03119821 0.85953001 +0.22017528 0.05834068 0.80731217 0.53799961 0.74166948 +0.77426600 0.43938444 0.54862081 0.58575513 0.15886492 +0.73214332 0.11649057 0.77463977 0.85788827 0.17061997 +0.66838056 0.96076133 0.07949296 0.68521946 0.89986254 +0.05667410 0.12741385 0.83470977 0.63969104 0.46612929 +0.10200126 0.01194925 0.10476340 0.90285217 0.31221221 +0.32980614 0.46041971 0.52024973 0.05425470 0.28330912 +0.60426543 0.00598243 0.97244013 0.21135841 0.78561597 +0.78428734 0.63422849 0.32909934 0.44771136 0.27380750 +0.14966697 0.18156268 0.65686758 0.28726350 0.97074787 +0.63676171 0.96649494 0.24526295 0.08297372 0.54257548 +0.03166785 0.33735355 0.15946671 0.02102971 0.46228045 +0.11892296 0.33408336 0.29875681 0.29847692 0.73767569 +0.02080745 0.62980060 0.08082293 0.22993106 0.25031439 +0.87787525 0.45150053 0.13673441 0.63407612 0.97907688 +0.52241942 0.50580158 0.06273902 0.05270283 0.77031811 +0.05113352 0.24393329 0.75036441 0.37436336 0.22877652 +0.59975358 0.85707591 0.88691457 0.85547165 0.36641027 +0.58720133 0.45462835 0.09243817 0.32981586 0.07820411 +0.25421519 0.36004706 0.60092307 0.46192412 0.36758683 +0.98424170 0.08019934 0.68594024 0.45826386 0.29962317 +0.79365413 0.89231296 0.49478547 0.87645944 0.23590734 +0.28106737 0.75026285 0.08136314 0.79582424 0.76010628 +0.82792971 0.27947652 0.72482861 0.82191216 0.46171689 +0.79189752 0.96043686 0.51609668 0.88995725 0.28998963 +0.55191845 0.03934737 0.83033700 0.49553013 0.98009549 +0.19017594 0.98347750 0.33452066 0.87144372 0.72106301 +0.71272114 0.71465963 0.88361677 0.85571283 0.73782329 +0.20920458 0.34855153 0.46766817 0.02780062 0.74898344 +0.03680650 0.44866557 0.77426312 0.91025891 0.25195236 +0.87319953 0.63265037 0.25552148 0.27422476 0.95217406 +0.39281839 0.66441573 0.09158900 0.94515992 0.07800798 +0.02507888 0.39901462 0.17382573 0.12141278 0.85502334 +0.19902911 0.02160210 0.44460522 0.14688742 0.68020336 +0.71323733 0.60922473 0.95400380 0.99611159 0.90897777 +0.41073520 0.66206647 0.32064685 0.62805003 0.50677209 +0.52690101 0.87473387 0.73918362 0.39826974 0.43683919 +0.80459118 0.32422684 0.01958019 0.95319576 0.98326137 +0.83931735 0.69060863 0.33671416 0.68062550 0.65152380 +0.33392969 0.03451730 0.95227244 0.68200635 0.85074171 +0.64721009 0.51234433 0.73402047 0.00969637 0.93835057 +0.80803854 0.31485260 0.20089527 0.01323282 0.59933780 +0.31584602 0.20209563 0.33754800 0.68604181 0.24443049 +0.19952227 0.78162632 0.10336988 0.11360736 0.23536740 +0.23262256 0.67803776 0.48749791 0.74658435 0.92156640 +0.56706407 0.36683221 0.99157136 0.23421374 0.45183767 +0.91609720 0.85573315 0.37706276 0.77042618 0.30891908 +0.40709595 0.06944866 0.61342849 0.88817388 0.58734506 +0.98711323 0.14744128 0.63242656 0.87704136 0.68347125 +0.84446569 0.43265239 0.25146321 0.04130111 0.34259839 +0.92697368 0.40878778 0.56990338 0.76204273 0.19820348 +0.66314909 0.02482844 0.06669207 0.50205581 0.26084093 +0.65139159 0.41650223 0.09733904 0.56344203 0.62651696 +0.67332139 0.58037374 0.47258086 0.21010758 0.05713135 +0.89390629 0.10781246 0.32037450 0.07628388 0.34227964 +0.42190597 0.58201860 0.77363549 0.49595133 0.86031236 +0.83906769 0.81098161 0.26694195 0.14215941 0.88210306 +0.53634237 0.12090720 0.82480459 0.75930318 0.31847147 +0.92768077 0.01037616 0.56201727 0.88107122 0.35925856 +0.85860762 0.61109408 0.70408301 0.58434977 0.92192494 +0.62667915 0.75988365 0.06858761 0.36156496 0.58057195 +0.13636150 0.57719713 0.59340255 0.63530602 0.22976282 +0.71915530 0.41162531 0.63979565 0.09931342 0.79344045 +0.10893790 0.84450224 0.23122236 0.99485593 0.73637397 +0.17276368 0.13357764 0.74965804 0.64991737 0.61990341 +0.41523170 0.05878239 0.05687301 0.05497131 0.42868366 +0.42571090 0.25810502 0.89642955 0.30439758 0.39310223 +0.11357431 0.04288255 0.23397550 0.11200634 0.85621396 +0.89733974 0.37508865 0.42077265 0.68597384 0.72781399 +0.19296476 0.61699087 0.31667128 0.67756410 0.00177323 +0.05725176 0.79474693 0.18885238 0.06724856 0.68193156 +0.42202167 0.22082041 0.28554673 0.64995708 0.87851940 +0.29124547 0.61009521 0.87374537 0.05743712 0.69902994 +0.81925115 0.45653873 0.37236821 0.31118709 0.52734307 +0.39672836 0.38185294 0.30163915 0.17374510 0.04913278 +0.90404879 0.25742801 0.58266467 0.97663209 0.79823377 +0.36437958 0.15206043 0.26529938 0.22690047 0.05839021 +0.84721160 0.18622435 0.37809403 0.55706977 0.49828704 +0.47659049 0.24289680 0.88477595 0.07807463 0.56245739 +0.73490635 0.21099431 0.13164942 0.75840044 0.66877037 +0.28988183 0.44046090 0.24967434 0.80048356 0.26029740 +0.30416821 0.64151867 0.52067892 0.12880774 0.85465381 +0.02690525 0.19149288 0.49630295 0.79682619 0.43566145 +0.00288078 0.81484193 0.03763639 0.68529083 0.01339574 +0.38405386 0.30537067 0.22994703 0.44000045 0.27217985 +0.53831243 0.02870435 0.86282045 0.61831306 0.09164956 +0.25609707 0.07445781 0.72185784 0.90058883 0.30070608 +0.94476583 0.56822213 0.21933909 0.96772793 0.80063440 +0.26307906 0.31183306 0.16501252 0.55436179 0.68562285 +0.23829083 0.86511559 0.57868991 0.81888344 0.20126869 +0.93172350 0.66028129 0.21786948 0.78515828 0.10262106 +0.35390326 0.79303876 0.63427924 0.90479631 0.31024934 +0.60635447 0.56198079 0.63573813 0.91854197 0.99701497 +0.83085849 0.31692291 0.01925964 0.97446405 0.98751283 +0.60944293 0.13751018 0.69519957 0.68956636 0.56969015 +0.46440193 0.88341765 0.36754434 0.89223647 0.39786427 +0.85055280 0.12749961 0.79452122 0.89449784 0.14567830 +0.45716830 0.74822309 0.28200437 0.42546044 0.17464886 +0.68308746 0.65496587 0.52935411 0.12736159 0.61523955 +0.81590528 0.63107864 0.39786553 0.20102294 0.53292914 +0.75485590 0.59847044 0.32861691 0.12125866 0.58917183 +0.07638293 0.86845380 0.29192617 0.03989733 0.52180460 +0.32503407 0.64071852 0.69516575 0.74254998 0.54587026 +0.48713246 0.32920155 0.08719954 0.63497059 0.54328459 +0.64178757 0.45583809 0.70694291 0.85212760 0.86074305 +0.33163422 0.85739792 0.59908488 0.74566046 0.72157152 diff -r 63ae0dfc1767 -r d9a88cfd4ab6 mt19937/mtTest.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mt19937/mtTest.c Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,65 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + Copyright (C) 2005, Mutsuo Saito, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) +*/ + +#include +#include "mt19937ar.h" + +int main(void) +{ + int i; + unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; + init_by_array(init, length); + printf("1000 outputs of genrand_int32()\n"); + for (i=0; i<1000; i++) { + printf("%10lu ", genrand_int32()); + if (i%5==4) printf("\n"); + } + printf("\n1000 outputs of genrand_real2()\n"); + for (i=0; i<1000; i++) { + printf("%10.8f ", genrand_real2()); + if (i%5==4) printf("\n"); + } + return 0; +} diff -r 63ae0dfc1767 -r d9a88cfd4ab6 mt19937/readme-mt.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mt19937/readme-mt.txt Tue Jul 29 22:01:17 2008 +0000 @@ -0,0 +1,79 @@ +This is a Mersenne Twister pseudorandom number generator +with period 2^19937-1 with improved initialization scheme, +modified on 2002/1/26 by Takuji Nishimura and Makoto Matsumoto. +modified on 2005/4/26 by Mutsuo Saito + +Contents of this tar ball: +readme-mt.txt this file +mt19937ar.c the C source (ar: initialize by ARray) +mt19937ar.h the C header file for mt19937ar +mtTest.c the C test main program of mt19937ar.c +mt19937ar.out Test outputs of six types generators. 1000 for each + +1. Initialization + The initialization scheme for the previous versions of MT +(e.g. 1999/10/28 version or earlier) has a tiny problem, that +the most significant bits of the seed is not well reflected +to the state vector of MT. + +This version (2002/1/26) has two initialization schemes: +init_genrand(seed) and init_by_array(init_key, key_length). + +init_genrand(seed) initializes the state vector by using +one unsigned 32-bit integer "seed", which may be zero. + +init_by_array(init_key, key_length) initializes the state vector +by using an array init_key[] of unsigned 32-bit integers +of length key_kength. If key_length is smaller than 624, +then each array of 32-bit integers gives distinct initial +state vector. This is useful if you want a larger seed space +than 32-bit word. + +2. Generation +After initialization, the following type of pseudorandom numbers +are available. + +genrand_int32() generates unsigned 32-bit integers. +genrand_int31() generates unsigned 31-bit integers. +genrand_real1() generates uniform real in [0,1] (32-bit resolution). +genrand_real2() generates uniform real in [0,1) (32-bit resolution). +genrand_real3() generates uniform real in (0,1) (32-bit resolution). +genrand_res53() generates uniform real in [0,1) with 53-bit resolution. + +Note: the last five functions call the first one. +if you need more speed for these five functions, you may +suppress the function call by copying genrand_int32() and +replacing the last return(), following to these five functions. + +3. main() +main() is an example to initialize with an array of length 4, +then 1000 outputs of unsigned 32-bit integers, +then 1000 outputs of real [0,1) numbers. + +4. The outputs +The output of the mt19937ar.c is in the file mt19937ar.out. +If you revise or translate the code, check the output +by using this file. + +5. Cryptography +This generator is not cryptoraphically secure. +You need to use a one-way (or hash) function to obtain +a secure random sequence. + +6. Correspondence +See: +URL http://www.math.keio.ac.jp/matumoto/emt.html +email matumoto@math.keio.ac.jp, nisimura@sci.kj.yamagata-u.ac.jp + +7. Reference +M. Matsumoto and T. Nishimura, +"Mersenne Twister: A 623-Dimensionally Equidistributed Uniform +Pseudo-Random Number Generator", +ACM Transactions on Modeling and Computer Simulation, +Vol. 8, No. 1, January 1998, pp 3--30. + +------- +Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, +All rights reserved. +Copyright (C) 2005, Mutsuo Saito +All rights reserved. diff -r 63ae0dfc1767 -r d9a88cfd4ab6 query.cpp --- a/query.cpp Tue Jul 22 20:09:31 2008 +0000 +++ b/query.cpp Tue Jul 29 22:01:17 2008 +0000 @@ -1,5 +1,4 @@ #include "audioDB.h" - #include "reporter.h" bool audioDB::powers_acceptable(double p1, double p2) { @@ -17,50 +16,88 @@ } void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { - initTables(dbName, inFile); - Reporter *r = 0; + // init database tables and dbH first + if(query_from_key) + initTables(dbName); + else + initTables(dbName, inFile); + + // keyKeyPos requires dbH to be initialized + if(query_from_key && (!key || (query_from_key_index = getKeyPos((char*)key))==O2_ERR_KEYNOTFOUND)) + error("Query key not found :",key); + switch (queryType) { case O2_POINT_QUERY: sequenceLength = 1; normalizedDistance = false; - r = new pointQueryReporter >(pointNN); + reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); break; case O2_TRACK_QUERY: sequenceLength = 1; normalizedDistance = false; - r = new trackAveragingReporter >(pointNN, trackNN, dbH->numFiles); + reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, dbH->numFiles); break; - case O2_SEQUENCE_QUERY: + case O2_SEQUENCE_QUERY: + if(no_unit_norming) + normalizedDistance = false; if(radius == 0) { - r = new trackAveragingReporter >(pointNN, trackNN, dbH->numFiles); + reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, dbH->numFiles); } else { - r = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); + if(index_exists(dbName, radius, sequenceLength)){ + char* indexName = index_get_name(dbName, radius, sequenceLength); + lsh = new LSH(indexName); + assert(lsh); + reporter = new trackSequenceQueryRadReporter(trackNN, index_to_trackID(lsh->get_maxp())+1); + delete lsh; + delete[] indexName; + } + else + reporter = new trackSequenceQueryRadReporter(trackNN, dbH->numFiles); } break; - case O2_N_SEQUENCE_QUERY : + case O2_N_SEQUENCE_QUERY: + if(no_unit_norming) + normalizedDistance = false; if(radius == 0) { - r = new trackSequenceQueryNNReporter >(pointNN, trackNN, dbH->numFiles); + reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, dbH->numFiles); } else { - r = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); + if(index_exists(dbName, radius, sequenceLength)){ + char* indexName = index_get_name(dbName, radius, sequenceLength); + lsh = new LSH(indexName); + assert(lsh); + reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, index_to_trackID(lsh->get_maxp())+1); + delete lsh; + delete[] indexName; + } + else + reporter = new trackSequenceQueryRadNNReporter(pointNN,trackNN, dbH->numFiles); } break; case O2_ONE_TO_ONE_N_SEQUENCE_QUERY : if(radius == 0) { error("query-type not yet supported"); } else { - r = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); + reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, dbH->numFiles); } break; default: error("unrecognized queryType in query()"); } - query_loop(dbName, inFile, r); - r->report(fileTable, adbQueryResponse); - delete r; + + // Test for index (again) here + if(radius && index_exists(dbName, radius, sequenceLength)) + index_query_loop(dbName, query_from_key_index); + else + query_loop(dbName, query_from_key_index); + + reporter->report(fileTable, adbQueryResponse); } // return ordinal position of key in keyTable +// this should really be a STL hash map search unsigned audioDB::getKeyPos(char* key){ + if(!dbH) + error("dbH not initialized","getKeyPos"); for(unsigned k=0; knumFiles; k++) if(strncmp(fileTable + k*O2_FILETABLE_ENTRY_SIZE, key, strlen(key))==0) return k; @@ -155,8 +192,8 @@ sp = DD[j]; spd = D[j+w] + w; k = trackTable[track] - w; - while(k--) - *sp++ += *spd++; + while(k--) + *sp++ += *spd++; } } } else { // HOP_SIZE != 1 @@ -211,7 +248,7 @@ // actual query point's information. -- CSR, 2007-12-05 void audioDB::set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp) { *nvp = (statbuf.st_size - sizeof(int)) / (dbH->dim * sizeof(double)); - + if(!(dbH->flags & O2_FLAG_L2NORM)) { error("Database must be L2 normed for sequence query","use -L2NORM"); } @@ -285,6 +322,92 @@ } } +// Does the same as set_up_query(...) but from database features instead of from a file +// Constructs the same outputs as set_up_query +void audioDB::set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex) { + if(!trackTable) + error("trackTable not initialized","set_up_query_from_key"); + + if(!(dbH->flags & O2_FLAG_L2NORM)) { + error("Database must be L2 normed for sequence query","use -L2NORM"); + } + + if(dbH->flags & O2_FLAG_POWER) + usingPower = true; + + if(dbH->flags & O2_FLAG_TIMES) + usingTimes = true; + + *nvp = trackTable[queryIndex]; + if(*nvp < sequenceLength) { + error("Query shorter than requested sequence length", "maybe use -l"); + } + + VERB_LOG(1, "performing norms... "); + + // Read query feature vectors from database + *qp = NULL; + lseek(dbfid, dbH->dataOffset + trackOffsetTable[queryIndex] * sizeof(double), SEEK_SET); + size_t allocatedSize = 0; + read_data(queryIndex, qp, &allocatedSize); + // Consistency check on allocated memory and query feature size + if(*nvp*sizeof(double)*dbH->dim != allocatedSize) + error("Query memory allocation failed consitency check","set_up_query_from_key"); + + Uns32T trackIndexOffset = trackOffsetTable[queryIndex]/dbH->dim; // Convert num data elements to num vectors + // Copy L2 norm partial-sum coefficients + assert(*qnp = new double[*nvp]); + memcpy(*qnp, l2normTable+trackIndexOffset, *nvp*sizeof(double)); + sequence_sum(*qnp, *nvp, sequenceLength); + sequence_sqrt(*qnp, *nvp, sequenceLength); + + if( usingPower ){ + // Copy Power partial-sum coefficients + assert(*qpp = new double[*nvp]); + memcpy(*qpp, powerTable+trackIndexOffset, *nvp*sizeof(double)); + sequence_sum(*qpp, *nvp, sequenceLength); + sequence_average(*qpp, *nvp, sequenceLength); + } + + if (usingTimes) { + unsigned int k; + *mqdp = 0.0; + double *querydurs = new double[*nvp]; + double *timesdata = new double[*nvp*2]; + assert(querydurs && timesdata); + memcpy(timesdata, timesTable+trackIndexOffset, *nvp*sizeof(double)); + for(k = 0; k < *nvp; k++) { + querydurs[k] = timesdata[2*k+1] - timesdata[2*k]; + *mqdp += querydurs[k]; + } + *mqdp /= k; + + VERB_LOG(1, "mean query file duration: %f\n", *mqdp); + + delete [] querydurs; + delete [] timesdata; + } + // Defaults, for exhaustive search (!usingQueryPoint) + *vqp = *qp; + *vqnp = *qnp; + *vqpp = *qpp; + + if(usingQueryPoint) { + if(queryPoint > *nvp || queryPoint > *nvp - sequenceLength + 1) { + error("queryPoint > numVectors-wL+1 in query"); + } else { + VERB_LOG(1, "query point: %u\n", queryPoint); + *vqp = *qp + queryPoint * dbH->dim; + *vqnp = *qnp + queryPoint; + if (usingPower) { + *vqpp = *qpp + queryPoint; + } + *nvp = sequenceLength; + } + } +} + + // FIXME: this is not the right name; we're not actually setting up // the database, but copying various bits of it out of mmap()ed tables // in order to reduce seeks. @@ -341,14 +464,98 @@ *vspp = *spp; } -void audioDB::query_loop(const char* dbName, const char* inFile, Reporter *reporter) { +// query_points() +// +// using PointPairs held in the exact_evaluation_queue compute squared distance for each PointPair +// and insert result into the current reporter. +// +// Preconditions: +// A query inFile has been opened with setup_query(...) and query pointers initialized +// The database contains some points +// An exact_evaluation_queue has been allocated and populated +// A reporter has been allocated +// +// Postconditions: +// reporter contains the points and distances that meet the reporter constraints + +void audioDB::query_loop_points(double* query, double* qnPtr, double* qpPtr, double meanQdur, Uns32T numVectors){ + unsigned int dbVectors; + double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; + double *meanDBdur = 0; + + // check pre-conditions + assert(exact_evaluation_queue&&reporter); + if(!exact_evaluation_queue->size()) // Exit if no points to evaluate + return; + + // Compute database info + // FIXME: we more than likely don't need very much of the database + // so make a new method to build these values per-track or, even better, per-point + set_up_db(&sNorm, &snPtr, &sPower, &spPtr, &meanDBdur, &dbVectors); + + VERB_LOG(1, "matching points..."); + + assert(pointNN>0 && pointNN<=O2_MAXNN); + assert(trackNN>0 && trackNN<=O2_MAXNN); + + // We are guaranteed that the order of points is sorted by: + // qpos, trackID, spos + // so we can be relatively efficient in initialization of track data. + // Here we assume that points don't overlap, so we will use exhaustive dot + // product evaluation over the sequence + double dist; + size_t data_buffer_size = 0; + double *data_buffer = 0; + Uns32T trackOffset; + Uns32T trackIndexOffset; + Uns32T currentTrack = 0x80000000; // Initialize with a value outside of track index range + Uns32T npairs = exact_evaluation_queue->size(); + while(npairs--){ + PointPair pp = exact_evaluation_queue->top(); + trackOffset=trackOffsetTable[pp.trackID]; // num data elements offset + trackIndexOffset=trackOffset/dbH->dim; // num vectors offset + if((!(usingPower) || powers_acceptable(qpPtr[usingQueryPoint?0:pp.qpos], sPower[trackIndexOffset+pp.spos])) && + ((usingQueryPoint?0:pp.qpos) < numVectors-sequenceLength+1 && pp.spos < trackTable[pp.trackID]-sequenceLength+1)){ + if(currentTrack!=pp.trackID){ + currentTrack=pp.trackID; + lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); + read_data(currentTrack, &data_buffer, &data_buffer_size); + } + dist = dot_product_points(query+(usingQueryPoint?0:pp.qpos*dbH->dim), data_buffer+pp.spos*dbH->dim, dbH->dim*sequenceLength); + if(normalizedDistance) + dist = 2-(2/(qnPtr[usingQueryPoint?0:pp.qpos]*sNorm[trackIndexOffset+pp.spos]))*dist; + else + if(no_unit_norming) + dist = qnPtr[usingQueryPoint?0:pp.qpos]*qnPtr[usingQueryPoint?0:pp.qpos]+sNorm[trackIndexOffset+pp.spos]*sNorm[trackIndexOffset+pp.spos] - 2*dist; + // else + // dist = dist; + if((!radius) || dist <= (radius+O2_DISTANCE_TOLERANCE)) + reporter->add_point(pp.trackID, pp.qpos, pp.spos, dist); + } + exact_evaluation_queue->pop(); + } +} + +// A completely unprotected dot-product method +// Caller is responsible for ensuring that memory is within bounds +inline double audioDB::dot_product_points(double* q, double* p, Uns32T L){ + double dist = 0.0; + while(L--) + dist += *q++ * *p++; + return dist; +} + +void audioDB::query_loop(const char* dbName, Uns32T queryIndex) { unsigned int numVectors; double *query, *query_data; double *qNorm, *qnPtr, *qPower = 0, *qpPtr = 0; double meanQdur; - set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); + if(query_from_key) + set_up_query_from_key(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors, queryIndex); + else + set_up_query(&query_data, &query, &qNorm, &qnPtr, &qPower, &qpPtr, &meanQdur, &numVectors); unsigned int dbVectors; double *sNorm, *snPtr, *sPower = 0, *spPtr = 0; @@ -365,21 +572,12 @@ double **D = 0; // Differences query and target double **DD = 0; // Matched filter distance - D = new double*[numVectors]; + D = new double*[numVectors]; // pre-allocate DD = new double*[numVectors]; gettimeofday(&tv1, NULL); unsigned processedTracks = 0; - - // build track offset table - off_t *trackOffsetTable = new off_t[dbH->numFiles]; - unsigned cumTrack=0; off_t trackIndexOffset; - for(k = 0; k < dbH->numFiles; k++){ - trackOffsetTable[k] = cumTrack; - cumTrack += trackTable[k] * dbH->dim; - } - char nextKey[MAXSTR]; // Track loop @@ -403,6 +601,18 @@ } } + // skip identity on query_from_key + if( query_from_key && (track == queryIndex) ) { + if(queryIndex!=dbH->numFiles-1){ + track++; + trackOffset = trackOffsetTable[track]; + lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); + } + else{ + break; + } + } + trackIndexOffset=trackOffset/dbH->dim; // numVectors offset read_data(track, &data_buffer, &data_buffer_size); @@ -425,15 +635,18 @@ for(j = 0; j <= numVectors - wL; j += HOP_SIZE) { for(k = 0; k <= trackTable[track] - wL; k += HOP_SIZE) { double thisDist; - if(normalizedDistance) { + if(normalizedDistance) thisDist = 2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; - } else { - thisDist = DD[j][k]; - } + else + if(no_unit_norming) + thisDist = qnPtr[j]*qnPtr[j]+sNorm[trackIndexOffset+k]*sNorm[trackIndexOffset+k] - 2*DD[j][k]; + else + thisDist = DD[j][k]; + // Power test if ((!usingPower) || powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k])) { // radius test - if((!radius) || thisDist < radius) { + if((!radius) || thisDist <= (radius+O2_DISTANCE_TOLERANCE)) { reporter->add_point(track, usingQueryPoint ? queryPoint : j, k, thisDist); } } @@ -452,8 +665,6 @@ (tv1.tv_sec*1000 + tv1.tv_usec/1000)) // Clean up - if(trackOffsetTable) - delete[] trackOffsetTable; if(query_data) delete[] query_data; if(qNorm) @@ -493,3 +704,5 @@ } VERB_LOG(2, "done.\n"); } + + diff -r 63ae0dfc1767 -r d9a88cfd4ab6 reporter.h --- a/reporter.h Tue Jul 22 20:09:31 2008 +0000 +++ b/reporter.h Tue Jul 29 22:01:17 2008 +0000 @@ -1,8 +1,13 @@ +#ifndef __REPORTER_H +#define __REPORTER_H + #include #include -#include #include #include +#include +#include "ReporterBase.h" +#include "audioDB.h" typedef struct nnresult { unsigned int trackID; @@ -32,7 +37,7 @@ return a.count > b.count; } -class Reporter { +class Reporter : public ReporterBase { public: virtual ~Reporter() {}; virtual void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) = 0; @@ -42,7 +47,7 @@ // adbQueryResponse thing everywhere; the fileTable argument is // there solely for convertion trackIDs into names. -- CSR, // 2007-12-10. - virtual void report(char *fileTable, adb__queryResponse *adbQueryResponse) = 0; + virtual void report(char *fileTable, void* adbQueryResponse) = 0; }; template class pointQueryReporter : public Reporter { @@ -50,7 +55,7 @@ pointQueryReporter(unsigned int pointNN); ~pointQueryReporter(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); + void report(char *fileTable, void* adbQueryResponse); private: unsigned int pointNN; std::priority_queue< NNresult, std::vector< NNresult >, T> *queue; @@ -79,7 +84,7 @@ } } -template void pointQueryReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { +template void pointQueryReporter::report(char *fileTable, void *adbQueryResponse) { NNresult r; std::vector v; unsigned int size = queue->size(); @@ -93,26 +98,32 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + if(fileTable) + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << r.trackID << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { - adbQueryResponse->result.__sizeRlist=size; - adbQueryResponse->result.__sizeDist=size; - adbQueryResponse->result.__sizeQpos=size; - adbQueryResponse->result.__sizeSpos=size; - adbQueryResponse->result.Rlist= new char*[size]; - adbQueryResponse->result.Dist = new double[size]; - adbQueryResponse->result.Qpos = new unsigned int[size]; - adbQueryResponse->result.Spos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeRlist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeDist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeQpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeSpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.Rlist= new char*[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist = new double[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Spos = new unsigned int[size]; unsigned int k = 0; for(rit = v.rbegin(); rit < v.rend(); rit++, k++) { r = *rit; - adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; - adbQueryResponse->result.Dist[k] = r.dist; - adbQueryResponse->result.Qpos[k] = r.qpos; - adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + ((adb__queryResponse*)adbQueryResponse)->result.Rlist[k] = new char[O2_MAXFILESTR]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist[k] = r.dist; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos[k] = r.qpos; + ((adb__queryResponse*)adbQueryResponse)->result.Spos[k] = r.spos; + if(fileTable) + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + else + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%d", r.trackID); } } } @@ -122,7 +133,7 @@ trackAveragingReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); ~trackAveragingReporter(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); + void report(char *fileTable, void *adbQueryResponse); protected: unsigned int pointNN; unsigned int trackNN; @@ -153,7 +164,7 @@ } } -template void trackAveragingReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { +template void trackAveragingReporter::report(char *fileTable, void *adbQueryResponse) { std::priority_queue < NNresult, std::vector< NNresult>, T> result; for (int i = numFiles-1; i >= 0; i--) { unsigned int size = queues[i].size(); @@ -194,105 +205,36 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + if(fileTable) + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << r.trackID << " "; std::cout << r.dist << " " << r.qpos << " " << r.spos << std::endl; } } else { - adbQueryResponse->result.__sizeRlist=size; - adbQueryResponse->result.__sizeDist=size; - adbQueryResponse->result.__sizeQpos=size; - adbQueryResponse->result.__sizeSpos=size; - adbQueryResponse->result.Rlist= new char*[size]; - adbQueryResponse->result.Dist = new double[size]; - adbQueryResponse->result.Qpos = new unsigned int[size]; - adbQueryResponse->result.Spos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeRlist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeDist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeQpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeSpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.Rlist= new char*[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist = new double[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Spos = new unsigned int[size]; unsigned int k = 0; for(rit = v.rbegin(); rit < v.rend(); rit++, k++) { r = *rit; - adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; - adbQueryResponse->result.Dist[k] = r.dist; - adbQueryResponse->result.Qpos[k] = r.qpos; - adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + ((adb__queryResponse*)adbQueryResponse)->result.Rlist[k] = new char[O2_MAXFILESTR]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist[k] = r.dist; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos[k] = r.qpos; + ((adb__queryResponse*)adbQueryResponse)->result.Spos[k] = r.spos; + if(fileTable) + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + else + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%d", r.trackID); } } } -// track Sequence Query Radius Reporter -// only return tracks and retrieved point counts -class trackSequenceQueryRadReporter : public Reporter { -public: - trackSequenceQueryRadReporter(unsigned int trackNN, unsigned int numFiles); - ~trackSequenceQueryRadReporter(); - void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); - protected: - unsigned int trackNN; - unsigned int numFiles; - std::set > *set; - unsigned int *count; -}; - -trackSequenceQueryRadReporter::trackSequenceQueryRadReporter(unsigned int trackNN, unsigned int numFiles): - trackNN(trackNN), numFiles(numFiles) { - set = new std::set >; - count = new unsigned int[numFiles]; - for (unsigned i = 0; i < numFiles; i++) { - count[i] = 0; - } -} - -trackSequenceQueryRadReporter::~trackSequenceQueryRadReporter() { - delete set; - delete [] count; -} - -void trackSequenceQueryRadReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { - std::set >::iterator it; - std::pair pair = std::make_pair(trackID, qpos); // only count this once - it = set->find(pair); - if (it == set->end()) { - set->insert(pair); - count[trackID]++; // only count if pair is unique - } -} - -void trackSequenceQueryRadReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { - std::priority_queue < Radresult, std::vector, std::greater > result; - // KLUDGE: doing this backwards in an attempt to get the same - // tiebreak behaviour as before. - for (int i = numFiles-1; i >= 0; i--) { - Radresult r; - r.trackID = i; - r.count = count[i]; - if(r.count > 0) { - result.push(r); - if (result.size() > trackNN) { - result.pop(); - } - } - } - - Radresult r; - std::vector v; - unsigned int size = result.size(); - for(unsigned int k = 0; k < size; k++) { - r = result.top(); - v.push_back(r); - result.pop(); - } - std::vector::reverse_iterator rit; - - if(adbQueryResponse==0) { - for(rit = v.rbegin(); rit < v.rend(); rit++) { - r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; - } - } else { - // FIXME - } -} - // Another type of trackAveragingReporter that reports all pointNN nearest neighbours template class trackSequenceQueryNNReporter : public trackAveragingReporter { protected: @@ -302,15 +244,16 @@ using trackAveragingReporter::pointNN; public: trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); + void report(char *fileTable, void *adbQueryResponse); }; template trackSequenceQueryNNReporter::trackSequenceQueryNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles) :trackAveragingReporter(pointNN, trackNN, numFiles){} -template void trackSequenceQueryNNReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { +template void trackSequenceQueryNNReporter::report(char *fileTable, void *adbQueryResponse) { std::priority_queue < NNresult, std::vector< NNresult>, T> result; - std::priority_queue< NNresult, std::vector< NNresult>, std::less > *point_queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::less >[numFiles]; + std::priority_queue< NNresult, std::vector< NNresult>, std::less > *point_queues + = new std::priority_queue< NNresult, std::vector< NNresult>, std::less >[numFiles]; for (int i = numFiles-1; i >= 0; i--) { unsigned int size = queues[i].size(); @@ -353,7 +296,11 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.dist << std::endl; + if(fileTable) + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << r.trackID << " "; + std::cout << r.dist << std::endl; unsigned int qsize = point_queues[r.trackID].size(); // Reverse the order of the points stored in point_queues for(unsigned int k=0; k < qsize; k++){ @@ -368,90 +315,126 @@ } } } else { - adbQueryResponse->result.__sizeRlist=size; - adbQueryResponse->result.__sizeDist=size; - adbQueryResponse->result.__sizeQpos=size; - adbQueryResponse->result.__sizeSpos=size; - adbQueryResponse->result.Rlist= new char*[size]; - adbQueryResponse->result.Dist = new double[size]; - adbQueryResponse->result.Qpos = new unsigned int[size]; - adbQueryResponse->result.Spos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeRlist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeDist=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeQpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.__sizeSpos=size; + ((adb__queryResponse*)adbQueryResponse)->result.Rlist= new char*[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist = new double[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos = new unsigned int[size]; + ((adb__queryResponse*)adbQueryResponse)->result.Spos = new unsigned int[size]; unsigned int k = 0; for(rit = v.rbegin(); rit < v.rend(); rit++, k++) { r = *rit; - adbQueryResponse->result.Rlist[k] = new char[O2_MAXFILESTR]; - adbQueryResponse->result.Dist[k] = r.dist; - adbQueryResponse->result.Qpos[k] = r.qpos; - adbQueryResponse->result.Spos[k] = r.spos; - snprintf(adbQueryResponse->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + ((adb__queryResponse*)adbQueryResponse)->result.Rlist[k] = new char[O2_MAXFILESTR]; + ((adb__queryResponse*)adbQueryResponse)->result.Dist[k] = r.dist; + ((adb__queryResponse*)adbQueryResponse)->result.Qpos[k] = r.qpos; + ((adb__queryResponse*)adbQueryResponse)->result.Spos[k] = r.spos; + if(fileTable) + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%s", fileTable+r.trackID*O2_FILETABLE_ENTRY_SIZE); + else + snprintf(((adb__queryResponse*)adbQueryResponse)->result.Rlist[k], O2_MAXFILESTR, "%d", r.trackID); } } // clean up delete[] point_queues; } +/********************** Radius Reporters **************************/ -// track Sequence Query Radius NN Reporter -// retrieve tracks ordered by query-point matches (one per track per query point) -// -// as well as sorted n-NN points per retrieved track -class trackSequenceQueryRadNNReporter : public Reporter { +class triple{ + public: + unsigned int a; + unsigned int b; + unsigned int c; + + triple(void); + triple(unsigned int, unsigned int, unsigned int); + unsigned int first(); + unsigned int second(); + unsigned int third(); +}; + +triple& make_triple(unsigned int, unsigned int, unsigned int); + +triple::triple(unsigned int a, unsigned int b, unsigned int c):a(a),b(b),c(c){} + +unsigned int triple::first(){return a;} +unsigned int triple::second(){return b;} +unsigned int triple::third(){return c;} + +triple::triple():a(0),b(0),c(0){} + +bool operator< (const triple &t1, const triple &t2) { + return ((t1.a < t2.a) || + ((t1.a == t2.a) && ((t1.b < t2.b) || + ((t1.b == t2.b) && (t1.c < t2.c))))); +} +bool operator== (const triple &t1, const triple &t2) { + return ((t1.a == t2.a) && (t1.b == t2.b) && (t1.c == t2.c)); +} + +triple& make_triple(unsigned int a, unsigned int b, unsigned int c){ + triple* t = new triple(a,b,c); + return *t; +} + + +// track Sequence Query Radius Reporter +// only return tracks and retrieved point counts +class trackSequenceQueryRadReporter : public Reporter { public: - trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); - ~trackSequenceQueryRadNNReporter(); + trackSequenceQueryRadReporter(unsigned int trackNN, unsigned int numFiles); + ~trackSequenceQueryRadReporter(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); + void report(char *fileTable, void *adbQueryResponse); protected: - unsigned int pointNN; unsigned int trackNN; unsigned int numFiles; - std::set< NNresult > *set; - std::priority_queue< NNresult, std::vector< NNresult>, std::less > *point_queues; + std::set > *set; + std::set< triple > *set_triple; unsigned int *count; }; -trackSequenceQueryRadNNReporter::trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles): -pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { - // Where to count Radius track matches (one-to-one) - set = new std::set< NNresult >; - // Where to insert individual point matches (one-to-many) - point_queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::less >[numFiles]; - +trackSequenceQueryRadReporter::trackSequenceQueryRadReporter(unsigned int trackNN, unsigned int numFiles): + trackNN(trackNN), numFiles(numFiles) { + set = new std::set >; + set_triple = new std::set >; count = new unsigned int[numFiles]; for (unsigned i = 0; i < numFiles; i++) { count[i] = 0; } } -trackSequenceQueryRadNNReporter::~trackSequenceQueryRadNNReporter() { +trackSequenceQueryRadReporter::~trackSequenceQueryRadReporter() { delete set; + delete set_triple; delete [] count; } -void trackSequenceQueryRadNNReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { - std::set< NNresult >::iterator it; - NNresult r; - r.trackID = trackID; - r.qpos = qpos; - r.dist = dist; - r.spos = spos; +void trackSequenceQueryRadReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + std::set >::iterator it; + std::pair pair = std::make_pair(trackID, qpos); // only count this once + std::set::iterator it2; + triple triple; - // Record all matching points (within radius) - if (!isnan(dist)) { - point_queues[trackID].push(r); - if(point_queues[trackID].size() > pointNN) - point_queues[trackID].pop(); - } + triple = make_triple(trackID, qpos, spos); // only count this once + + // Record unique triples (record one collision from all hash tables) + it2 = set_triple->find(triple); - // Record counts of pairs - it = set->find(r); - if (it == set->end()) { - set->insert(r); - count[trackID]++; + if(it2 == set_triple->end()){ + set_triple->insert(triple); + + it = set->find(pair); + if (it == set->end()) { + set->insert(pair); + count[trackID]++; // only count if pair is unique + } } } -void trackSequenceQueryRadNNReporter::report(char *fileTable, adb__queryResponse *adbQueryResponse) { +void trackSequenceQueryRadReporter::report(char *fileTable, void *adbQueryResponse) { std::priority_queue < Radresult, std::vector, std::greater > result; // KLUDGE: doing this backwards in an attempt to get the same // tiebreak behaviour as before. @@ -475,6 +458,120 @@ v.push_back(r); result.pop(); } + std::vector::reverse_iterator rit; + + if(adbQueryResponse==0) { + for(rit = v.rbegin(); rit < v.rend(); rit++) { + r = *rit; + if(fileTable) + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << r.trackID << " "; + std::cout << r.count << std::endl; + } + } else { + // FIXME + } +} + +// track Sequence Query Radius NN Reporter +// retrieve tracks ordered by query-point matches (one per track per query point) +// +// as well as sorted n-NN points per retrieved track +class trackSequenceQueryRadNNReporter : public Reporter { +public: + trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); + ~trackSequenceQueryRadNNReporter(); + void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); + void report(char *fileTable, void *adbQueryResponse); + protected: + unsigned int pointNN; + unsigned int trackNN; + unsigned int numFiles; + std::set > *set; + std::set< triple > *set_triple; + std::priority_queue< NNresult, std::vector< NNresult>, std::less > *point_queues; + unsigned int *count; +}; + +trackSequenceQueryRadNNReporter::trackSequenceQueryRadNNReporter(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles): +pointNN(pointNN), trackNN(trackNN), numFiles(numFiles) { + // Where to count Radius track matches (one-to-one) + set = new std::set >; + set_triple = new std::set >; + // Where to insert individual point matches (one-to-many) + point_queues = new std::priority_queue< NNresult, std::vector< NNresult>, std::less >[numFiles]; + + count = new unsigned int[numFiles]; + for (unsigned i = 0; i < numFiles; i++) { + count[i] = 0; + } +} + +trackSequenceQueryRadNNReporter::~trackSequenceQueryRadNNReporter() { + delete set; + delete set_triple; + delete [] count; +} + +void trackSequenceQueryRadNNReporter::add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist) { + std::set >::iterator it; + std::set::iterator it2; + std::pair pair; + triple triple; + NNresult r; + + pair = std::make_pair(trackID, qpos); // only count this once + triple = make_triple(trackID, qpos, spos); // only count this once + // Record unique triples (record one collision from all hash tables) + it2 = set_triple->find(triple); + if(it2 == set_triple->end()){ + set_triple->insert(triple); + // Record all matching points (within radius) + // Record counts of pairs + it = set->find(pair); + if (it == set->end()) { + set->insert(pair); + count[trackID]++; + if (!isnan(dist)) { + r.trackID = trackID; + r.qpos = qpos; + r.dist = dist; + r.spos = spos; + point_queues[trackID].push(r); + if(point_queues[trackID].size() > pointNN) + point_queues[trackID].pop(); + } + } + } +} + +void trackSequenceQueryRadNNReporter::report(char *fileTable, void *adbQueryResponse) { + std::priority_queue < Radresult, std::vector, std::greater > result; + // KLUDGE: doing this backwards in an attempt to get the same + // tiebreak behaviour as before. + Radresult r; + NNresult rk; + + for (int i = numFiles-1; i >= 0; i--) { + r.trackID = i; + r.count = count[i]; + if(r.count > 0) { + cout.flush(); + result.push(r); + if (result.size() > trackNN) { + result.pop(); + } + } + } + + std::vector v; + unsigned int size = result.size(); + for(unsigned int k = 0; k < size; k++) { + r = result.top(); + v.push_back(r); + result.pop(); + } // Traverse tracks in descending order of count cardinality @@ -484,7 +581,11 @@ if(adbQueryResponse==0) { for(rit = v.rbegin(); rit < v.rend(); rit++) { r = *rit; - std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " " << r.count << std::endl; + if(fileTable) + std::cout << fileTable + r.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << r.trackID << " "; + std::cout << r.count << std::endl; // Reverse the order of the points stored in point_queues unsigned int qsize=point_queues[r.trackID].size(); @@ -492,9 +593,8 @@ point_queue.push(point_queues[r.trackID].top()); point_queues[r.trackID].pop(); } - for(unsigned int k=0; k < qsize; k++){ - NNresult rk = point_queue.top(); + rk = point_queue.top(); std::cout << rk.dist << " " << rk.qpos << " " << rk.spos << std::endl; point_queue.pop(); } @@ -505,7 +605,6 @@ delete[] point_queues; } - /********** ONE-TO-ONE REPORTERS *****************/ // track Sequence Query Radius NN Reporter One-to-One @@ -516,7 +615,7 @@ trackSequenceQueryRadNNReporterOneToOne(unsigned int pointNN, unsigned int trackNN, unsigned int numFiles); ~trackSequenceQueryRadNNReporterOneToOne(); void add_point(unsigned int trackID, unsigned int qpos, unsigned int spos, double dist); - void report(char *fileTable, adb__queryResponse *adbQueryResponse); + void report(char *fileTable, void *adbQueryResponse); protected: unsigned int pointNN; unsigned int trackNN; @@ -564,7 +663,7 @@ } -void trackSequenceQueryRadNNReporterOneToOne::report(char *fileTable, adb__queryResponse *adbQueryResponse) { +void trackSequenceQueryRadNNReporterOneToOne::report(char *fileTable, void *adbQueryResponse) { if(adbQueryResponse==0) { std::vector< NNresult >::iterator vit; NNresult rk; @@ -572,11 +671,16 @@ rk = *vit; std::cout << rk.dist << " " << rk.qpos << " " - << rk.spos << " " - << fileTable + rk.trackID*O2_FILETABLE_ENTRY_SIZE - << std::endl; + << rk.spos << " "; + if(fileTable) + std::cout << fileTable + rk.trackID*O2_FILETABLE_ENTRY_SIZE << " "; + else + std::cout << rk.trackID << " "; + std::cout << std::endl; } } else { // FIXME } } + +#endif