audiodb: index.cpp comparison

comparison index.cpp @ 456:0ef029232213 api-inversion

Baby steps with index.cpp: audioDB::index_make_shingle uses almost no shared state. Make it use none at all, and then remove it from the audioDB class.

author	mas01cr
date	Wed, 24 Dec 2008 10:57:27 +0000
parents	93ce12fe2f76
children	913a95f06998

comparison

equal deleted inserted replaced

-:93ce12fe2f76
+:0ef029232213
 // Move the feature vector read pointer to start of fetures in database
 lseek(dbfid, dbH->dataOffset, SEEK_SET);
 }
-/************************ LSH indexing ***********************************/
-void audioDB::index_index_db(const char* dbName){
-char* newIndexName;
-double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
-Uns32T dbVectors = 0;
-printf("INDEX: initializing header\n");
-// Check if audioDB exists, initialize header and open database for read
-forWrite = false;
-initDBHeader(dbName);
-if(dbH->flags & O2_FLAG_POWER)
-usingPower = true;
-if(dbH->flags & O2_FLAG_TIMES)
-usingTimes = true;
-newIndexName = index_get_name(dbName, radius, sequenceLength);
-// Set unit norming flag override
-audioDB::normalizedDistance = !audioDB::no_unit_norming;
-VERB_LOG(1, "INDEX: dim %d\n", (int)dbH->dim);
-VERB_LOG(1, "INDEX: R %f\n", radius);
-VERB_LOG(1, "INDEX: seqlen %d\n", sequenceLength);
-VERB_LOG(1, "INDEX: lsh_w %f\n", lsh_param_w);
-VERB_LOG(1, "INDEX: lsh_k %d\n", lsh_param_k);
-VERB_LOG(1, "INDEX: lsh_m %d\n", lsh_param_m);
-VERB_LOG(1, "INDEX: lsh_N %d\n", lsh_param_N);
-VERB_LOG(1, "INDEX: lsh_C %d\n", lsh_param_ncols);
-VERB_LOG(1, "INDEX: lsh_b %d\n", lsh_param_b);
-VERB_LOG(1, "INDEX: normalized? %s\n", normalizedDistance?"true":"false");
-if((lshfid = open(newIndexName,O_RDONLY))<0){
-printf("INDEX: constructing new LSH index\n");
-printf("INDEX: making index file %s\n", newIndexName);
-fflush(stdout);
-// Construct new LSH index
-lsh = new LSH((float)lsh_param_w, lsh_param_k,
-		  lsh_param_m,
-		  (Uns32T)(sequenceLength*dbH->dim),
-		  lsh_param_N,
-		  lsh_param_ncols,
-		  (float)radius);
-assert(lsh);
-Uns32T endTrack = lsh_param_b;
-if( endTrack > dbH->numFiles)
-endTrack = dbH->numFiles;
-// Insert up to lsh_param_b tracks
-if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
-index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
-}
-index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
-lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
-// Clean up
-delete lsh;
-lsh = 0;
-close(lshfid);
-}
-// Attempt to open LSH file
-if((lshfid = open(newIndexName,O_RDONLY))>0){
-printf("INDEX: merging with existing LSH index\n");
-fflush(stdout);
-char* mergeIndexName = newIndexName;
-// Get the lsh header info and find how many tracks are inserted already
-lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here
-assert(lsh);
-Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1;
-delete lsh;
-lsh = 0;
-// Insert up to lsh_param_b tracks
-if(  !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){
-index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
-}
-// This allows for updating index after more tracks are inserted into audioDB
-for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
-Uns32T endTrack = startTrack + lsh_param_b;
-if( endTrack > dbH->numFiles)
-	endTrack = dbH->numFiles;
-printf("Indexing track range: %d - %d\n", startTrack, endTrack);
-fflush(stdout);
-lsh = new LSH(mergeIndexName, false); // Initialize empty LSH tables
-assert(lsh);
-// Insert up to lsh_param_b database tracks
-index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
-// Serialize to file (merging is performed here)
-lsh->serialize(mergeIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
-delete lsh;
-lsh = 0;
-}
-close(lshfid);
-printf("INDEX: done constructing LSH index.\n");
-fflush(stdout);
-}
-else{
-error("Something's wrong with LSH index file");
-exit(1);
-}
-delete[] newIndexName;
-delete[] sNorm;
-delete[] sPower;
-}
-void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
-if(usingPower){
-int one;
-unsigned int count;
-count = read(powerfd, &one, sizeof(unsigned int));
-if (count != sizeof(unsigned int)) {
-error("powerfd read failed", "int", "read");
-}
-if (one != 1) {
-error("dimensionality of power file not 1", powerFileName);
-}
-// FIXME: should check that the powerfile is the right size for
-// this.  -- CSR, 2007-10-30
-count = read(powerfd, powerdata, numVectors * sizeof(double));
-if (count != numVectors * sizeof(double)) {
-error("powerfd read failed", "double", "read");
-}
-}
-}
-// initialize auxillary track data from filesystem
-// pre-conditions:
-// dbH->flags & O2_FLAG_LARGE_ADB
-// feature data allocated and copied (fvp)
-//
-// post-conditions:
-// allocated power data
-// allocated l2norm data
-//
-void audioDB::init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp){
-if( !(dbH->flags & O2_FLAG_LARGE_ADB) )
-error("error: init_track_large_adb required O2_FLAG_LARGE_ADB");
-// Allocate and read the power sequence
-if(trackTable[trackID]>=sequenceLength){
-char* prefixedString = new char[O2_MAXFILESTR];
-char* tmpStr = prefixedString;
-// Open and check dimensions of power file
-strncpy(prefixedString, powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
-prefix_name((char ** const)&prefixedString, adb_feature_root);
-if(prefixedString!=tmpStr)
-delete[] tmpStr;
-powerfd = open(prefixedString, O_RDONLY);
-if (powerfd < 0) {
-error("failed to open power file", prefixedString);
-}
-if (fstat(powerfd, &statbuf) < 0) {
-error("fstat error finding size of power file", prefixedString, "fstat");
-}
-if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] )
-error("Dimension mismatch: numPowers != numVectors", prefixedString);
-*sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values
-assert(*sPowerp);
-*spPtrp = *sPowerp;
-insertPowerData(trackTable[trackID], powerfd, *sPowerp);
-if (0 < powerfd) {
-close(powerfd);
-}
-audiodb_sequence_sum(*sPowerp, trackTable[trackID], sequenceLength);
-audiodb_sequence_average(*sPowerp, trackTable[trackID], sequenceLength);
-powerTable = 0;
-// Allocate and calculate the l2norm sequence
-*sNormpp = new double[trackTable[trackID]];
-assert(*sNormpp);
-*snPtrp = *sNormpp;
-audiodb_l2norm_buffer(fvp, dbH->dim, trackTable[trackID], *sNormpp);
-audiodb_sequence_sum(*sNormpp, trackTable[trackID], sequenceLength);
-audiodb_sequence_sqrt(*sNormpp, trackTable[trackID], sequenceLength);
-}
-}
-void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track,
-				  double** fvpp, double** sNormpp,double** snPtrp,
-				  double** sPowerp, double** spPtrp){
-size_t nfv = 0;
-double* fvp = 0; // Keep pointer for memory allocation and free() for track data
-Uns32T trackID = 0;
-VERB_LOG(1, "indexing tracks...");
-int trackfd = dbfid;
-for(trackID = start_track ; trackID < end_track ; trackID++ ){
-if( dbH->flags & O2_FLAG_LARGE_ADB ){
-char* prefixedString = new char[O2_MAXFILESTR];
-char* tmpStr = prefixedString;
-// Open and check dimensions of feature file
-strncpy(prefixedString, featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
-prefix_name((char ** const) &prefixedString, adb_feature_root);
-if(prefixedString!=tmpStr)
-	delete[] tmpStr;
-initInputFile(prefixedString);
-trackfd = infid;
-}
-if(audiodb_read_data(adb, trackfd, trackID, &fvp, &nfv))
-error("failed to read data");
-*fvpp = fvp; // Protect memory allocation and free() for track data
-if( dbH->flags & O2_FLAG_LARGE_ADB )
-// Load power and calculate power and l2norm sequence sums
-init_track_aux_data(trackID, fvp, sNormpp, snPtrp, sPowerp, spPtrp);
-if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
-break;
-if ( dbH->flags & O2_FLAG_LARGE_ADB ){
-close(infid);
-delete[] *sNormpp;
-delete[] *sPowerp;
-*sNormpp = *sPowerp = *snPtrp = *snPtrp = 0;
-}
-} // end for(trackID = start_track ; ... )
-std::cout << "finished inserting." << endl;
-}
-int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){
-// Loop over the current input track's vectors
-Uns32T numVecs = 0;
-if (trackTable[trackID] > O2_MAXTRACKLEN) {
-if (O2_MAXTRACKLEN < sequenceLength - 1) {
-numVecs = 0;
-} else {
-numVecs = O2_MAXTRACKLEN - sequenceLength + 1;
-}
-} else {
-if (trackTable[trackID] < sequenceLength - 1) {
-numVecs = 0;
-} else {
-numVecs = trackTable[trackID] - sequenceLength + 1;
-}
-}
-Uns32T numVecsAboveThreshold = 0, collisionCount = 0;
-if(numVecs){
-vv = index_initialize_shingles(numVecs);
-for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
-index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
-numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
-collisionCount = index_insert_shingles(vv, trackID, *sppp);
-}
-float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
-/* index_norm_shingles() only goes as far as the end of the
-sequence, which is right, but the space allocated is for the
-whole track.  */
-/* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
-* So let's be certain the pointers are in the correct place
-*/
-if( !(dbH->flags & O2_FLAG_LARGE_ADB) ){
-*snpp += trackTable[trackID];
-*sppp += trackTable[trackID];
-*fvpp += trackTable[trackID] * dbH->dim;
-}
-std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl;
-std::cout.flush();
-return true;
-}
-Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
-Uns32T collisionCount = 0;
-cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
-for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){
-if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold)))
-collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID, lsh_n_point_bits));
-spp+=sequenceHop;
-}
-return collisionCount;
-}
 /********************* LSH shingle construction ***************************/
 // Construct shingles out of a feature matrix
 // inputs:
 // idx is vector index in feature matrix
 // idx < numVectors - sequenceLength + 1
 //
 // post-conditions:
 // (*vv)[idx] contains a shingle with dbH->dim*sequenceLength float values
-void audioDB::index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
+static void audiodb_index_make_shingle(vector<vector<float> >* vv, Uns32T idx, double* fvp, Uns32T dim, Uns32T seqLen){
 assert(idx<(*vv).size());
 vector<float>::iterator ve = (*vv)[idx].end();
-vi=(*vv)[idx].begin();        // shingle iterator
+vector<float>::iterator vi = (*vv)[idx].begin();
 // First feature vector in shingle
-if(idx==0){
+if(idx == 0) {
-while(vi!=ve)
+while(vi!=ve) {
 *vi++ = (float)(*fvp++);
 }
-// Not first feature vector in shingle
+} else {
-else{
+// Not first feature vector in shingle
-vector<float>::iterator ui=(*vv)[idx-1].begin() + dim; // previous shingle iterator
+vector<float>::iterator ui=(*vv)[idx-1].begin() + dim;
 // Previous seqLen-1 dim-vectors
-while(vi!=ve-dim)
+while(vi!=ve-dim) {
-*vi++=*ui++;
+*vi++ = *ui++;
+}
 // Move data pointer to next feature vector
 fvp += ( seqLen + idx - 1 ) * dim ;
 // New d-vector
-while(vi!=ve)
+while(vi!=ve) {
 *vi++ = (float)(*fvp++);
+}
 }
 }
 // norm shingles
 // in-place norming, no deletions
 }
 return z;
 }
+/************************ LSH indexing ***********************************/
+void audioDB::index_index_db(const char* dbName){
+char* newIndexName;
+double *fvp = 0, *sNorm = 0, *snPtr = 0, *sPower = 0, *spPtr = 0;
+Uns32T dbVectors = 0;
+printf("INDEX: initializing header\n");
+// Check if audioDB exists, initialize header and open database for read
+forWrite = false;
+initDBHeader(dbName);
+if(dbH->flags & O2_FLAG_POWER)
+usingPower = true;
+if(dbH->flags & O2_FLAG_TIMES)
+usingTimes = true;
+newIndexName = index_get_name(dbName, radius, sequenceLength);
+// Set unit norming flag override
+audioDB::normalizedDistance = !audioDB::no_unit_norming;
+VERB_LOG(1, "INDEX: dim %d\n", (int)dbH->dim);
+VERB_LOG(1, "INDEX: R %f\n", radius);
+VERB_LOG(1, "INDEX: seqlen %d\n", sequenceLength);
+VERB_LOG(1, "INDEX: lsh_w %f\n", lsh_param_w);
+VERB_LOG(1, "INDEX: lsh_k %d\n", lsh_param_k);
+VERB_LOG(1, "INDEX: lsh_m %d\n", lsh_param_m);
+VERB_LOG(1, "INDEX: lsh_N %d\n", lsh_param_N);
+VERB_LOG(1, "INDEX: lsh_C %d\n", lsh_param_ncols);
+VERB_LOG(1, "INDEX: lsh_b %d\n", lsh_param_b);
+VERB_LOG(1, "INDEX: normalized? %s\n", normalizedDistance?"true":"false");
+if((lshfid = open(newIndexName,O_RDONLY))<0){
+printf("INDEX: constructing new LSH index\n");
+printf("INDEX: making index file %s\n", newIndexName);
+fflush(stdout);
+// Construct new LSH index
+lsh = new LSH((float)lsh_param_w, lsh_param_k,
+		  lsh_param_m,
+		  (Uns32T)(sequenceLength*dbH->dim),
+		  lsh_param_N,
+		  lsh_param_ncols,
+		  (float)radius);
+assert(lsh);
+Uns32T endTrack = lsh_param_b;
+if( endTrack > dbH->numFiles)
+endTrack = dbH->numFiles;
+// Insert up to lsh_param_b tracks
+if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
+index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
+}
+index_insert_tracks(0, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
+lsh->serialize(newIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1);
+// Clean up
+delete lsh;
+lsh = 0;
+close(lshfid);
+}
+// Attempt to open LSH file
+if((lshfid = open(newIndexName,O_RDONLY))>0){
+printf("INDEX: merging with existing LSH index\n");
+fflush(stdout);
+char* mergeIndexName = newIndexName;
+// Get the lsh header info and find how many tracks are inserted already
+lsh = new LSH(mergeIndexName, false); // lshInCore=false to avoid loading hashTables here
+assert(lsh);
+Uns32T maxs = index_to_trackID(lsh->get_maxp(), lsh_n_point_bits)+1;
+delete lsh;
+lsh = 0;
+// Insert up to lsh_param_b tracks
+if(  !sNorm && !(dbH->flags & O2_FLAG_LARGE_ADB) ){
+index_initialize(&sNorm, &snPtr, &sPower, &spPtr, &dbVectors);
+}
+// This allows for updating index after more tracks are inserted into audioDB
+for(Uns32T startTrack = maxs; startTrack < dbH->numFiles; startTrack+=lsh_param_b){
+Uns32T endTrack = startTrack + lsh_param_b;
+if( endTrack > dbH->numFiles)
+	endTrack = dbH->numFiles;
+printf("Indexing track range: %d - %d\n", startTrack, endTrack);
+fflush(stdout);
+lsh = new LSH(mergeIndexName, false); // Initialize empty LSH tables
+assert(lsh);
+// Insert up to lsh_param_b database tracks
+index_insert_tracks(startTrack, endTrack, &fvp, &sNorm, &snPtr, &sPower, &spPtr);
+// Serialize to file (merging is performed here)
+lsh->serialize(mergeIndexName, lsh_in_core?O2_SERIAL_FILEFORMAT2:O2_SERIAL_FILEFORMAT1); // Serialize core LSH heap to disk
+delete lsh;
+lsh = 0;
+}
+close(lshfid);
+printf("INDEX: done constructing LSH index.\n");
+fflush(stdout);
+}
+else{
+error("Something's wrong with LSH index file");
+exit(1);
+}
+delete[] newIndexName;
+delete[] sNorm;
+delete[] sPower;
+}
+void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
+if(usingPower){
+int one;
+unsigned int count;
+count = read(powerfd, &one, sizeof(unsigned int));
+if (count != sizeof(unsigned int)) {
+error("powerfd read failed", "int", "read");
+}
+if (one != 1) {
+error("dimensionality of power file not 1", powerFileName);
+}
+// FIXME: should check that the powerfile is the right size for
+// this.  -- CSR, 2007-10-30
+count = read(powerfd, powerdata, numVectors * sizeof(double));
+if (count != numVectors * sizeof(double)) {
+error("powerfd read failed", "double", "read");
+}
+}
+}
+// initialize auxillary track data from filesystem
+// pre-conditions:
+// dbH->flags & O2_FLAG_LARGE_ADB
+// feature data allocated and copied (fvp)
+//
+// post-conditions:
+// allocated power data
+// allocated l2norm data
+//
+void audioDB::init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp){
+if( !(dbH->flags & O2_FLAG_LARGE_ADB) )
+error("error: init_track_large_adb required O2_FLAG_LARGE_ADB");
+// Allocate and read the power sequence
+if(trackTable[trackID]>=sequenceLength){
+char* prefixedString = new char[O2_MAXFILESTR];
+char* tmpStr = prefixedString;
+// Open and check dimensions of power file
+strncpy(prefixedString, powerFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+prefix_name((char ** const)&prefixedString, adb_feature_root);
+if(prefixedString!=tmpStr)
+delete[] tmpStr;
+powerfd = open(prefixedString, O_RDONLY);
+if (powerfd < 0) {
+error("failed to open power file", prefixedString);
+}
+if (fstat(powerfd, &statbuf) < 0) {
+error("fstat error finding size of power file", prefixedString, "fstat");
+}
+if( (statbuf.st_size - sizeof(int)) / (sizeof(double)) != trackTable[trackID] )
+error("Dimension mismatch: numPowers != numVectors", prefixedString);
+*sPowerp = new double[trackTable[trackID]]; // Allocate memory for power values
+assert(*sPowerp);
+*spPtrp = *sPowerp;
+insertPowerData(trackTable[trackID], powerfd, *sPowerp);
+if (0 < powerfd) {
+close(powerfd);
+}
+audiodb_sequence_sum(*sPowerp, trackTable[trackID], sequenceLength);
+audiodb_sequence_average(*sPowerp, trackTable[trackID], sequenceLength);
+powerTable = 0;
+// Allocate and calculate the l2norm sequence
+*sNormpp = new double[trackTable[trackID]];
+assert(*sNormpp);
+*snPtrp = *sNormpp;
+audiodb_l2norm_buffer(fvp, dbH->dim, trackTable[trackID], *sNormpp);
+audiodb_sequence_sum(*sNormpp, trackTable[trackID], sequenceLength);
+audiodb_sequence_sqrt(*sNormpp, trackTable[trackID], sequenceLength);
+}
+}
+void audioDB::index_insert_tracks(Uns32T start_track, Uns32T end_track,
+				  double** fvpp, double** sNormpp,double** snPtrp,
+				  double** sPowerp, double** spPtrp){
+size_t nfv = 0;
+double* fvp = 0; // Keep pointer for memory allocation and free() for track data
+Uns32T trackID = 0;
+VERB_LOG(1, "indexing tracks...");
+int trackfd = dbfid;
+for(trackID = start_track ; trackID < end_track ; trackID++ ){
+if( dbH->flags & O2_FLAG_LARGE_ADB ){
+char* prefixedString = new char[O2_MAXFILESTR];
+char* tmpStr = prefixedString;
+// Open and check dimensions of feature file
+strncpy(prefixedString, featureFileNameTable+trackID*O2_FILETABLE_ENTRY_SIZE, O2_MAXFILESTR);
+prefix_name((char ** const) &prefixedString, adb_feature_root);
+if(prefixedString!=tmpStr)
+	delete[] tmpStr;
+initInputFile(prefixedString);
+trackfd = infid;
+}
+if(audiodb_read_data(adb, trackfd, trackID, &fvp, &nfv))
+error("failed to read data");
+*fvpp = fvp; // Protect memory allocation and free() for track data
+if( dbH->flags & O2_FLAG_LARGE_ADB )
+// Load power and calculate power and l2norm sequence sums
+init_track_aux_data(trackID, fvp, sNormpp, snPtrp, sPowerp, spPtrp);
+if(!index_insert_track(trackID, fvpp, snPtrp, spPtrp))
+break;
+if ( dbH->flags & O2_FLAG_LARGE_ADB ){
+close(infid);
+delete[] *sNormpp;
+delete[] *sPowerp;
+*sNormpp = *sPowerp = *snPtrp = *snPtrp = 0;
+}
+} // end for(trackID = start_track ; ... )
+std::cout << "finished inserting." << endl;
+}
+int audioDB::index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp){
+// Loop over the current input track's vectors
+Uns32T numVecs = 0;
+if (trackTable[trackID] > O2_MAXTRACKLEN) {
+if (O2_MAXTRACKLEN < sequenceLength - 1) {
+numVecs = 0;
+} else {
+numVecs = O2_MAXTRACKLEN - sequenceLength + 1;
+}
+} else {
+if (trackTable[trackID] < sequenceLength - 1) {
+numVecs = 0;
+} else {
+numVecs = trackTable[trackID] - sequenceLength + 1;
+}
+}
+Uns32T numVecsAboveThreshold = 0, collisionCount = 0;
+if(numVecs){
+vv = index_initialize_shingles(numVecs);
+for( Uns32T pointID = 0 ; pointID < numVecs; pointID++ )
+audiodb_index_make_shingle(vv, pointID, *fvpp, dbH->dim, sequenceLength);
+numVecsAboveThreshold = index_norm_shingles(vv, *snpp, *sppp);
+collisionCount = index_insert_shingles(vv, trackID, *sppp);
+}
+float meanCollisionCount = numVecsAboveThreshold?(float)collisionCount/numVecsAboveThreshold:0;
+/* index_norm_shingles() only goes as far as the end of the
+sequence, which is right, but the space allocated is for the
+whole track.  */
+/* But numVecs will be <trackTable[track] if trackTable[track]>O2_MAXTRACKLEN
+* So let's be certain the pointers are in the correct place
+*/
+if( !(dbH->flags & O2_FLAG_LARGE_ADB) ){
+*snpp += trackTable[trackID];
+*sppp += trackTable[trackID];
+*fvpp += trackTable[trackID] * dbH->dim;
+}
+std::cout << " n=" << trackTable[trackID] << " n'=" << numVecsAboveThreshold << " E[#c]=" << lsh->get_mean_collision_rate() << " E[#p]=" << meanCollisionCount << endl;
+std::cout.flush();
+return true;
+}
+Uns32T audioDB::index_insert_shingles(vector<vector<float> >* vv, Uns32T trackID, double* spp){
+Uns32T collisionCount = 0;
+cout << "[" << trackID << "]" << fileTable+trackID*O2_FILETABLE_ENTRY_SIZE;
+for( Uns32T pointID=0 ; pointID < (*vv).size(); pointID+=sequenceHop){
+if(!use_absolute_threshold || (use_absolute_threshold && (*spp >= absolute_threshold)))
+collisionCount += lsh->insert_point((*vv)[pointID], index_from_trackInfo(trackID, pointID, lsh_n_point_bits));
+spp+=sequenceHop;
+}
+return collisionCount;
+}
 /*********************** LSH retrieval ****************************/
 // return true if indexed query performed else return false
 int audioDB::index_init_query(const char* dbName){
 Uns32T Nq = (qpointers.nvectors>O2_MAXTRACKLEN?O2_MAXTRACKLEN:qpointers.nvectors) - sequenceLength + 1;
 vv = index_initialize_shingles(Nq); // allocate memory to copy query vectors to shingles
 // Construct shingles from query features
 for( Uns32T pointID = 0 ; pointID < Nq ; pointID++ )
-index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
+audiodb_index_make_shingle(vv, pointID, query, dbH->dim, sequenceLength);
 // Normalize query vectors
 Uns32T numVecsAboveThreshold = index_norm_shingles( vv, qpointers.l2norm, qpointers.power );
 // Nq contains number of inspected points in query file,

Mercurial > hg > audiodb

comparison index.cpp @ 456:0ef029232213 api-inversion