Mercurial > hg > audiodb
changeset 116:531ce5162861 endian-neutral
Towards endian-neutrality, step 1.
dbH treatment is now endian-neutral: all on-disk and in-memory uint32_t
fields of dbH are in network byte order, and all reads and writes in
audioDB host code go through ntohl() and htonl() as appropriate.
author: mas01cr
date: Fri, 12 Oct 2007 11:20:35 +0000
parents: 97f4ff699d7c
children: e800eac265c3
files: audioDB.cpp
diffstat: 1 files changed, 186 insertions(+), 183 deletions(-) [+]
line wrap: on
line diff
--- a/audioDB.cpp Fri Oct 12 11:13:53 2007 +0000 +++ b/audioDB.cpp Fri Oct 12 11:20:35 2007 +0000 @@ -420,17 +420,17 @@ assert(dbH); // Initialize header - dbH->magic = O2_MAGIC; - dbH->version = O2_FORMAT_VERSION; + dbH->magic = htonl(O2_MAGIC); + dbH->version = htonl(O2_FORMAT_VERSION); dbH->numFiles = 0; dbH->dim = 0; dbH->flags = 0; dbH->length = 0; - dbH->fileTableOffset = ALIGN_UP(O2_HEADERSIZE, 8); - dbH->trackTableOffset = ALIGN_UP(dbH->fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES, 8); - dbH->dataOffset = ALIGN_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES, 8); - dbH->l2normTableOffset = ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); - dbH->timesTableOffset = ALIGN_DOWN(dbH->l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8); + dbH->fileTableOffset = htonl(ALIGN_UP(O2_HEADERSIZE, 8)); + dbH->trackTableOffset = htonl(ALIGN_UP(ntohl(dbH->fileTableOffset) + O2_FILETABLESIZE*O2_MAXFILES, 8)); + dbH->dataOffset = htonl(ALIGN_UP(ntohl(dbH->trackTableOffset) + O2_TRACKTABLESIZE*O2_MAXFILES, 8)); + dbH->l2normTableOffset = htonl(ALIGN_DOWN(O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8)); + dbH->timesTableOffset = htonl(ALIGN_DOWN(ntohl(dbH->l2normTableOffset) - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double), 8)); memcpy (db, dbH, O2_HEADERSIZE); if(verbosity) { @@ -438,7 +438,6 @@ } } - void audioDB::drop(){ // FIXME: drop something? Should we even allow this? } @@ -467,33 +466,34 @@ error("error reading db header", dbName, "read"); } - if(dbH->magic == O2_OLD_MAGIC) { + if(ntohl(dbH->magic) == O2_OLD_MAGIC) { // FIXME: if anyone ever complains, write the program to convert // from the old audioDB format to the new... 
error("database file has old O2 header", dbName); } - if(dbH->magic != O2_MAGIC) { - cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; + if(ntohl(dbH->magic) != O2_MAGIC) { + cerr << "expected: " << O2_MAGIC << ", got: " << ntohl(dbH->magic) << endl; error("database file has incorrect header", dbName); } - if(dbH->version != O2_FORMAT_VERSION) { + if(ntohl(dbH->version) != O2_FORMAT_VERSION) { error("database file has incorect version", dbName); } - if(inFile) - if(dbH->dim == 0 && dbH->length == 0) // empty database - // initialize with input dimensionality - read(infid, &dbH->dim, sizeof(unsigned)); - else { - unsigned test; - read(infid, &test, sizeof(unsigned)); - if(dbH->dim != test) { - cerr << "error: expected dimension: " << dbH->dim << ", got : " << test <<endl; + if(inFile) { + uint32_t inDim; + read(infid, &inDim, sizeof(uint32_t)); + if(ntohl(dbH->dim) == 0 && ntohl(dbH->length) == 0) { + // empty database: initialize with input dimensionality + dbH->dim = htonl(inDim); + } else { + if(dbH->dim != htonl(inDim)) { + cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got : " << inDim << endl; error("feature dimensions do not match database table dimensions"); } } + } // mmap the input file if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) @@ -506,29 +506,29 @@ error("mmap error for initting tables of database", "", "mmap"); // Make some handy tables with correct types - fileTable= (char*)(db+dbH->fileTableOffset); - trackTable = (unsigned*)(db+dbH->trackTableOffset); - dataBuf = (double*)(db+dbH->dataOffset); - l2normTable = (double*)(db+dbH->l2normTableOffset); - timesTable = (double*)(db+dbH->timesTableOffset); + fileTable = (char *)(db + ntohl(dbH->fileTableOffset)); + trackTable = (unsigned *)(db + ntohl(dbH->trackTableOffset)); + dataBuf = (double *)(db + ntohl(dbH->dataOffset)); + l2normTable = (double *)(db + ntohl(dbH->l2normTableOffset)); + timesTable = (double *)(db + 
ntohl(dbH->timesTableOffset)); } void audioDB::insert(const char* dbName, const char* inFile){ initTables(dbName, 1, inFile); - if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) error("Must use timestamps with timestamped database","use --times"); // Check that there is room for at least 1 more file - if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) + if((char*)timesTable<((char*)dataBuf+ntohl(dbH->length)+statbuf.st_size-sizeof(int))) error("No more room in database","insert failed: reason database is full."); if(!key) key=inFile; // Linear scan of filenames check for pre-existing feature unsigned alreadyInserted=0; - for(unsigned k=0; k<dbH->numFiles; k++) + for(unsigned k=0; k<ntohl(dbH->numFiles); k++) if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ alreadyInserted=1; break; @@ -542,7 +542,7 @@ } // Make a track index table of features to file indexes - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); if(!numVectors){ if(verbosity) { cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; @@ -554,35 +554,36 @@ return; } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); + strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, key, strlen(key)); - unsigned insertoffset = dbH->length;// Store current state + unsigned insertoffset = ntohl(dbH->length);// Store current state // Check times status and insert times from file - unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); + unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double)); double* timesdata=timesTable+timesoffset; assert(timesdata+numVectors<l2normTable); insertTimeStamps(numVectors, timesFile, timesdata); // Increment file count - dbH->numFiles++; + dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1); // Update Header 
information - dbH->length+=(statbuf.st_size-sizeof(int)); + dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); // Copy the header back to the database - memcpy (db, dbH, sizeof(dbTableHeaderT)); + memcpy (db, dbH, sizeof(dbTableHeaderT)); // Update track to file index map //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); - memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; // Update the feature database - memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); + memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); // Norm the vectors on input if the database is already L2 normed - if(dbH->flags & O2_FLAG_L2NORM) - unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append + if(ntohl(dbH->flags) & O2_FLAG_L2NORM) + unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append // Report status status(dbName); @@ -599,15 +600,15 @@ void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ unsigned numtimes=0; if(usingTimes){ - if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) - dbH->flags=dbH->flags|O2_FLAG_TIMES; - else if(!(dbH->flags&O2_FLAG_TIMES)){ + if(!(ntohl(dbH->flags) & O2_FLAG_TIMES) && !(ntohl(dbH->numFiles))) + dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_TIMES); + else if(!(ntohl(dbH->flags) & O2_FLAG_TIMES)) { cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; usingTimes=0; } if(!timesFile->is_open()){ - if(dbH->flags & O2_FLAG_TIMES){ + if(ntohl(dbH->flags) & O2_FLAG_TIMES){ munmap(indata,statbuf.st_size); munmap(db,O2_DEFAULTDBSIZE); error("problem opening times file on timestamped 
database",timesFileName); @@ -674,11 +675,11 @@ if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) error("error reading db header"); - if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) error("Must use timestamps with timestamped database","use --times"); - if(dbH->magic!=O2_MAGIC){ - cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; + if(ntohl(dbH->magic)!=O2_MAGIC){ + cerr << "expected:" << O2_MAGIC << ", got:" << ntohl(dbH->magic) << endl; error("database file has incorrect header",dbName); } @@ -713,28 +714,29 @@ error("mmap error for batchinsert into database", "", "mmap"); // Make some handy tables with correct types - fileTable= (char*)(db+dbH->fileTableOffset); - trackTable = (unsigned*)(db+dbH->trackTableOffset); - dataBuf = (double*)(db+dbH->dataOffset); - l2normTable = (double*)(db+dbH->l2normTableOffset); - timesTable = (double*)(db+dbH->timesTableOffset); + fileTable= (char*)(db + ntohl(dbH->fileTableOffset)); + trackTable = (unsigned*)(db + ntohl(dbH->trackTableOffset)); + dataBuf = (double*)(db + ntohl(dbH->dataOffset)); + l2normTable = (double*)(db+ntohl(dbH->l2normTableOffset)); + timesTable = (double*)(db+ntohl(dbH->timesTableOffset)); // Check that there is room for at least 1 more file - if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) + if((char*)timesTable<((char*)dataBuf+(ntohl(dbH->length)+statbuf.st_size-sizeof(int)))) error("No more room in database","insert failed: reason database is full."); - if(thisFile) - if(dbH->dim==0 && dbH->length==0) // empty database - read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality - else { - unsigned test; - read(infid,&test,sizeof(unsigned)); - if(dbH->dim!=test){ - cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <<endl; + if(thisFile) { + uint32_t thisDim; + read(infid,&thisDim,sizeof(uint32_t)); + if(ntohl(dbH->dim) == 0 && 
ntohl(dbH->length)==0) { + // empty database: initialize with input dimensionality + dbH->dim = htonl(thisDim); + } else { + if(ntohl(dbH->dim) != thisDim) { + cerr << "error: expected dimension: " << ntohl(dbH->dim) << ", got :" << thisDim <<endl; error("feature dimensions do not match database table dimensions"); } } - + } // mmap the input file if (thisFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) == (caddr_t) -1) @@ -744,7 +746,7 @@ // Linear scan of filenames check for pre-existing feature unsigned alreadyInserted=0; - for(unsigned k=0; k<dbH->numFiles; k++) + for(unsigned k=0; k < ntohl(dbH->numFiles); k++) if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ alreadyInserted=1; break; @@ -758,7 +760,7 @@ else{ // Make a track index table of features to file indexes - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); if(!numVectors){ if(verbosity) { cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; @@ -771,8 +773,8 @@ thisTimesFile=new ifstream(thisTimesFileName,ios::in); if(!thisTimesFile->is_open()) error("Cannot open timestamp file",thisTimesFileName); - unsigned insertoffset=dbH->length; - unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); + unsigned insertoffset= ntohl(dbH->length); + unsigned timesoffset=insertoffset/(ntohl(dbH->dim)*sizeof(double)); double* timesdata=timesTable+timesoffset; assert(timesdata+numVectors<l2normTable); insertTimeStamps(numVectors,thisTimesFile,timesdata); @@ -780,28 +782,29 @@ delete thisTimesFile; } - strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); + strncpy(fileTable + ntohl(dbH->numFiles)*O2_FILETABLESIZE, thisKey, strlen(thisKey)); - unsigned insertoffset = dbH->length;// Store current state + unsigned insertoffset = ntohl(dbH->length);// Store current state // Increment file 
count - dbH->numFiles++; + dbH->numFiles = htonl(ntohl(dbH->numFiles) + 1); // Update Header information - dbH->length+=(statbuf.st_size-sizeof(int)); + dbH->length = htonl(ntohl(dbH->length) + (statbuf.st_size-sizeof(int))); // Copy the header back to the database - memcpy (db, dbH, sizeof(dbTableHeaderT)); + memcpy (db, dbH, sizeof(dbTableHeaderT)); // Update track to file index map //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); - memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + //memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); + *(trackTable + ntohl(dbH->numFiles) - 1) = numVectors; // Update the feature database - memcpy (db+dbH->dataOffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); + memcpy (db+ntohl(dbH->dataOffset)+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); // Norm the vectors on input if the database is already L2 normed - if(dbH->flags & O2_FLAG_L2NORM) - unitNormAndInsertL2((double*)(db+dbH->dataOffset+insertoffset), dbH->dim, numVectors, 1); // append + if(ntohl(dbH->flags) & O2_FLAG_L2NORM) + unitNormAndInsertL2((double*)(db+ntohl(dbH->dataOffset)+insertoffset), ntohl(dbH->dim), numVectors, 1); // append totalVectors+=numVectors; } @@ -819,7 +822,7 @@ if(verbosity) { cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " - << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; + << totalVectors*ntohl(dbH->dim)*sizeof(double) << " bytes." 
<< endl; } // Report status @@ -882,7 +885,7 @@ unsigned dudCount=0; unsigned nullCount=0; - for(unsigned k=0; k<dbH->numFiles; k++){ + for(unsigned k=0; k<ntohl(dbH->numFiles); k++){ if(trackTable[k]<sequenceLength){ dudCount++; if(!trackTable[k]) @@ -893,25 +896,25 @@ if(adbStatusResult == 0) { // Update Header information - cout << "num files:" << dbH->numFiles << endl; - cout << "data dim:" << dbH->dim <<endl; - if(dbH->dim>0){ - cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<endl; - cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; + cout << "num files:" << ntohl(dbH->numFiles) << endl; + cout << "data dim:" << ntohl(dbH->dim) <<endl; + if(ntohl(dbH->dim)>0){ + cout << "total vectors:" << ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))<<endl; + cout << "vectors available:" << (ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)))/(sizeof(double)*ntohl(dbH->dim)) << endl; } - cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; - cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << - (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; - cout << "flags:" << dbH->flags << endl; + cout << "total bytes:" << ntohl(dbH->length) << " (" << (100.0*ntohl(dbH->length))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl; + cout << "bytes available:" << ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length)) << " (" << + (100.0*(ntohl(dbH->timesTableOffset)-(ntohl(dbH->dataOffset)+ntohl(dbH->length))))/(ntohl(dbH->timesTableOffset)-ntohl(dbH->dataOffset)) << "%)" << endl; + cout << "flags:" << ntohl(dbH->flags) << endl; cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; } else { - 
adbStatusResult->numFiles = dbH->numFiles; - adbStatusResult->dim = dbH->dim; - adbStatusResult->length = dbH->length; + adbStatusResult->numFiles = ntohl(dbH->numFiles); + adbStatusResult->dim = ntohl(dbH->dim); + adbStatusResult->length = ntohl(dbH->length); adbStatusResult->dudCount = dudCount; adbStatusResult->nullCount = nullCount; - adbStatusResult->flags = dbH->flags; + adbStatusResult->flags = ntohl(dbH->flags); } } @@ -919,7 +922,7 @@ if(!dbH) initTables(dbName, 0, 0); - for(unsigned k=0, j=0; k<dbH->numFiles; k++){ + for(unsigned k=0, j=0; k<ntohl(dbH->numFiles); k++){ cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; j+=trackTable[k]; } @@ -929,12 +932,12 @@ void audioDB::l2norm(const char* dbName){ initTables(dbName, true, 0); - if(dbH->length>0){ - unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); - unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append + if(ntohl(dbH->length)>0){ + unsigned numVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); + unitNormAndInsertL2(dataBuf, ntohl(dbH->dim), numVectors, 0); // No append } // Update database flags - dbH->flags = dbH->flags|O2_FLAG_L2NORM; + dbH->flags = htonl(ntohl(dbH->flags) | O2_FLAG_L2NORM); memcpy (db, dbH, O2_HEADERSIZE); } @@ -962,7 +965,7 @@ //return ordinal position of key in keyTable unsigned audioDB::getKeyPos(char* key){ - for(unsigned k=0; k<dbH->numFiles; k++) + for(unsigned k=0; k< ntohl(dbH->numFiles); k++) if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) return k; error("Key not found",key); @@ -976,19 +979,19 @@ // For each input vector, find the closest pointNN matching output vectors and report // we use stdout in this stub version - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); double* query = (double*)(indata+sizeof(int)); double* data = dataBuf; double* queryCopy = 0; - if( dbH->flags & 
O2_FLAG_L2NORM ){ + if(ntohl(dbH->flags) & O2_FLAG_L2NORM ){ // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; + queryCopy = new double[numVectors*ntohl(dbH->dim)]; qNorm = new double[numVectors]; assert(queryCopy&&qNorm); - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); + unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); query = queryCopy; } @@ -1007,20 +1010,20 @@ unsigned k,l,n; double thisDist; - unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double)); + unsigned totalVecs=ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double)); double meanQdur = 0; double* timesdata = 0; double* dbdurs = 0; - if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; usingTimes=0; } - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ timesdata = new double[numVectors]; insertTimeStamps(numVectors, timesFile, timesdata); // Calculate durations of points @@ -1043,7 +1046,7 @@ if(verbosity>1) { cerr << "query point: " << queryPoint << endl; cerr.flush(); } - query=query+queryPoint*dbH->dim; + query=query+queryPoint*ntohl(dbH->dim); numVectors=queryPoint+1; j=1; } @@ -1054,7 +1057,7 @@ k=totalVecs; // number of database vectors while(k--){ // database thisDist=0; - l=dbH->dim; + l=ntohl(dbH->dim); double* q=query; while(l--) thisDist+=*q++**data++; @@ -1073,7 +1076,7 @@ } distances[n]=thisDist; qIndexes[n]=numVectors-j-1; - sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; + sIndexes[n]=ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim))-k-1; break; } } @@ -1083,7 +1086,7 @@ } } // Move query pointer to next query point - query+=dbH->dim; + query+=ntohl(dbH->dim); } gettimeofday(&tv2, NULL); @@ -1097,7 +1100,7 @@ for(k=0; k < pointNN; k++){ // Scan for key unsigned cumTrack=0; - for(l=0 ; l<dbH->numFiles; l++){ + for(l=0 ; l<ntohl(dbH->numFiles); l++){ cumTrack+=trackTable[l]; if(sIndexes[k]<cumTrack){ cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " " @@ -1128,7 +1131,7 @@ adbQueryResult->Dist[k]=distances[k]; adbQueryResult->Qpos[k]=qIndexes[k]; unsigned cumTrack=0; - for(l=0 ; l<dbH->numFiles; l++){ + for(l=0 ; l<ntohl(dbH->numFiles); l++){ cumTrack+=trackTable[l]; if(sIndexes[k]<cumTrack){ sprintf(adbQueryResult->Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); @@ -1157,18 +1160,18 @@ initTables(dbName, 0, inFile); // For each input vector, find the closest pointNN matching output vectors and report - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); double* query = (double*)(indata+sizeof(int)); double* data = 
dataBuf; double* queryCopy = 0; - if( dbH->flags & O2_FLAG_L2NORM ){ + if( ntohl(dbH->flags) & O2_FLAG_L2NORM ){ // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; + queryCopy = new double[numVectors*ntohl(dbH->dim)]; qNorm = new double[numVectors]; assert(queryCopy&&qNorm); - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); + unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); query = queryCopy; } @@ -1206,15 +1209,15 @@ double* timesdata = 0; double* meanDBdur = 0; - if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; usingTimes=0; } - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ timesdata = new double[numVectors]; insertTimeStamps(numVectors, timesFile, timesdata); // Calculate durations of points @@ -1223,8 +1226,8 @@ meanQdur+=timesdata[k]; } meanQdur/=k; - meanDBdur = new double[dbH->numFiles]; - for(k=0; k<dbH->numFiles; k++){ + meanDBdur = new double[ntohl(dbH->numFiles)]; + for(k=0; k<ntohl(dbH->numFiles); k++){ meanDBdur[k]=0.0; for(j=0; j<trackTable[k]-1 ; j++) meanDBdur[k]+=timesTable[j+1]-timesTable[j]; @@ -1239,24 +1242,24 @@ if(verbosity>1) { cerr << "query point: " << queryPoint << endl; cerr.flush(); } - query=query+queryPoint*dbH->dim; + query=query+queryPoint*ntohl(dbH->dim); numVectors=queryPoint+1; } // build track offset table - unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; unsigned cumTrack=0; unsigned trackIndexOffset; - for(k=0; k<dbH->numFiles;k++){ + for(k=0; k<ntohl(dbH->numFiles);k++){ trackOffsetTable[k]=cumTrack; - cumTrack+=trackTable[k]*dbH->dim; + cumTrack+=trackTable[k]*ntohl(dbH->dim); } char nextKey[MAXSTR]; gettimeofday(&tv1, NULL); - for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ if(trackFile){ if(!trackFile->eof()){ trackFile->getline(nextKey,MAXSTR); @@ -1266,15 +1269,15 @@ break; } trackOffset=trackOffsetTable[track]; // numDoubles offset - trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset if(verbosity>7) { - cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); + cerr << track << "." << trackOffset/(ntohl(dbH->dim)) << "." 
<< trackTable[track] << " | ";cerr.flush(); } - if(dbH->flags & O2_FLAG_L2NORM) - usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; + if(ntohl(dbH->flags) & O2_FLAG_L2NORM) + usingQueryPoint?query=queryCopy+queryPoint*ntohl(dbH->dim):query=queryCopy; else - usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); + usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*ntohl(dbH->dim):query=(double*)(indata+sizeof(int)); if(usingQueryPoint) j=1; else @@ -1284,7 +1287,7 @@ data=dataBuf+trackOffset; // data for track while(k--){ thisDist=0; - l=dbH->dim; + l=ntohl(dbH->dim); double* q=query; while(l--) thisDist+=*q++**data++; @@ -1313,7 +1316,7 @@ } } // track // Move query pointer to next query point - query+=dbH->dim; + query+=ntohl(dbH->dim); } // query // Take the average of this track's distance // Test the track distances @@ -1413,7 +1416,7 @@ // For each input vector, find the closest pointNN matching output vectors and report // we use stdout in this stub version - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); double* query = (double*)(indata+sizeof(int)); double* queryCopy = 0; @@ -1424,7 +1427,7 @@ double SILENCE_THRESH=0; double DIFF_THRESH=0; - if(!(dbH->flags & O2_FLAG_L2NORM) ) + if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) ) error("Database must be L2 normed for sequence query","use -L2NORM"); if(numVectors<sequenceLength) @@ -1433,16 +1436,16 @@ if(verbosity>1) { cerr << "performing norms ... 
"; cerr.flush(); } - unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + queryCopy = new double[numVectors*ntohl(dbH->dim)]; + memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); qNorm = new double[numVectors]; sNorm = new double[dbVectors]; - sMeanL2=new double[dbH->numFiles]; + sMeanL2=new double[ntohl(dbH->numFiles)]; assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); query = queryCopy; // Make norm measurements relative to sequenceLength @@ -1454,7 +1457,7 @@ // Copy the L2 norm values to core to avoid disk random access later on memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); double* snPtr = sNorm; - for(i=0; i<dbH->numFiles; i++){ + for(i=0; i<ntohl(dbH->numFiles); i++){ if(trackTable[i]>=sequenceLength){ tmp1=*snPtr; j=1; @@ -1480,12 +1483,12 @@ } double* pn = sMeanL2; - w=dbH->numFiles; + w=ntohl(dbH->numFiles); while(w--) *pn++=0.0; ps=sNorm; unsigned processedTracks=0; - for(i=0; i<dbH->numFiles; i++){ + for(i=0; i<ntohl(dbH->numFiles); i++){ if(trackTable[i]>sequenceLength-1){ w = trackTable[i]-sequenceLength+1; pn = sMeanL2+i; @@ -1575,15 +1578,15 @@ double* timesdata = 0; double* meanDBdur = 0; - if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; usingTimes=0; } - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ timesdata = new double[numVectors]; assert(timesdata); insertTimeStamps(numVectors, timesFile, timesdata); @@ -1596,9 +1599,9 @@ if(verbosity>1) { cerr << "mean query file duration: " << meanQdur << endl; } - meanDBdur = new double[dbH->numFiles]; + meanDBdur = new double[ntohl(dbH->numFiles)]; assert(meanDBdur); - for(k=0; k<dbH->numFiles; k++){ + for(k=0; k<ntohl(dbH->numFiles); k++){ meanDBdur[k]=0.0; for(j=0; j<trackTable[k]-1 ; j++) meanDBdur[k]+=timesTable[j+1]-timesTable[j]; @@ -1613,7 +1616,7 @@ if(verbosity>1) { cerr << "query point: " << queryPoint << endl; cerr.flush(); } - query=query+queryPoint*dbH->dim; + query=query+queryPoint*ntohl(dbH->dim); qNorm=qNorm+queryPoint; numVectors=wL; } @@ -1635,12 +1638,12 @@ double* dp; // build track offset table - unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; unsigned cumTrack=0; unsigned trackIndexOffset; - for(k=0; k<dbH->numFiles;k++){ + for(k=0; k<ntohl(dbH->numFiles);k++){ trackOffsetTable[k]=cumTrack; - cumTrack+=trackTable[k]*dbH->dim; + cumTrack+=trackTable[k]*ntohl(dbH->dim); } char nextKey [MAXSTR]; @@ -1653,7 +1656,7 @@ double maxSample = 0; // Track loop - for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ // get trackID from file if using a control file if(trackFile){ @@ -1666,7 +1669,7 @@ } trackOffset=trackOffsetTable[track]; // numDoubles offset - trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset if(sequenceLength<=trackTable[track]){ // test for short sequences @@ -1690,12 +1693,12 @@ // Dot product for(j=0; j<numVectors; j++) for(k=0; k<trackTable[track]; k++){ - 
qp=query+j*dbH->dim; - sp=dataBuf+trackOffset+k*dbH->dim; + qp=query+j*ntohl(dbH->dim); + sp=dataBuf+trackOffset+k*ntohl(dbH->dim); DD[j][k]=0.0; // Initialize matched filter array dp=&D[j][k]; // point to correlation cell j,k *dp=0.0; // initialize correlation cell - l=dbH->dim; // size of vectors + l=ntohl(dbH->dim); // size of vectors while(l--) *dp+=*qp++**sp++; } @@ -1915,7 +1918,7 @@ // For each input vector, find the closest pointNN matching output vectors and report // we use stdout in this stub version - unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); + unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*ntohl(dbH->dim)); double* query = (double*)(indata+sizeof(int)); double* queryCopy = 0; @@ -1926,22 +1929,22 @@ double SILENCE_THRESH=0; double DIFF_THRESH=0; - if(!(dbH->flags & O2_FLAG_L2NORM) ) + if(!(ntohl(dbH->flags) & O2_FLAG_L2NORM) ) error("Database must be L2 normed for sequence query","use -l2norm"); if(verbosity>1) { cerr << "performing norms ... 
"; cerr.flush(); } - unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); + unsigned dbVectors = ntohl(dbH->length)/(sizeof(double)*ntohl(dbH->dim)); // Make a copy of the query - queryCopy = new double[numVectors*dbH->dim]; - memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); + queryCopy = new double[numVectors*ntohl(dbH->dim)]; + memcpy(queryCopy, query, numVectors*ntohl(dbH->dim)*sizeof(double)); qNorm = new double[numVectors]; sNorm = new double[dbVectors]; - sMeanL2=new double[dbH->numFiles]; + sMeanL2=new double[ntohl(dbH->numFiles)]; assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); - unitNorm(queryCopy, dbH->dim, numVectors, qNorm); + unitNorm(queryCopy, ntohl(dbH->dim), numVectors, qNorm); query = queryCopy; // Make norm measurements relative to sequenceLength @@ -1953,7 +1956,7 @@ // Copy the L2 norm values to core to avoid disk random access later on memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); double* snPtr = sNorm; - for(i=0; i<dbH->numFiles; i++){ + for(i=0; i<ntohl(dbH->numFiles); i++){ if(trackTable[i]>=sequenceLength){ tmp1=*snPtr; j=1; @@ -1979,12 +1982,12 @@ } double* pn = sMeanL2; - w=dbH->numFiles; + w=ntohl(dbH->numFiles); while(w--) *pn++=0.0; ps=sNorm; unsigned processedTracks=0; - for(i=0; i<dbH->numFiles; i++){ + for(i=0; i<ntohl(dbH->numFiles); i++){ if(trackTable[i]>sequenceLength-1){ w = trackTable[i]-sequenceLength+1; pn = sMeanL2+i; @@ -2074,15 +2077,15 @@ double* timesdata = 0; double* meanDBdur = 0; - if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ + if(usingTimes && !(ntohl(dbH->flags) & O2_FLAG_TIMES)){ cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; usingTimes=0; } - else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) + else if(!usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)) cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; - else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ + else if(usingTimes && (ntohl(dbH->flags) & O2_FLAG_TIMES)){ timesdata = new double[numVectors]; assert(timesdata); insertTimeStamps(numVectors, timesFile, timesdata); @@ -2095,9 +2098,9 @@ if(verbosity>1) { cerr << "mean query file duration: " << meanQdur << endl; } - meanDBdur = new double[dbH->numFiles]; + meanDBdur = new double[ntohl(dbH->numFiles)]; assert(meanDBdur); - for(k=0; k<dbH->numFiles; k++){ + for(k=0; k<ntohl(dbH->numFiles); k++){ meanDBdur[k]=0.0; for(j=0; j<trackTable[k]-1 ; j++) meanDBdur[k]+=timesTable[j+1]-timesTable[j]; @@ -2112,7 +2115,7 @@ if(verbosity>1) { cerr << "query point: " << queryPoint << endl; cerr.flush(); } - query=query+queryPoint*dbH->dim; + query=query+queryPoint*ntohl(dbH->dim); qNorm=qNorm+queryPoint; numVectors=wL; } @@ -2134,12 +2137,12 @@ double* dp; // build track offset table - unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; + unsigned *trackOffsetTable = new unsigned[ntohl(dbH->numFiles)]; unsigned cumTrack=0; unsigned trackIndexOffset; - for(k=0; k<dbH->numFiles;k++){ + for(k=0; k<ntohl(dbH->numFiles);k++){ trackOffsetTable[k]=cumTrack; - cumTrack+=trackTable[k]*dbH->dim; + cumTrack+=trackTable[k]*ntohl(dbH->dim); } char nextKey [MAXSTR]; @@ -2152,7 +2155,7 @@ double maxSample = 0; // Track loop - for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ + for(processedTracks=0, track=0 ; processedTracks < ntohl(dbH->numFiles) ; track++, processedTracks++){ // get trackID from file if using a control file if(trackFile){ @@ -2165,7 +2168,7 @@ } trackOffset=trackOffsetTable[track]; // numDoubles offset - trackIndexOffset=trackOffset/dbH->dim; // numVectors offset + trackIndexOffset=trackOffset/ntohl(dbH->dim); // numVectors offset if(sequenceLength<=trackTable[track]){ // test for short sequences @@ -2189,12 +2192,12 @@ // Dot product for(j=0; j<numVectors; j++) for(k=0; k<trackTable[track]; k++){ - 
qp=query+j*dbH->dim; - sp=dataBuf+trackOffset+k*dbH->dim; + qp=query+j*ntohl(dbH->dim); + sp=dataBuf+trackOffset+k*ntohl(dbH->dim); DD[j][k]=0.0; // Initialize matched filter array dp=&D[j][k]; // point to correlation cell j,k *dp=0.0; // initialize correlation cell - l=dbH->dim; // size of vectors + l=ntohl(dbH->dim); // size of vectors while(l--) *dp+=*qp++**sp++; } @@ -2421,7 +2424,7 @@ assert(l2normTable); - if( !append && (dbH->flags & O2_FLAG_L2NORM) ) + if( !append && (ntohl(dbH->flags) & O2_FLAG_L2NORM) ) error("Database is already L2 normed", "automatic norm on insert is enabled"); if(verbosity>2) { @@ -2458,7 +2461,7 @@ // been inserted, and dbH->length has already been updated. We // need to subtract off again the number of vectors that we've // inserted this time... - offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors + offset=(ntohl(dbH->length)/(ntohl(dbH->dim)*sizeof(double)))-n; // number of vectors } else { offset=0; }