comparison query.cpp @ 204:2ea1908707c7 refactoring

Filewise refactor. Break apart huge monolithic audioDB.cpp file into seven broadly independent portions: * SOAP * DB creation * insertion * query * dump * common functionality * constructor functions Remove the "using namespace std" from the header file, though that wasn't actually a problem: the problem in question is solved by including adb.nsmap in only soap.cpp. Makefile improvements.
author mas01cr
date Wed, 28 Nov 2007 15:10:28 +0000
parents
children 3c7c8b84e4f3
comparison
equal deleted inserted replaced
203:4b05c5bbf06d 204:2ea1908707c7
1 #include "audioDB.h"
2
3 bool audioDB::powers_acceptable(double p1, double p2) {
4 if (use_absolute_threshold) {
5 if ((p1 < absolute_threshold) || (p2 < absolute_threshold)) {
6 return false;
7 }
8 }
9 if (use_relative_threshold) {
10 if (fabs(p1-p2) > fabs(relative_threshold)) {
11 return false;
12 }
13 }
14 return true;
15 }
16
17 void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
18 switch(queryType){
19 case O2_POINT_QUERY:
20 pointQuery(dbName, inFile, adbQueryResponse);
21 break;
22 case O2_SEQUENCE_QUERY:
23 if(radius==0)
24 trackSequenceQueryNN(dbName, inFile, adbQueryResponse);
25 else
26 trackSequenceQueryRad(dbName, inFile, adbQueryResponse);
27 break;
28 case O2_TRACK_QUERY:
29 trackPointQuery(dbName, inFile, adbQueryResponse);
30 break;
31 default:
32 error("unrecognized queryType in query()");
33
34 }
35 }
36
37 //return ordinal position of key in keyTable
38 unsigned audioDB::getKeyPos(char* key){
39 for(unsigned k=0; k<dbH->numFiles; k++)
40 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0)
41 return k;
42 error("Key not found",key);
43 return O2_ERR_KEYNOTFOUND;
44 }
45
46 // Basic point query engine
47 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
48
49 initTables(dbName, inFile);
50
51 // For each input vector, find the closest pointNN matching output vectors and report
52 // we use stdout in this stub version
53 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
54
55 double* query = (double*)(indata+sizeof(int));
56 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
57 double* data = dataBuf;
58 double* queryCopy = 0;
59
60 if( dbH->flags & O2_FLAG_L2NORM ){
61 // Make a copy of the query
62 queryCopy = new double[numVectors*dbH->dim];
63 qNorm = new double[numVectors];
64 assert(queryCopy&&qNorm);
65 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
66 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
67 query = queryCopy;
68 }
69
70 // Make temporary dynamic memory for results
71 assert(pointNN>0 && pointNN<=O2_MAXNN);
72 double distances[pointNN];
73 unsigned qIndexes[pointNN];
74 unsigned sIndexes[pointNN];
75 for(unsigned k=0; k<pointNN; k++){
76 distances[k]=-DBL_MAX;
77 qIndexes[k]=~0;
78 sIndexes[k]=~0;
79 }
80
81 unsigned j=numVectors;
82 unsigned k,l,n;
83 double thisDist;
84
85 unsigned totalVecs=dbH->length/(dbH->dim*sizeof(double));
86 double meanQdur = 0;
87 double *timesdata = 0;
88 double *querydurs = 0;
89 double *dbdurs = 0;
90
91 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
92 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
93 usingTimes=0;
94 }
95
96 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
97 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
98
99 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
100 timesdata = new double[2*numVectors];
101 querydurs = new double[numVectors];
102 insertTimeStamps(numVectors, timesFile, timesdata);
103 // Calculate durations of points
104 for(k=0; k<numVectors-1; k++){
105 querydurs[k]=timesdata[2*k+1]-timesdata[2*k];
106 meanQdur+=querydurs[k];
107 }
108 meanQdur/=k;
109 // Individual exhaustive timepoint durations
110 dbdurs = new double[totalVecs];
111 for(k=0; k<totalVecs-1; k++) {
112 dbdurs[k]=timesTable[2*k+1]-timesTable[2*k];
113 }
114 }
115
116 if(usingQueryPoint)
117 if(queryPoint>numVectors-1)
118 error("queryPoint > numVectors in query");
119 else{
120 if(verbosity>1) {
121 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
122 }
123 query=query+queryPoint*dbH->dim;
124 numVectors=queryPoint+1;
125 j=1;
126 }
127
128 gettimeofday(&tv1, NULL);
129 while(j--){ // query
130 data=dataBuf;
131 k=totalVecs; // number of database vectors
132 while(k--){ // database
133 thisDist=0;
134 l=dbH->dim;
135 double* q=query;
136 while(l--)
137 thisDist+=*q++**data++;
138 if(!usingTimes ||
139 (usingTimes
140 && fabs(dbdurs[totalVecs-k-1]-querydurs[numVectors-j-1])<querydurs[numVectors-j-1]*timesTol)){
141 n=pointNN;
142 while(n--){
143 if(thisDist>=distances[n]){
144 if((n==0 || thisDist<=distances[n-1])){
145 // Copy all values above up the queue
146 for( l=pointNN-1 ; l >= n+1 ; l--){
147 distances[l]=distances[l-1];
148 qIndexes[l]=qIndexes[l-1];
149 sIndexes[l]=sIndexes[l-1];
150 }
151 distances[n]=thisDist;
152 qIndexes[n]=numVectors-j-1;
153 sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1;
154 break;
155 }
156 }
157 else
158 break;
159 }
160 }
161 }
162 // Move query pointer to next query point
163 query+=dbH->dim;
164 }
165
166 gettimeofday(&tv2, NULL);
167 if(verbosity>1) {
168 std::cerr << std::endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
169 }
170
171 if(adbQueryResponse==0){
172 // Output answer
173 // Loop over nearest neighbours
174 for(k=0; k < pointNN; k++){
175 // Scan for key
176 unsigned cumTrack=0;
177 for(l=0 ; l<dbH->numFiles; l++){
178 cumTrack+=trackTable[l];
179 if(sIndexes[k]<cumTrack){
180 std::cout << fileTable+l*O2_FILETABLESIZE << " " << distances[k] << " " << qIndexes[k] << " "
181 << sIndexes[k]+trackTable[l]-cumTrack << std::endl;
182 break;
183 }
184 }
185 }
186 }
187 else{ // Process Web Services Query
188 int listLen;
189 for(k = 0; k < pointNN; k++) {
190 if(distances[k] == -DBL_MAX)
191 break;
192 }
193 listLen = k;
194
195 adbQueryResponse->result.__sizeRlist=listLen;
196 adbQueryResponse->result.__sizeDist=listLen;
197 adbQueryResponse->result.__sizeQpos=listLen;
198 adbQueryResponse->result.__sizeSpos=listLen;
199 adbQueryResponse->result.Rlist= new char*[listLen];
200 adbQueryResponse->result.Dist = new double[listLen];
201 adbQueryResponse->result.Qpos = new unsigned int[listLen];
202 adbQueryResponse->result.Spos = new unsigned int[listLen];
203 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
204 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
205 adbQueryResponse->result.Dist[k]=distances[k];
206 adbQueryResponse->result.Qpos[k]=qIndexes[k];
207 unsigned cumTrack=0;
208 for(l=0 ; l<dbH->numFiles; l++){
209 cumTrack+=trackTable[l];
210 if(sIndexes[k]<cumTrack){
211 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE);
212 break;
213 }
214 }
215 adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack;
216 }
217 }
218
219 // Clean up
220 if(queryCopy)
221 delete queryCopy;
222 if(qNorm)
223 delete qNorm;
224 if(timesdata)
225 delete[] timesdata;
226 if(querydurs)
227 delete[] querydurs;
228 if(dbdurs)
229 delete dbdurs;
230 }
231
232 // trackPointQuery
233 // return the trackNN closest tracks to the query track
234 // uses average of pointNN points per track
235 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
236 initTables(dbName, inFile);
237
238 // For each input vector, find the closest pointNN matching output vectors and report
239 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
240 double* query = (double*)(indata+sizeof(int));
241 double* data;
242 double* queryCopy = 0;
243
244 if( dbH->flags & O2_FLAG_L2NORM ){
245 // Make a copy of the query
246 queryCopy = new double[numVectors*dbH->dim];
247 qNorm = new double[numVectors];
248 assert(queryCopy&&qNorm);
249 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
250 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
251 query = queryCopy;
252 }
253
254 assert(pointNN>0 && pointNN<=O2_MAXNN);
255 assert(trackNN>0 && trackNN<=O2_MAXNN);
256
257 // Make temporary dynamic memory for results
258 double trackDistances[trackNN];
259 unsigned trackIDs[trackNN];
260 unsigned trackQIndexes[trackNN];
261 unsigned trackSIndexes[trackNN];
262
263 double distances[pointNN];
264 unsigned qIndexes[pointNN];
265 unsigned sIndexes[pointNN];
266
267 unsigned j=numVectors; // number of query points
268 unsigned k,l,n, track, trackOffset=0, processedTracks=0;
269 double thisDist;
270
271 for(k=0; k<pointNN; k++){
272 distances[k]=-DBL_MAX;
273 qIndexes[k]=~0;
274 sIndexes[k]=~0;
275 }
276
277 for(k=0; k<trackNN; k++){
278 trackDistances[k]=-DBL_MAX;
279 trackQIndexes[k]=~0;
280 trackSIndexes[k]=~0;
281 trackIDs[k]=~0;
282 }
283
284 double meanQdur = 0;
285 double *timesdata = 0;
286 double *querydurs = 0;
287 double *meanDBdur = 0;
288
289 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
290 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
291 usingTimes=0;
292 }
293
294 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
295 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
296
297 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
298 timesdata = new double[2*numVectors];
299 querydurs = new double[numVectors];
300 insertTimeStamps(numVectors, timesFile, timesdata);
301 // Calculate durations of points
302 for(k=0; k<numVectors-1; k++) {
303 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
304 meanQdur += querydurs[k];
305 }
306 meanQdur/=k;
307 meanDBdur = new double[dbH->numFiles];
308 for(k=0; k<dbH->numFiles; k++){
309 meanDBdur[k]=0.0;
310 for(j=0; j<trackTable[k]-1 ; j++) {
311 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
312 }
313 meanDBdur[k]/=j;
314 }
315 }
316
317 if(usingQueryPoint)
318 if(queryPoint>numVectors-1)
319 error("queryPoint > numVectors in query");
320 else{
321 if(verbosity>1) {
322 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
323 }
324 query=query+queryPoint*dbH->dim;
325 numVectors=queryPoint+1;
326 }
327
328 // build track offset table
329 off_t *trackOffsetTable = new off_t[dbH->numFiles];
330 unsigned cumTrack=0;
331 off_t trackIndexOffset;
332 for(k=0; k<dbH->numFiles;k++){
333 trackOffsetTable[k]=cumTrack;
334 cumTrack+=trackTable[k]*dbH->dim;
335 }
336
337 char nextKey[MAXSTR];
338
339 gettimeofday(&tv1, NULL);
340
341 size_t data_buffer_size = 0;
342 double *data_buffer = 0;
343 lseek(dbfid, dbH->dataOffset, SEEK_SET);
344
345 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
346
347 trackOffset = trackOffsetTable[track]; // numDoubles offset
348
349 // get trackID from file if using a control file
350 if(trackFile) {
351 trackFile->getline(nextKey,MAXSTR);
352 if(!trackFile->eof()) {
353 track = getKeyPos(nextKey);
354 trackOffset = trackOffsetTable[track];
355 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
356 } else {
357 break;
358 }
359 }
360
361 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
362
363 if(verbosity>7) {
364 std::cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";std::cerr.flush();
365 }
366
367 if(dbH->flags & O2_FLAG_L2NORM)
368 usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy;
369 else
370 usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int));
371 if(usingQueryPoint)
372 j=1;
373 else
374 j=numVectors;
375
376 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
377 if(data_buffer) {
378 free(data_buffer);
379 }
380 {
381 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
382 void *tmp = malloc(data_buffer_size);
383 if (tmp == NULL) {
384 error("error allocating data buffer");
385 }
386 data_buffer = (double *) tmp;
387 }
388 }
389
390 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
391
392 while(j--){
393 k=trackTable[track]; // number of vectors in track
394 data=data_buffer; // data for track
395 while(k--){
396 thisDist=0;
397 l=dbH->dim;
398 double* q=query;
399 while(l--)
400 thisDist+=*q++**data++;
401 if(!usingTimes ||
402 (usingTimes
403 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
404 n=pointNN;
405 while(n--){
406 if(thisDist>=distances[n]){
407 if((n==0 || thisDist<=distances[n-1])){
408 // Copy all values above up the queue
409 for( l=pointNN-1 ; l > n ; l--){
410 distances[l]=distances[l-1];
411 qIndexes[l]=qIndexes[l-1];
412 sIndexes[l]=sIndexes[l-1];
413 }
414 distances[n]=thisDist;
415 qIndexes[n]=numVectors-j-1;
416 sIndexes[n]=trackTable[track]-k-1;
417 break;
418 }
419 }
420 else
421 break;
422 }
423 }
424 } // track
425 // Move query pointer to next query point
426 query+=dbH->dim;
427 } // query
428 // Take the average of this track's distance
429 // Test the track distances
430 thisDist=0;
431 for (n = 0; n < pointNN; n++) {
432 if (distances[n] == -DBL_MAX) break;
433 thisDist += distances[n];
434 }
435 thisDist /= n;
436
437 n=trackNN;
438 while(n--){
439 if(thisDist>=trackDistances[n]){
440 if((n==0 || thisDist<=trackDistances[n-1])){
441 // Copy all values above up the queue
442 for( l=trackNN-1 ; l > n ; l--){
443 trackDistances[l]=trackDistances[l-1];
444 trackQIndexes[l]=trackQIndexes[l-1];
445 trackSIndexes[l]=trackSIndexes[l-1];
446 trackIDs[l]=trackIDs[l-1];
447 }
448 trackDistances[n]=thisDist;
449 trackQIndexes[n]=qIndexes[0];
450 trackSIndexes[n]=sIndexes[0];
451 trackIDs[n]=track;
452 break;
453 }
454 }
455 else
456 break;
457 }
458 for(unsigned k=0; k<pointNN; k++){
459 distances[k]=-DBL_MAX;
460 qIndexes[k]=~0;
461 sIndexes[k]=~0;
462 }
463 } // tracks
464
465 free(data_buffer);
466
467 gettimeofday(&tv2, NULL);
468
469 if(verbosity>1) {
470 std::cerr << std::endl << "processed tracks :" << processedTracks
471 << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
472 }
473
474 if(adbQueryResponse==0){
475 if(verbosity>1) {
476 std::cerr<<std::endl;
477 }
478 // Output answer
479 // Loop over nearest neighbours
480 for(k=0; k < std::min(trackNN,processedTracks); k++)
481 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE
482 << " " << trackDistances[k] << " " << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
483 }
484 else{ // Process Web Services Query
485 int listLen = std::min(trackNN, processedTracks);
486 adbQueryResponse->result.__sizeRlist=listLen;
487 adbQueryResponse->result.__sizeDist=listLen;
488 adbQueryResponse->result.__sizeQpos=listLen;
489 adbQueryResponse->result.__sizeSpos=listLen;
490 adbQueryResponse->result.Rlist= new char*[listLen];
491 adbQueryResponse->result.Dist = new double[listLen];
492 adbQueryResponse->result.Qpos = new unsigned int[listLen];
493 adbQueryResponse->result.Spos = new unsigned int[listLen];
494 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
495 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
496 adbQueryResponse->result.Dist[k]=trackDistances[k];
497 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
498 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
499 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
500 }
501 }
502
503 // Clean up
504 if(trackOffsetTable)
505 delete trackOffsetTable;
506 if(queryCopy)
507 delete queryCopy;
508 if(qNorm)
509 delete qNorm;
510 if(timesdata)
511 delete[] timesdata;
512 if(querydurs)
513 delete[] querydurs;
514 if(meanDBdur)
515 delete meanDBdur;
516 }
517
518 // This is a common pattern in sequence queries: what we are doing is
519 // taking a window of length seqlen over a buffer of length length,
520 // and placing the sum of the elements in that window in the first
521 // element of the window: thus replacing all but the last seqlen
522 // elements in the buffer the corresponding windowed sum.
523 void audioDB::sequence_sum(double *buffer, int length, int seqlen) {
524 double tmp1, tmp2, *ps;
525 int j, w;
526
527 tmp1 = *buffer;
528 j = 1;
529 w = seqlen - 1;
530 while(w--) {
531 *buffer += buffer[j++];
532 }
533 ps = buffer + 1;
534 w = length - seqlen; // +1 - 1
535 while(w--) {
536 tmp2 = *ps;
537 *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1);
538 tmp1 = tmp2;
539 ps++;
540 }
541 }
542
543 void audioDB::sequence_sqrt(double *buffer, int length, int seqlen) {
544 int w = length - seqlen + 1;
545 while(w--) {
546 *buffer = sqrt(*buffer);
547 buffer++;
548 }
549 }
550
551 void audioDB::sequence_average(double *buffer, int length, int seqlen) {
552 int w = length - seqlen + 1;
553 while(w--) {
554 *buffer /= seqlen;
555 buffer++;
556 }
557 }
558
559 // k nearest-neighbor (k-NN) search between query and target tracks
560 // efficient implementation based on matched filter
561 // assumes normed shingles
562 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track
563 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
564
565 initTables(dbName, inFile);
566
567 // For each input vector, find the closest pointNN matching output vectors and report
568 // we use stdout in this stub version
569 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
570 double* query = (double*)(indata+sizeof(int));
571 double* queryCopy = 0;
572
573 if(!(dbH->flags & O2_FLAG_L2NORM) )
574 error("Database must be L2 normed for sequence query","use -L2NORM");
575
576 if(numVectors<sequenceLength)
577 error("Query shorter than requested sequence length", "maybe use -l");
578
579 if(verbosity>1) {
580 std::cerr << "performing norms ... "; std::cerr.flush();
581 }
582 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
583
584 // Make a copy of the query
585 queryCopy = new double[numVectors*dbH->dim];
586 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
587 qNorm = new double[numVectors];
588 sNorm = new double[dbVectors];
589 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
590 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
591 query = queryCopy;
592
593 // Make norm measurements relative to sequenceLength
594 unsigned w = sequenceLength-1;
595 unsigned i,j;
596
597 // Copy the L2 norm values to core to avoid disk random access later on
598 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
599 double* qnPtr = qNorm;
600 double* snPtr = sNorm;
601
602 double *sPower = 0, *qPower = 0;
603 double *spPtr = 0, *qpPtr = 0;
604
605 if (usingPower) {
606 if (!(dbH->flags & O2_FLAG_POWER)) {
607 error("database not power-enabled", dbName);
608 }
609 sPower = new double[dbVectors];
610 spPtr = sPower;
611 memcpy(sPower, powerTable, dbVectors * sizeof(double));
612 }
613
614 for(i=0; i<dbH->numFiles; i++){
615 if(trackTable[i]>=sequenceLength) {
616 sequence_sum(snPtr, trackTable[i], sequenceLength);
617 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
618
619 if (usingPower) {
620 sequence_sum(spPtr, trackTable[i], sequenceLength);
621 sequence_average(spPtr, trackTable[i], sequenceLength);
622 }
623 }
624 snPtr += trackTable[i];
625 if (usingPower) {
626 spPtr += trackTable[i];
627 }
628 }
629
630 sequence_sum(qnPtr, numVectors, sequenceLength);
631 sequence_sqrt(qnPtr, numVectors, sequenceLength);
632
633 if (usingPower) {
634 qPower = new double[numVectors];
635 qpPtr = qPower;
636 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
637 error("error seeking to data", powerFileName, "lseek");
638 }
639 int count = read(powerfd, qPower, numVectors * sizeof(double));
640 if (count == -1) {
641 error("error reading data", powerFileName, "read");
642 }
643 if ((unsigned) count != numVectors * sizeof(double)) {
644 error("short read", powerFileName);
645 }
646
647 sequence_sum(qpPtr, numVectors, sequenceLength);
648 sequence_average(qpPtr, numVectors, sequenceLength);
649 }
650
651 if(verbosity>1) {
652 std::cerr << "done." << std::endl;
653 }
654
655 if(verbosity>1) {
656 std::cerr << "matching tracks..." << std::endl;
657 }
658
659 assert(pointNN>0 && pointNN<=O2_MAXNN);
660 assert(trackNN>0 && trackNN<=O2_MAXNN);
661
662 // Make temporary dynamic memory for results
663 double trackDistances[trackNN];
664 unsigned trackIDs[trackNN];
665 unsigned trackQIndexes[trackNN];
666 unsigned trackSIndexes[trackNN];
667
668 double distances[pointNN];
669 unsigned qIndexes[pointNN];
670 unsigned sIndexes[pointNN];
671
672
673 unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
674 double thisDist;
675
676 for(k=0; k<pointNN; k++){
677 distances[k]=1.0e6;
678 qIndexes[k]=~0;
679 sIndexes[k]=~0;
680 }
681
682 for(k=0; k<trackNN; k++){
683 trackDistances[k]=1.0e6;
684 trackQIndexes[k]=~0;
685 trackSIndexes[k]=~0;
686 trackIDs[k]=~0;
687 }
688
689 // Timestamp and durations processing
690 double meanQdur = 0;
691 double *timesdata = 0;
692 double *querydurs = 0;
693 double *meanDBdur = 0;
694
695 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
696 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
697 usingTimes=0;
698 }
699
700 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
701 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
702
703 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
704 timesdata = new double[2*numVectors];
705 querydurs = new double[numVectors];
706
707 insertTimeStamps(numVectors, timesFile, timesdata);
708 // Calculate durations of points
709 for(k=0; k<numVectors-1; k++) {
710 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
711 meanQdur += querydurs[k];
712 }
713 meanQdur/=k;
714 if(verbosity>1) {
715 std::cerr << "mean query file duration: " << meanQdur << std::endl;
716 }
717 meanDBdur = new double[dbH->numFiles];
718 assert(meanDBdur);
719 for(k=0; k<dbH->numFiles; k++){
720 meanDBdur[k]=0.0;
721 for(j=0; j<trackTable[k]-1 ; j++) {
722 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
723 }
724 meanDBdur[k]/=j;
725 }
726 }
727
728 if(usingQueryPoint)
729 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
730 error("queryPoint > numVectors-wL+1 in query");
731 else{
732 if(verbosity>1) {
733 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
734 }
735 query = query + queryPoint * dbH->dim;
736 qnPtr = qnPtr + queryPoint;
737 if (usingPower) {
738 qpPtr = qpPtr + queryPoint;
739 }
740 numVectors=wL;
741 }
742
743 double ** D = 0; // Differences query and target
744 double ** DD = 0; // Matched filter distance
745
746 D = new double*[numVectors];
747 assert(D);
748 DD = new double*[numVectors];
749 assert(DD);
750
751 gettimeofday(&tv1, NULL);
752 unsigned processedTracks = 0;
753 unsigned successfulTracks=0;
754
755 double* qp;
756 double* sp;
757 double* dp;
758
759 // build track offset table
760 off_t *trackOffsetTable = new off_t[dbH->numFiles];
761 unsigned cumTrack=0;
762 off_t trackIndexOffset;
763 for(k=0; k<dbH->numFiles;k++){
764 trackOffsetTable[k]=cumTrack;
765 cumTrack+=trackTable[k]*dbH->dim;
766 }
767
768 char nextKey [MAXSTR];
769
770 // chi^2 statistics
771 double sampleCount = 0;
772 double sampleSum = 0;
773 double logSampleSum = 0;
774 double minSample = 1e9;
775 double maxSample = 0;
776
777 // Track loop
778 size_t data_buffer_size = 0;
779 double *data_buffer = 0;
780 lseek(dbfid, dbH->dataOffset, SEEK_SET);
781
782 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) {
783
784 trackOffset = trackOffsetTable[track]; // numDoubles offset
785
786 // get trackID from file if using a control file
787 if(trackFile) {
788 trackFile->getline(nextKey,MAXSTR);
789 if(!trackFile->eof()) {
790 track = getKeyPos(nextKey);
791 trackOffset = trackOffsetTable[track];
792 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
793 } else {
794 break;
795 }
796 }
797
798 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
799
800 if(sequenceLength<=trackTable[track]){ // test for short sequences
801
802 if(verbosity>7) {
803 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
804 }
805
806 // Sum products matrix
807 for(j=0; j<numVectors;j++){
808 D[j]=new double[trackTable[track]];
809 assert(D[j]);
810
811 }
812
813 // Matched filter matrix
814 for(j=0; j<numVectors;j++){
815 DD[j]=new double[trackTable[track]];
816 assert(DD[j]);
817 }
818
819 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
820 if(data_buffer) {
821 free(data_buffer);
822 }
823 {
824 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
825 void *tmp = malloc(data_buffer_size);
826 if (tmp == NULL) {
827 error("error allocating data buffer");
828 }
829 data_buffer = (double *) tmp;
830 }
831 }
832
833 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
834
835 // Dot product
836 for(j=0; j<numVectors; j++)
837 for(k=0; k<trackTable[track]; k++){
838 qp=query+j*dbH->dim;
839 sp=data_buffer+k*dbH->dim;
840 DD[j][k]=0.0; // Initialize matched filter array
841 dp=&D[j][k]; // point to correlation cell j,k
842 *dp=0.0; // initialize correlation cell
843 l=dbH->dim; // size of vectors
844 while(l--)
845 *dp+=*qp++**sp++;
846 }
847
848 // Matched Filter
849 // HOP SIZE == 1
850 double* spd;
851 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
852 for(w=0; w<wL; w++)
853 for(j=0; j<numVectors-w; j++){
854 sp=DD[j];
855 spd=D[j+w]+w;
856 k=trackTable[track]-w;
857 while(k--)
858 *sp+++=*spd++;
859 }
860 }
861
862 else{ // HOP_SIZE != 1
863 for(w=0; w<wL; w++)
864 for(j=0; j<numVectors-w; j+=HOP_SIZE){
865 sp=DD[j];
866 spd=D[j+w]+w;
867 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
868 *sp+=*spd;
869 sp+=HOP_SIZE;
870 spd+=HOP_SIZE;
871 }
872 }
873 }
874
875 if(verbosity>3 && usingTimes) {
876 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
877 std::cerr.flush();
878 }
879
880 if(!usingTimes ||
881 (usingTimes
882 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
883
884 if(verbosity>3 && usingTimes) {
885 std::cerr << "within duration tolerance." << std::endl;
886 std::cerr.flush();
887 }
888
889 // Search for minimum distance by shingles (concatenated vectors)
890 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
891 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
892 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
893 if(verbosity>9) {
894 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
895 }
896 // Gather chi^2 statistics
897 if(thisDist<minSample)
898 minSample=thisDist;
899 else if(thisDist>maxSample)
900 maxSample=thisDist;
901 if(thisDist>1e-9){
902 sampleCount++;
903 sampleSum+=thisDist;
904 logSampleSum+=log(thisDist);
905 }
906
907 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
908 // Power test
909 if (usingPower) {
910 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
911 thisDist = 1000000.0;
912 }
913 }
914
915 // k-NN match algorithm
916 m=pointNN;
917 while(m--){
918 if(thisDist<=distances[m])
919 if(m==0 || thisDist>=distances[m-1]){
920 // Shuffle distances up the list
921 for(l=pointNN-1; l>m; l--){
922 distances[l]=distances[l-1];
923 qIndexes[l]=qIndexes[l-1];
924 sIndexes[l]=sIndexes[l-1];
925 }
926 distances[m]=thisDist;
927 if(usingQueryPoint)
928 qIndexes[m]=queryPoint;
929 else
930 qIndexes[m]=j;
931 sIndexes[m]=k;
932 break;
933 }
934 }
935 }
936 // Calculate the mean of the N-Best matches
937 thisDist=0.0;
938 for(m=0; m<pointNN; m++) {
939 if (distances[m] == 1000000.0) break;
940 thisDist+=distances[m];
941 }
942 thisDist/=m;
943
944 // Let's see the distances then...
945 if(verbosity>3) {
946 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
947 }
948
949
950 // All the track stuff goes here
951 n=trackNN;
952 while(n--){
953 if(thisDist<=trackDistances[n]){
954 if((n==0 || thisDist>=trackDistances[n-1])){
955 // Copy all values above up the queue
956 for( l=trackNN-1 ; l > n ; l--){
957 trackDistances[l]=trackDistances[l-1];
958 trackQIndexes[l]=trackQIndexes[l-1];
959 trackSIndexes[l]=trackSIndexes[l-1];
960 trackIDs[l]=trackIDs[l-1];
961 }
962 trackDistances[n]=thisDist;
963 trackQIndexes[n]=qIndexes[0];
964 trackSIndexes[n]=sIndexes[0];
965 successfulTracks++;
966 trackIDs[n]=track;
967 break;
968 }
969 }
970 else
971 break;
972 }
973 } // Duration match
974
975 // Clean up current track
976 if(D!=NULL){
977 for(j=0; j<numVectors; j++)
978 delete[] D[j];
979 }
980
981 if(DD!=NULL){
982 for(j=0; j<numVectors; j++)
983 delete[] DD[j];
984 }
985 }
986 // per-track reset array values
987 for(unsigned k=0; k<pointNN; k++){
988 distances[k]=1.0e6;
989 qIndexes[k]=~0;
990 sIndexes[k]=~0;
991 }
992 }
993
994 free(data_buffer);
995
996 gettimeofday(&tv2,NULL);
997 if(verbosity>1) {
998 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
999 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
1000 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
1001 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
1002 }
1003 if(adbQueryResponse==0){
1004 if(verbosity>1) {
1005 std::cerr<<std::endl;
1006 }
1007 // Output answer
1008 // Loop over nearest neighbours
1009 for(k=0; k < std::min(trackNN,successfulTracks); k++)
1010 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << " "
1011 << trackQIndexes[k] << " " << trackSIndexes[k] << std::endl;
1012 }
1013 else{ // Process Web Services Query
1014 int listLen = std::min(trackNN, processedTracks);
1015 adbQueryResponse->result.__sizeRlist=listLen;
1016 adbQueryResponse->result.__sizeDist=listLen;
1017 adbQueryResponse->result.__sizeQpos=listLen;
1018 adbQueryResponse->result.__sizeSpos=listLen;
1019 adbQueryResponse->result.Rlist= new char*[listLen];
1020 adbQueryResponse->result.Dist = new double[listLen];
1021 adbQueryResponse->result.Qpos = new unsigned int[listLen];
1022 adbQueryResponse->result.Spos = new unsigned int[listLen];
1023 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
1024 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
1025 adbQueryResponse->result.Dist[k]=trackDistances[k];
1026 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
1027 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
1028 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
1029 }
1030 }
1031
1032 // Clean up
1033 if(trackOffsetTable)
1034 delete[] trackOffsetTable;
1035 if(queryCopy)
1036 delete[] queryCopy;
1037 if(qNorm)
1038 delete[] qNorm;
1039 if(sNorm)
1040 delete[] sNorm;
1041 if(qPower)
1042 delete[] qPower;
1043 if(sPower)
1044 delete[] sPower;
1045 if(D)
1046 delete[] D;
1047 if(DD)
1048 delete[] DD;
1049 if(timesdata)
1050 delete[] timesdata;
1051 if(querydurs)
1052 delete[] querydurs;
1053 if(meanDBdur)
1054 delete[] meanDBdur;
1055 }
1056
1057 // Radius search between query and target tracks
1058 // efficient implementation based on matched filter
1059 // assumes normed shingles
1060 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track
1061 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
1062
1063 initTables(dbName, inFile);
1064
1065 // For each input vector, find the closest pointNN matching output vectors and report
1066 // we use stdout in this stub version
1067 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1068 double* query = (double*)(indata+sizeof(int));
1069 double* queryCopy = 0;
1070
1071 if(!(dbH->flags & O2_FLAG_L2NORM) )
1072 error("Database must be L2 normed for sequence query","use -l2norm");
1073
1074 if(verbosity>1) {
1075 std::cerr << "performing norms ... "; std::cerr.flush();
1076 }
1077 unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim);
1078
1079 // Make a copy of the query
1080 queryCopy = new double[numVectors*dbH->dim];
1081 memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double));
1082 qNorm = new double[numVectors];
1083 sNorm = new double[dbVectors];
1084 assert(qNorm&&sNorm&&queryCopy&&sequenceLength);
1085 unitNorm(queryCopy, dbH->dim, numVectors, qNorm);
1086 query = queryCopy;
1087
1088 // Make norm measurements relative to sequenceLength
1089 unsigned w = sequenceLength-1;
1090 unsigned i,j;
1091
1092 // Copy the L2 norm values to core to avoid disk random access later on
1093 memcpy(sNorm, l2normTable, dbVectors*sizeof(double));
1094 double* snPtr = sNorm;
1095 double* qnPtr = qNorm;
1096
1097 double *sPower = 0, *qPower = 0;
1098 double *spPtr = 0, *qpPtr = 0;
1099
1100 if (usingPower) {
1101 if(!(dbH->flags & O2_FLAG_POWER)) {
1102 error("database not power-enabled", dbName);
1103 }
1104 sPower = new double[dbVectors];
1105 spPtr = sPower;
1106 memcpy(sPower, powerTable, dbVectors * sizeof(double));
1107 }
1108
1109 for(i=0; i<dbH->numFiles; i++){
1110 if(trackTable[i]>=sequenceLength) {
1111 sequence_sum(snPtr, trackTable[i], sequenceLength);
1112 sequence_sqrt(snPtr, trackTable[i], sequenceLength);
1113 if (usingPower) {
1114 sequence_sum(spPtr, trackTable[i], sequenceLength);
1115 sequence_average(spPtr, trackTable[i], sequenceLength);
1116 }
1117 }
1118 snPtr += trackTable[i];
1119 if (usingPower) {
1120 spPtr += trackTable[i];
1121 }
1122 }
1123
1124 sequence_sum(qnPtr, numVectors, sequenceLength);
1125 sequence_sqrt(qnPtr, numVectors, sequenceLength);
1126
1127 if (usingPower) {
1128 qPower = new double[numVectors];
1129 qpPtr = qPower;
1130 if (lseek(powerfd, sizeof(int), SEEK_SET) == (off_t) -1) {
1131 error("error seeking to data", powerFileName, "lseek");
1132 }
1133 int count = read(powerfd, qPower, numVectors * sizeof(double));
1134 if (count == -1) {
1135 error("error reading data", powerFileName, "read");
1136 }
1137 if ((unsigned) count != numVectors * sizeof(double)) {
1138 error("short read", powerFileName);
1139 }
1140
1141 sequence_sum(qpPtr, numVectors, sequenceLength);
1142 sequence_average(qpPtr, numVectors, sequenceLength);
1143 }
1144
1145 if(verbosity>1) {
1146 std::cerr << "done." << std::endl;
1147 }
1148
1149 if(verbosity>1) {
1150 std::cerr << "matching tracks..." << std::endl;
1151 }
1152
1153 assert(pointNN>0 && pointNN<=O2_MAXNN);
1154 assert(trackNN>0 && trackNN<=O2_MAXNN);
1155
1156 // Make temporary dynamic memory for results
1157 double trackDistances[trackNN];
1158 unsigned trackIDs[trackNN];
1159 unsigned trackQIndexes[trackNN];
1160 unsigned trackSIndexes[trackNN];
1161
1162 double distances[pointNN];
1163 unsigned qIndexes[pointNN];
1164 unsigned sIndexes[pointNN];
1165
1166
1167 unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength;
1168 double thisDist;
1169
1170 for(k=0; k<pointNN; k++){
1171 distances[k]=0.0;
1172 qIndexes[k]=~0;
1173 sIndexes[k]=~0;
1174 }
1175
1176 for(k=0; k<trackNN; k++){
1177 trackDistances[k]=0.0;
1178 trackQIndexes[k]=~0;
1179 trackSIndexes[k]=~0;
1180 trackIDs[k]=~0;
1181 }
1182
1183 // Timestamp and durations processing
1184 double meanQdur = 0;
1185 double *timesdata = 0;
1186 double *querydurs = 0;
1187 double *meanDBdur = 0;
1188
1189 if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){
1190 std::cerr << "warning: ignoring query timestamps for non-timestamped database" << std::endl;
1191 usingTimes=0;
1192 }
1193
1194 else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
1195 std::cerr << "warning: no timestamps given for query. Ignoring database timestamps." << std::endl;
1196
1197 else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){
1198 timesdata = new double[2*numVectors];
1199 querydurs = new double[numVectors];
1200
1201 insertTimeStamps(numVectors, timesFile, timesdata);
1202 // Calculate durations of points
1203 for(k=0; k<numVectors-1; k++){
1204 querydurs[k] = timesdata[2*k+1] - timesdata[2*k];
1205 meanQdur += querydurs[k];
1206 }
1207 meanQdur/=k;
1208 if(verbosity>1) {
1209 std::cerr << "mean query file duration: " << meanQdur << std::endl;
1210 }
1211 meanDBdur = new double[dbH->numFiles];
1212 assert(meanDBdur);
1213 for(k=0; k<dbH->numFiles; k++){
1214 meanDBdur[k]=0.0;
1215 for(j=0; j<trackTable[k]-1 ; j++) {
1216 meanDBdur[k]+=timesTable[2*j+1]-timesTable[2*j];
1217 }
1218 meanDBdur[k]/=j;
1219 }
1220 }
1221
1222 if(usingQueryPoint)
1223 if(queryPoint>numVectors || queryPoint>numVectors-wL+1)
1224 error("queryPoint > numVectors-wL+1 in query");
1225 else{
1226 if(verbosity>1) {
1227 std::cerr << "query point: " << queryPoint << std::endl; std::cerr.flush();
1228 }
1229 query = query + queryPoint*dbH->dim;
1230 qnPtr = qnPtr + queryPoint;
1231 if (usingPower) {
1232 qpPtr = qpPtr + queryPoint;
1233 }
1234 numVectors=wL;
1235 }
1236
1237 double ** D = 0; // Differences query and target
1238 double ** DD = 0; // Matched filter distance
1239
1240 D = new double*[numVectors];
1241 assert(D);
1242 DD = new double*[numVectors];
1243 assert(DD);
1244
1245 gettimeofday(&tv1, NULL);
1246 unsigned processedTracks = 0;
1247 unsigned successfulTracks=0;
1248
1249 double* qp;
1250 double* sp;
1251 double* dp;
1252
1253 // build track offset table
1254 off_t *trackOffsetTable = new off_t[dbH->numFiles];
1255 unsigned cumTrack=0;
1256 off_t trackIndexOffset;
1257 for(k=0; k<dbH->numFiles;k++){
1258 trackOffsetTable[k]=cumTrack;
1259 cumTrack+=trackTable[k]*dbH->dim;
1260 }
1261
1262 char nextKey [MAXSTR];
1263
1264 // chi^2 statistics
1265 double sampleCount = 0;
1266 double sampleSum = 0;
1267 double logSampleSum = 0;
1268 double minSample = 1e9;
1269 double maxSample = 0;
1270
1271 // Track loop
1272 size_t data_buffer_size = 0;
1273 double *data_buffer = 0;
1274 lseek(dbfid, dbH->dataOffset, SEEK_SET);
1275
1276 for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){
1277
1278 trackOffset = trackOffsetTable[track]; // numDoubles offset
1279
1280 // get trackID from file if using a control file
1281 if(trackFile) {
1282 trackFile->getline(nextKey,MAXSTR);
1283 if(!trackFile->eof()) {
1284 track = getKeyPos(nextKey);
1285 trackOffset = trackOffsetTable[track];
1286 lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET);
1287 } else {
1288 break;
1289 }
1290 }
1291
1292 trackIndexOffset=trackOffset/dbH->dim; // numVectors offset
1293
1294 if(sequenceLength<=trackTable[track]){ // test for short sequences
1295
1296 if(verbosity>7) {
1297 std::cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";std::cerr.flush();
1298 }
1299
1300 // Sum products matrix
1301 for(j=0; j<numVectors;j++){
1302 D[j]=new double[trackTable[track]];
1303 assert(D[j]);
1304
1305 }
1306
1307 // Matched filter matrix
1308 for(j=0; j<numVectors;j++){
1309 DD[j]=new double[trackTable[track]];
1310 assert(DD[j]);
1311 }
1312
1313 if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) {
1314 if(data_buffer) {
1315 free(data_buffer);
1316 }
1317 {
1318 data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim;
1319 void *tmp = malloc(data_buffer_size);
1320 if (tmp == NULL) {
1321 error("error allocating data buffer");
1322 }
1323 data_buffer = (double *) tmp;
1324 }
1325 }
1326
1327 read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim);
1328
1329 // Dot product
1330 for(j=0; j<numVectors; j++)
1331 for(k=0; k<trackTable[track]; k++){
1332 qp=query+j*dbH->dim;
1333 sp=data_buffer+k*dbH->dim;
1334 DD[j][k]=0.0; // Initialize matched filter array
1335 dp=&D[j][k]; // point to correlation cell j,k
1336 *dp=0.0; // initialize correlation cell
1337 l=dbH->dim; // size of vectors
1338 while(l--)
1339 *dp+=*qp++**sp++;
1340 }
1341
1342 // Matched Filter
1343 // HOP SIZE == 1
1344 double* spd;
1345 if(HOP_SIZE==1){ // HOP_SIZE = shingleHop
1346 for(w=0; w<wL; w++)
1347 for(j=0; j<numVectors-w; j++){
1348 sp=DD[j];
1349 spd=D[j+w]+w;
1350 k=trackTable[track]-w;
1351 while(k--)
1352 *sp+++=*spd++;
1353 }
1354 }
1355
1356 else{ // HOP_SIZE != 1
1357 for(w=0; w<wL; w++)
1358 for(j=0; j<numVectors-w; j+=HOP_SIZE){
1359 sp=DD[j];
1360 spd=D[j+w]+w;
1361 for(k=0; k<trackTable[track]-w; k+=HOP_SIZE){
1362 *sp+=*spd;
1363 sp+=HOP_SIZE;
1364 spd+=HOP_SIZE;
1365 }
1366 }
1367 }
1368
1369 if(verbosity>3 && usingTimes) {
1370 std::cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << std::endl;
1371 std::cerr.flush();
1372 }
1373
1374 if(!usingTimes ||
1375 (usingTimes
1376 && fabs(meanDBdur[track]-meanQdur)<meanQdur*timesTol)){
1377
1378 if(verbosity>3 && usingTimes) {
1379 std::cerr << "within duration tolerance." << std::endl;
1380 std::cerr.flush();
1381 }
1382
1383 // Search for minimum distance by shingles (concatenated vectors)
1384 for(j=0;j<=numVectors-wL;j+=HOP_SIZE)
1385 for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){
1386 thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k];
1387 if(verbosity>9) {
1388 std::cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << std::endl;
1389 }
1390 // Gather chi^2 statistics
1391 if(thisDist<minSample)
1392 minSample=thisDist;
1393 else if(thisDist>maxSample)
1394 maxSample=thisDist;
1395 if(thisDist>1e-9){
1396 sampleCount++;
1397 sampleSum+=thisDist;
1398 logSampleSum+=log(thisDist);
1399 }
1400
1401 // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]);
1402 // Power test
1403 if (usingPower) {
1404 if (!(powers_acceptable(qpPtr[j], sPower[trackIndexOffset + k]))) {
1405 thisDist = 1000000.0;
1406 }
1407 }
1408
1409 if(thisDist>=0 && thisDist<=radius){
1410 distances[0]++; // increment count
1411 break; // only need one track point per query point
1412 }
1413 }
1414 // How many points were below threshold ?
1415 thisDist=distances[0];
1416
1417 // Let's see the distances then...
1418 if(verbosity>3) {
1419 std::cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << std::endl;
1420 }
1421
1422 // All the track stuff goes here
1423 n=trackNN;
1424 while(n--){
1425 if(thisDist>trackDistances[n]){
1426 if((n==0 || thisDist<=trackDistances[n-1])){
1427 // Copy all values above up the queue
1428 for( l=trackNN-1 ; l > n ; l--){
1429 trackDistances[l]=trackDistances[l-1];
1430 trackQIndexes[l]=trackQIndexes[l-1];
1431 trackSIndexes[l]=trackSIndexes[l-1];
1432 trackIDs[l]=trackIDs[l-1];
1433 }
1434 trackDistances[n]=thisDist;
1435 trackQIndexes[n]=qIndexes[0];
1436 trackSIndexes[n]=sIndexes[0];
1437 successfulTracks++;
1438 trackIDs[n]=track;
1439 break;
1440 }
1441 }
1442 else
1443 break;
1444 }
1445 } // Duration match
1446
1447 // Clean up current track
1448 if(D!=NULL){
1449 for(j=0; j<numVectors; j++)
1450 delete[] D[j];
1451 }
1452
1453 if(DD!=NULL){
1454 for(j=0; j<numVectors; j++)
1455 delete[] DD[j];
1456 }
1457 }
1458 // per-track reset array values
1459 for(unsigned k=0; k<pointNN; k++){
1460 distances[k]=0.0;
1461 qIndexes[k]=~0;
1462 sIndexes[k]=~0;
1463 }
1464 }
1465
1466 free(data_buffer);
1467
1468 gettimeofday(&tv2,NULL);
1469 if(verbosity>1) {
1470 std::cerr << std::endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:"
1471 << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << std::endl;
1472 std::cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum
1473 << " minSample: " << minSample << " maxSample: " << maxSample << std::endl;
1474 }
1475
1476 if(adbQueryResponse==0){
1477 if(verbosity>1) {
1478 std::cerr<<std::endl;
1479 }
1480 // Output answer
1481 // Loop over nearest neighbours
1482 for(k=0; k < std::min(trackNN,successfulTracks); k++)
1483 std::cout << fileTable+trackIDs[k]*O2_FILETABLESIZE << " " << trackDistances[k] << std::endl;
1484 }
1485 else{ // Process Web Services Query
1486 int listLen = std::min(trackNN, processedTracks);
1487 adbQueryResponse->result.__sizeRlist=listLen;
1488 adbQueryResponse->result.__sizeDist=listLen;
1489 adbQueryResponse->result.__sizeQpos=listLen;
1490 adbQueryResponse->result.__sizeSpos=listLen;
1491 adbQueryResponse->result.Rlist= new char*[listLen];
1492 adbQueryResponse->result.Dist = new double[listLen];
1493 adbQueryResponse->result.Qpos = new unsigned int[listLen];
1494 adbQueryResponse->result.Spos = new unsigned int[listLen];
1495 for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){
1496 adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR];
1497 adbQueryResponse->result.Dist[k]=trackDistances[k];
1498 adbQueryResponse->result.Qpos[k]=trackQIndexes[k];
1499 adbQueryResponse->result.Spos[k]=trackSIndexes[k];
1500 sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
1501 }
1502 }
1503
1504 // Clean up
1505 if(trackOffsetTable)
1506 delete[] trackOffsetTable;
1507 if(queryCopy)
1508 delete[] queryCopy;
1509 if(qNorm)
1510 delete[] qNorm;
1511 if(sNorm)
1512 delete[] sNorm;
1513 if(qPower)
1514 delete[] qPower;
1515 if(sPower)
1516 delete[] sPower;
1517 if(D)
1518 delete[] D;
1519 if(DD)
1520 delete[] DD;
1521 if(timesdata)
1522 delete[] timesdata;
1523 if(querydurs)
1524 delete[] querydurs;
1525 if(meanDBdur)
1526 delete[] meanDBdur;
1527 }
1528
1529 // Unit norm block of features
1530 void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
1531 unsigned d;
1532 double L2, *p;
1533 if(verbosity>2) {
1534 std::cerr << "norming " << n << " vectors...";std::cerr.flush();
1535 }
1536 while(n--){
1537 p=X;
1538 L2=0.0;
1539 d=dim;
1540 while(d--){
1541 L2+=*p**p;
1542 p++;
1543 }
1544 /* L2=sqrt(L2);*/
1545 if(qNorm)
1546 *qNorm++=L2;
1547 /*
1548 oneOverL2 = 1.0/L2;
1549 d=dim;
1550 while(d--){
1551 *X*=oneOverL2;
1552 X++;
1553 */
1554 X+=dim;
1555 }
1556 if(verbosity>2) {
1557 std::cerr << "done..." << std::endl;
1558 }
1559 }