comparison audioDB.cpp @ 177:c32bf13c3978 no-big-mmap

Make forWrite an audioDB member flag (rather than a function argument) so that, when it comes to munmap(), we can tell how much we need to unmap. ALIGN_PAGE_FOO() takes one argument, not two.
author mas01cr
date Thu, 15 Nov 2007 14:58:16 +0000
parents 8193dbd66e34
children 7bbe5d48a7ef
comparison
equal deleted inserted replaced
176:8193dbd66e34 177:c32bf13c3978
404 dbH->version = O2_FORMAT_VERSION; 404 dbH->version = O2_FORMAT_VERSION;
405 dbH->numFiles = 0; 405 dbH->numFiles = 0;
406 dbH->dim = 0; 406 dbH->dim = 0;
407 dbH->flags = 0; 407 dbH->flags = 0;
408 dbH->length = 0; 408 dbH->length = 0;
409 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE, 8); 409 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
410 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8); 410 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles);
411 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8); 411 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles);
412 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); 412 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
413 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); 413 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double));
414 dbH->dbSize = size; 414 dbH->dbSize = size;
415 415
416 write(dbfid, dbH, O2_HEADERSIZE); 416 write(dbfid, dbH, O2_HEADERSIZE);
417 417
418 // go to the location corresponding to the last byte 418 // go to the location corresponding to the last byte
430 430
431 void audioDB::drop(){ 431 void audioDB::drop(){
432 // FIXME: drop something? Should we even allow this? 432 // FIXME: drop something? Should we even allow this?
433 } 433 }
434 434
435 void audioDB::initDBHeader(const char* dbName, bool forWrite) { 435 void audioDB::initDBHeader(const char* dbName) {
436 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { 436 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
437 error("Can't open database file", dbName, "open"); 437 error("Can't open database file", dbName, "open");
438 } 438 }
439 439
440 get_lock(dbfid, forWrite); 440 get_lock(dbfid, forWrite);
475 var = (type) tmp; \ 475 var = (type) tmp; \
476 } 476 }
477 477
478 CHECKED_MMAP(char *, db, 0, getpagesize()); 478 CHECKED_MMAP(char *, db, 0, getpagesize());
479 479
480 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->trackTableOffset); 480 if(forWrite) {
481 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->dataOffset); 481 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->trackTableOffset);
482 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->timesTableOffset); 482
483 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->l2normTableOffset); 483 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->dataOffset);
484 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->dbSize); 484 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->timesTableOffset);
485 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->l2normTableOffset);
486 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->dbSize);
487 } else {
488 if(dbH->length > 0) {
489 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->fileTableOffset + ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE));
490 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->trackTableOffset + ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE));
491 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->dataOffset + ALIGN_PAGE_UP(dbH->length));
492 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->timesTableOffset + ALIGN_PAGE_UP(dbH->length / dbH->dim));
493 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->l2normTableOffset + ALIGN_PAGE_UP(dbH->length / dbH->dim));
494 }
495 }
485 } 496 }
486 497
487 void audioDB::initInputFile (const char *inFile) { 498 void audioDB::initInputFile (const char *inFile) {
488 if (inFile) { 499 if (inFile) {
489 if ((infid = open(inFile, O_RDONLY)) < 0) { 500 if ((infid = open(inFile, O_RDONLY)) < 0) {
520 error("mmap error for input", inFile, "mmap"); 531 error("mmap error for input", inFile, "mmap");
521 } 532 }
522 } 533 }
523 } 534 }
524 535
525 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { 536 void audioDB::initTables(const char* dbName, const char* inFile = 0) {
526 537 initDBHeader(dbName);
527 initDBHeader(dbName, forWrite);
528 initInputFile(inFile); 538 initInputFile(inFile);
529 } 539 }
530 540
531 void audioDB::insert(const char* dbName, const char* inFile){ 541 void audioDB::insert(const char* dbName, const char* inFile) {
532 542 forWrite = true;
533 initTables(dbName, 1, inFile); 543 initTables(dbName, inFile);
534 544
535 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 545 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
536 error("Must use timestamps with timestamped database","use --times"); 546 error("Must use timestamps with timestamped database","use --times");
537 547
538 // Check that there is room for at least 1 more file 548 // Check that there is room for at least 1 more file
663 } 673 }
664 } 674 }
665 675
666 void audioDB::batchinsert(const char* dbName, const char* inFile) { 676 void audioDB::batchinsert(const char* dbName, const char* inFile) {
667 677
668 initDBHeader(dbName, true); 678 forWrite = true;
679 initDBHeader(dbName);
669 680
670 if(!key) 681 if(!key)
671 key=inFile; 682 key=inFile;
672 ifstream *filesIn = 0; 683 ifstream *filesIn = 0;
673 ifstream *keysIn = 0; 684 ifstream *keysIn = 0;
832 } 843 }
833 844
834 845
835 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ 846 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
836 if(!dbH) 847 if(!dbH)
837 initTables(dbName, 0, 0); 848 initTables(dbName, 0);
838 849
839 unsigned dudCount=0; 850 unsigned dudCount=0;
840 unsigned nullCount=0; 851 unsigned nullCount=0;
841 for(unsigned k=0; k<dbH->numFiles; k++){ 852 for(unsigned k=0; k<dbH->numFiles; k++){
842 if(trackTable[k]<sequenceLength){ 853 if(trackTable[k]<sequenceLength){
871 } 882 }
872 } 883 }
873 884
874 void audioDB::dump(const char* dbName){ 885 void audioDB::dump(const char* dbName){
875 if(!dbH) { 886 if(!dbH) {
876 initTables(dbName, 0, 0); 887 initTables(dbName, 0);
877 } 888 }
878 889
879 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { 890 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
880 error("error making output directory", output, "mkdir"); 891 error("error making output directory", output, "mkdir");
881 } 892 }
984 delete[] fName; 995 delete[] fName;
985 996
986 status(dbName); 997 status(dbName);
987 } 998 }
988 999
989 void audioDB::l2norm(const char* dbName){ 1000 void audioDB::l2norm(const char* dbName) {
990 initTables(dbName, true, 0); 1001 forWrite = true;
1002 initTables(dbName, 0);
991 if(dbH->length>0){ 1003 if(dbH->length>0){
992 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); 1004 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
993 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append 1005 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
994 } 1006 }
995 // Update database flags 1007 // Update database flags
1027 error("Key not found",key); 1039 error("Key not found",key);
1028 return O2_ERR_KEYNOTFOUND; 1040 return O2_ERR_KEYNOTFOUND;
1029 } 1041 }
1030 1042
1031 // Basic point query engine 1043 // Basic point query engine
1032 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ 1044 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
1033 1045
1034 initTables(dbName, 0, inFile); 1046 initTables(dbName, inFile);
1035 1047
1036 // For each input vector, find the closest pointNN matching output vectors and report 1048 // For each input vector, find the closest pointNN matching output vectors and report
1037 // we use stdout in this stub version 1049 // we use stdout in this stub version
1038 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1050 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1039 1051
1210 } 1222 }
1211 1223
1212 // trackPointQuery 1224 // trackPointQuery
1213 // return the trackNN closest tracks to the query track 1225 // return the trackNN closest tracks to the query track
1214 // uses average of pointNN points per track 1226 // uses average of pointNN points per track
1215 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ 1227 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) {
1216 initTables(dbName, 0, inFile); 1228 initTables(dbName, inFile);
1217 1229
1218 // For each input vector, find the closest pointNN matching output vectors and report 1230 // For each input vector, find the closest pointNN matching output vectors and report
1219 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1231 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1220 double* query = (double*)(indata+sizeof(int)); 1232 double* query = (double*)(indata+sizeof(int));
1221 double* data = dataBuf; 1233 double* data = dataBuf;
1466 // efficient implementation based on matched filter 1478 // efficient implementation based on matched filter
1467 // assumes normed shingles 1479 // assumes normed shingles
1468 // outputs distances of retrieved shingles, max retrieved = pointNN shingles per per track 1480 // outputs distances of retrieved shingles, max retrieved = pointNN shingles per per track
1469 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ 1481 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
1470 1482
1471 initTables(dbName, 0, inFile); 1483 initTables(dbName, inFile);
1472 1484
1473 // For each input vector, find the closest pointNN matching output vectors and report 1485 // For each input vector, find the closest pointNN matching output vectors and report
1474 // we use stdout in this stub version 1486 // we use stdout in this stub version
1475 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1487 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1476 double* query = (double*)(indata+sizeof(int)); 1488 double* query = (double*)(indata+sizeof(int));
1973 // efficient implementation based on matched filter 1985 // efficient implementation based on matched filter
1974 // assumes normed shingles 1986 // assumes normed shingles
1975 // outputs count of retrieved shingles, max retrieved = one shingle per query shingle per track 1987 // outputs count of retrieved shingles, max retrieved = one shingle per query shingle per track
1976 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ 1988 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){
1977 1989
1978 initTables(dbName, 0, inFile); 1990 initTables(dbName, inFile);
1979 1991
1980 // For each input vector, find the closest pointNN matching output vectors and report 1992 // For each input vector, find the closest pointNN matching output vectors and report
1981 // we use stdout in this stub version 1993 // we use stdout in this stub version
1982 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); 1994 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
1983 double* query = (double*)(indata+sizeof(int)); 1995 double* query = (double*)(indata+sizeof(int));