Mercurial > hg > audiodb
comparison audioDB.cpp @ 177:c32bf13c3978 no-big-mmap
Make forWrite an audioDB member flag (rather than a function argument) so that,
when it comes time to munmap(), we can tell how much needs to be unmapped.
ALIGN_PAGE_FOO() takes one argument, not two.
author | mas01cr |
---|---|
date | Thu, 15 Nov 2007 14:58:16 +0000 |
parents | 8193dbd66e34 |
children | 7bbe5d48a7ef |
comparison
equal
deleted
inserted
replaced
176:8193dbd66e34 | 177:c32bf13c3978 |
---|---|
404 dbH->version = O2_FORMAT_VERSION; | 404 dbH->version = O2_FORMAT_VERSION; |
405 dbH->numFiles = 0; | 405 dbH->numFiles = 0; |
406 dbH->dim = 0; | 406 dbH->dim = 0; |
407 dbH->flags = 0; | 407 dbH->flags = 0; |
408 dbH->length = 0; | 408 dbH->length = 0; |
409 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE, 8); | 409 dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); |
410 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles, 8); | 410 dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); |
411 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles, 8); | 411 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); |
412 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); | 412 dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); |
413 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double), 8); | 413 dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); |
414 dbH->dbSize = size; | 414 dbH->dbSize = size; |
415 | 415 |
416 write(dbfid, dbH, O2_HEADERSIZE); | 416 write(dbfid, dbH, O2_HEADERSIZE); |
417 | 417 |
418 // go to the location corresponding to the last byte | 418 // go to the location corresponding to the last byte |
430 | 430 |
431 void audioDB::drop(){ | 431 void audioDB::drop(){ |
432 // FIXME: drop something? Should we even allow this? | 432 // FIXME: drop something? Should we even allow this? |
433 } | 433 } |
434 | 434 |
435 void audioDB::initDBHeader(const char* dbName, bool forWrite) { | 435 void audioDB::initDBHeader(const char* dbName) { |
436 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { | 436 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { |
437 error("Can't open database file", dbName, "open"); | 437 error("Can't open database file", dbName, "open"); |
438 } | 438 } |
439 | 439 |
440 get_lock(dbfid, forWrite); | 440 get_lock(dbfid, forWrite); |
475 var = (type) tmp; \ | 475 var = (type) tmp; \ |
476 } | 476 } |
477 | 477 |
478 CHECKED_MMAP(char *, db, 0, getpagesize()); | 478 CHECKED_MMAP(char *, db, 0, getpagesize()); |
479 | 479 |
480 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->trackTableOffset); | 480 if(forWrite) { |
481 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->dataOffset); | 481 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->trackTableOffset); |
482 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->timesTableOffset); | 482 |
483 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->l2normTableOffset); | 483 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->dataOffset); |
484 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->dbSize); | 484 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->timesTableOffset); |
485 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->l2normTableOffset); | |
486 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->dbSize); | |
487 } else { | |
488 if(dbH->length > 0) { | |
489 CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, dbH->fileTableOffset + ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE)); | |
490 CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, dbH->trackTableOffset + ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE)); | |
491 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dbH->dataOffset + ALIGN_PAGE_UP(dbH->length)); | |
492 CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, dbH->timesTableOffset + ALIGN_PAGE_UP(dbH->length / dbH->dim)); | |
493 CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, dbH->l2normTableOffset + ALIGN_PAGE_UP(dbH->length / dbH->dim)); | |
494 } | |
495 } | |
485 } | 496 } |
486 | 497 |
487 void audioDB::initInputFile (const char *inFile) { | 498 void audioDB::initInputFile (const char *inFile) { |
488 if (inFile) { | 499 if (inFile) { |
489 if ((infid = open(inFile, O_RDONLY)) < 0) { | 500 if ((infid = open(inFile, O_RDONLY)) < 0) { |
520 error("mmap error for input", inFile, "mmap"); | 531 error("mmap error for input", inFile, "mmap"); |
521 } | 532 } |
522 } | 533 } |
523 } | 534 } |
524 | 535 |
525 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) { | 536 void audioDB::initTables(const char* dbName, const char* inFile = 0) { |
526 | 537 initDBHeader(dbName); |
527 initDBHeader(dbName, forWrite); | |
528 initInputFile(inFile); | 538 initInputFile(inFile); |
529 } | 539 } |
530 | 540 |
531 void audioDB::insert(const char* dbName, const char* inFile){ | 541 void audioDB::insert(const char* dbName, const char* inFile) { |
532 | 542 forWrite = true; |
533 initTables(dbName, 1, inFile); | 543 initTables(dbName, inFile); |
534 | 544 |
535 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) | 545 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) |
536 error("Must use timestamps with timestamped database","use --times"); | 546 error("Must use timestamps with timestamped database","use --times"); |
537 | 547 |
538 // Check that there is room for at least 1 more file | 548 // Check that there is room for at least 1 more file |
663 } | 673 } |
664 } | 674 } |
665 | 675 |
666 void audioDB::batchinsert(const char* dbName, const char* inFile) { | 676 void audioDB::batchinsert(const char* dbName, const char* inFile) { |
667 | 677 |
668 initDBHeader(dbName, true); | 678 forWrite = true; |
679 initDBHeader(dbName); | |
669 | 680 |
670 if(!key) | 681 if(!key) |
671 key=inFile; | 682 key=inFile; |
672 ifstream *filesIn = 0; | 683 ifstream *filesIn = 0; |
673 ifstream *keysIn = 0; | 684 ifstream *keysIn = 0; |
832 } | 843 } |
833 | 844 |
834 | 845 |
835 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ | 846 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ |
836 if(!dbH) | 847 if(!dbH) |
837 initTables(dbName, 0, 0); | 848 initTables(dbName, 0); |
838 | 849 |
839 unsigned dudCount=0; | 850 unsigned dudCount=0; |
840 unsigned nullCount=0; | 851 unsigned nullCount=0; |
841 for(unsigned k=0; k<dbH->numFiles; k++){ | 852 for(unsigned k=0; k<dbH->numFiles; k++){ |
842 if(trackTable[k]<sequenceLength){ | 853 if(trackTable[k]<sequenceLength){ |
871 } | 882 } |
872 } | 883 } |
873 | 884 |
874 void audioDB::dump(const char* dbName){ | 885 void audioDB::dump(const char* dbName){ |
875 if(!dbH) { | 886 if(!dbH) { |
876 initTables(dbName, 0, 0); | 887 initTables(dbName, 0); |
877 } | 888 } |
878 | 889 |
879 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { | 890 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { |
880 error("error making output directory", output, "mkdir"); | 891 error("error making output directory", output, "mkdir"); |
881 } | 892 } |
984 delete[] fName; | 995 delete[] fName; |
985 | 996 |
986 status(dbName); | 997 status(dbName); |
987 } | 998 } |
988 | 999 |
989 void audioDB::l2norm(const char* dbName){ | 1000 void audioDB::l2norm(const char* dbName) { |
990 initTables(dbName, true, 0); | 1001 forWrite = true; |
1002 initTables(dbName, 0); | |
991 if(dbH->length>0){ | 1003 if(dbH->length>0){ |
992 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); | 1004 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); |
993 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append | 1005 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append |
994 } | 1006 } |
995 // Update database flags | 1007 // Update database flags |
1027 error("Key not found",key); | 1039 error("Key not found",key); |
1028 return O2_ERR_KEYNOTFOUND; | 1040 return O2_ERR_KEYNOTFOUND; |
1029 } | 1041 } |
1030 | 1042 |
1031 // Basic point query engine | 1043 // Basic point query engine |
1032 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | 1044 void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { |
1033 | 1045 |
1034 initTables(dbName, 0, inFile); | 1046 initTables(dbName, inFile); |
1035 | 1047 |
1036 // For each input vector, find the closest pointNN matching output vectors and report | 1048 // For each input vector, find the closest pointNN matching output vectors and report |
1037 // we use stdout in this stub version | 1049 // we use stdout in this stub version |
1038 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1050 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1039 | 1051 |
1210 } | 1222 } |
1211 | 1223 |
1212 // trackPointQuery | 1224 // trackPointQuery |
1213 // return the trackNN closest tracks to the query track | 1225 // return the trackNN closest tracks to the query track |
1214 // uses average of pointNN points per track | 1226 // uses average of pointNN points per track |
1215 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | 1227 void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { |
1216 initTables(dbName, 0, inFile); | 1228 initTables(dbName, inFile); |
1217 | 1229 |
1218 // For each input vector, find the closest pointNN matching output vectors and report | 1230 // For each input vector, find the closest pointNN matching output vectors and report |
1219 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1231 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1220 double* query = (double*)(indata+sizeof(int)); | 1232 double* query = (double*)(indata+sizeof(int)); |
1221 double* data = dataBuf; | 1233 double* data = dataBuf; |
1466 // efficient implementation based on matched filter | 1478 // efficient implementation based on matched filter |
1467 // assumes normed shingles | 1479 // assumes normed shingles |
1468 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track | 1480 // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track |
1469 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | 1481 void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ |
1470 | 1482 |
1471 initTables(dbName, 0, inFile); | 1483 initTables(dbName, inFile); |
1472 | 1484 |
1473 // For each input vector, find the closest pointNN matching output vectors and report | 1485 // For each input vector, find the closest pointNN matching output vectors and report |
1474 // we use stdout in this stub version | 1486 // we use stdout in this stub version |
1475 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1487 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1476 double* query = (double*)(indata+sizeof(int)); | 1488 double* query = (double*)(indata+sizeof(int)); |
1973 // efficient implementation based on matched filter | 1985 // efficient implementation based on matched filter |
1974 // assumes normed shingles | 1986 // assumes normed shingles |
1975 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track | 1987 // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track |
1976 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ | 1988 void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ |
1977 | 1989 |
1978 initTables(dbName, 0, inFile); | 1990 initTables(dbName, inFile); |
1979 | 1991 |
1980 // For each input vector, find the closest pointNN matching output vectors and report | 1992 // For each input vector, find the closest pointNN matching output vectors and report |
1981 // we use stdout in this stub version | 1993 // we use stdout in this stub version |
1982 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); | 1994 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); |
1983 double* query = (double*)(indata+sizeof(int)); | 1995 double* query = (double*)(indata+sizeof(int)); |