comparison audioDB.cpp @ 123:8d0942525fab audiodb-debian

Merge trunk changes -r129:142 to audiodb-debian branch (+ new debian/changelog version)
author mas01cr
date Wed, 17 Oct 2007 14:14:02 +0000
parents ae045842d29f
children b9f1c375f28a
comparison
equal deleted inserted replaced
112:ae045842d29f 123:8d0942525fab
104 throw(err); 104 throw(err);
105 } 105 }
106 } 106 }
107 107
108 void audioDB::cleanup() { 108 void audioDB::cleanup() {
109 cmdline_parser_free(&args_info);
109 if(indata) 110 if(indata)
110 munmap(indata,statbuf.st_size); 111 munmap(indata,statbuf.st_size);
111 if(db) 112 if(db)
112 munmap(db,O2_DEFAULTDBSIZE); 113 munmap(db,O2_DEFAULTDBSIZE);
113 if(dbfid>0) 114 if(dbfid>0)
312 return 0; 313 return 0;
313 } 314 }
314 return -1; // no command found 315 return -1; // no command found
315 } 316 }
316 317
317 /* Make a new database
318
319 The database consists of:
320
321 header
322 ---------------------------------------------------------------------------------
323 | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes |
324 ---------------------------------------------------------------------------------
325
326
327 keyTable : list of keys of tracks
328 --------------------------------------------------------------------------
329 | key 256 bytes |
330 --------------------------------------------------------------------------
331 O2_MAXFILES*O2_FILENAMELENGTH
332
333 trackTable : Maps implicit feature index to a feature vector matrix
334 --------------------------------------------------------------------------
335 | numVectors (4 bytes) |
336 --------------------------------------------------------------------------
337 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT)
338
339 featureTable
340 --------------------------------------------------------------------------
341 | v1 v2 v3 ... vd (double) |
342 --------------------------------------------------------------------------
343 O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE)
344
345 timesTable
346 --------------------------------------------------------------------------
347 | timestamp (double) |
348 --------------------------------------------------------------------------
349 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE)
350
351 l2normTable
352 --------------------------------------------------------------------------
353 | nm (double) |
354 --------------------------------------------------------------------------
355 O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE)
356
357 */
358
359 void audioDB::get_lock(int fd, bool exclusive) { 318 void audioDB::get_lock(int fd, bool exclusive) {
360 struct flock lock; 319 struct flock lock;
361 int status; 320 int status;
362 321
363 lock.l_type = exclusive ? F_WRLCK : F_RDLCK; 322 lock.l_type = exclusive ? F_WRLCK : F_RDLCK;
393 352
394 if (status) 353 if (status)
395 error("fcntl unlock error", "", "fcntl"); 354 error("fcntl unlock error", "", "fcntl");
396 } 355 }
397 356
357 /* Make a new database.
358
359 The database consists of:
360
361 * a header (see dbTableHeader struct definition);
362 * keyTable: list of keys of tracks;
363 * trackTable: Maps implicit feature index to a feature vector
364 matrix (sizes of tracks)
365 * featureTable: Lots of doubles;
366 * timesTable: time points for each feature vector;
367 * l2normTable: squared l2norms for each feature vector.
368 */
398 void audioDB::create(const char* dbName){ 369 void audioDB::create(const char* dbName){
399 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) 370 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
400 error("Can't create database file", dbName, "open"); 371 error("Can't create database file", dbName, "open");
401 get_lock(dbfid, 1); 372 get_lock(dbfid, 1);
402 373
441 412
442 void audioDB::drop(){ 413 void audioDB::drop(){
443 // FIXME: drop something? Should we even allow this? 414 // FIXME: drop something? Should we even allow this?
444 } 415 }
445 416
446 // initTables - memory map files passed as arguments 417 void audioDB::initDBHeader(const char* dbName, bool forWrite) {
447 // Precondition: database has already been created
448 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
449 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { 418 if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) {
450 error("Can't open database file", dbName, "open"); 419 error("Can't open database file", dbName, "open");
451 } 420 }
421
452 get_lock(dbfid, forWrite); 422 get_lock(dbfid, forWrite);
453
454 // open the input file
455 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) {
456 error("can't open input file for reading", inFile, "open");
457 }
458 // find size of input file
459 if (inFile && fstat(infid, &statbuf) < 0) {
460 error("fstat error finding size of input", inFile, "fstat");
461 }
462 // Get the database header info 423 // Get the database header info
463 dbH = new dbTableHeaderT(); 424 dbH = new dbTableHeaderT();
464 assert(dbH); 425 assert(dbH);
465 426
466 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { 427 if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) {
480 441
481 if(dbH->version != O2_FORMAT_VERSION) { 442 if(dbH->version != O2_FORMAT_VERSION) {
482 error("database file has incorect version", dbName); 443 error("database file has incorect version", dbName);
483 } 444 }
484 445
446 // mmap the database file
447 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
448 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
449 error("mmap error for initting tables of database", "", "mmap");
450
451 // Make some handy tables with correct types
452 fileTable = (char *) (db + dbH->fileTableOffset);
453 trackTable = (unsigned *) (db + dbH->trackTableOffset);
454 dataBuf = (double *) (db + dbH->dataOffset);
455 l2normTable = (double *) (db + dbH->l2normTableOffset);
456 timesTable = (double *) (db + dbH->timesTableOffset);
457 }
458
459 void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile = 0) {
460
461 initDBHeader(dbName, forWrite);
462
463 // open the input file
464 if (inFile && (infid = open(inFile, O_RDONLY)) < 0) {
465 error("can't open input file for reading", inFile, "open");
466 }
467 // find size of input file
468 if (inFile && fstat(infid, &statbuf) < 0) {
469 error("fstat error finding size of input", inFile, "fstat");
470 }
485 if(inFile) 471 if(inFile)
486 if(dbH->dim == 0 && dbH->length == 0) // empty database 472 if(dbH->dim == 0 && dbH->length == 0) // empty database
487 // initialize with input dimensionality 473 // initialize with input dimensionality
488 read(infid, &dbH->dim, sizeof(unsigned)); 474 read(infid, &dbH->dim, sizeof(unsigned));
489 else { 475 else {
497 483
498 // mmap the input file 484 // mmap the input file
499 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0)) 485 if (inFile && (indata = (char*)mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, infid, 0))
500 == (caddr_t) -1) 486 == (caddr_t) -1)
501 error("mmap error for input", inFile, "mmap"); 487 error("mmap error for input", inFile, "mmap");
502
503 // mmap the database file
504 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | (forWrite ? PROT_WRITE : 0),
505 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
506 error("mmap error for initting tables of database", "", "mmap");
507
508 // Make some handy tables with correct types
509 fileTable= (char*)(db+dbH->fileTableOffset);
510 trackTable = (unsigned*)(db+dbH->trackTableOffset);
511 dataBuf = (double*)(db+dbH->dataOffset);
512 l2normTable = (double*)(db+dbH->l2normTableOffset);
513 timesTable = (double*)(db+dbH->timesTableOffset);
514 } 488 }
515 489
516 void audioDB::insert(const char* dbName, const char* inFile){ 490 void audioDB::insert(const char* dbName, const char* inFile){
517 491
518 initTables(dbName, 1, inFile); 492 initTables(dbName, 1, inFile);
647 } 621 }
648 } 622 }
649 } 623 }
650 } 624 }
651 625
652 void audioDB::batchinsert(const char* dbName, const char* inFile){ 626 void audioDB::batchinsert(const char* dbName, const char* inFile) {
653 627
654 if ((dbfid = open (dbName, O_RDWR)) < 0) 628 initDBHeader(dbName, true);
655 error("Can't open database file", dbName, "open");
656 get_lock(dbfid, 1);
657 629
658 if(!key) 630 if(!key)
659 key=inFile; 631 key=inFile;
660 ifstream *filesIn = 0; 632 ifstream *filesIn = 0;
661 ifstream *keysIn = 0; 633 ifstream *keysIn = 0;
665 error("Could not open batch in file", inFile); 637 error("Could not open batch in file", inFile);
666 if(key && key!=inFile) 638 if(key && key!=inFile)
667 if(!(keysIn = new ifstream(key))) 639 if(!(keysIn = new ifstream(key)))
668 error("Could not open batch key file",key); 640 error("Could not open batch key file",key);
669 641
670 // Get the database header info
671 dbH = new dbTableHeaderT();
672 assert(dbH);
673
674 if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT))
675 error("error reading db header");
676
677 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) 642 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
678 error("Must use timestamps with timestamped database","use --times"); 643 error("Must use timestamps with timestamped database","use --times");
679 644
680 if(dbH->magic!=O2_MAGIC){
681 cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl;
682 error("database file has incorrect header",dbName);
683 }
684
685 unsigned totalVectors=0; 645 unsigned totalVectors=0;
686 char *thisKey = new char[MAXSTR]; 646 char *thisKey = new char[MAXSTR];
687 char *thisFile = new char[MAXSTR]; 647 char *thisFile = new char[MAXSTR];
688 char *thisTimesFileName = new char[MAXSTR]; 648 char *thisTimesFileName = new char[MAXSTR];
689 649
690 do{ 650 do{
691 filesIn->getline(thisFile,MAXSTR); 651 filesIn->getline(thisFile,MAXSTR);
692 if(key && key!=inFile) 652 if(key && key!=inFile)
693 keysIn->getline(thisKey,MAXSTR); 653 keysIn->getline(thisKey,MAXSTR);
694 else 654 else
704 error("can't open feature file for reading", thisFile, "open"); 664 error("can't open feature file for reading", thisFile, "open");
705 665
706 // find size of input file 666 // find size of input file
707 if (thisFile && fstat (infid,&statbuf) < 0) 667 if (thisFile && fstat (infid,&statbuf) < 0)
708 error("fstat error finding size of input", "", "fstat"); 668 error("fstat error finding size of input", "", "fstat");
709
710 // mmap the database file
711 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
712 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
713 error("mmap error for batchinsert into database", "", "mmap");
714
715 // Make some handy tables with correct types
716 fileTable= (char*)(db+dbH->fileTableOffset);
717 trackTable = (unsigned*)(db+dbH->trackTableOffset);
718 dataBuf = (double*)(db+dbH->dataOffset);
719 l2normTable = (double*)(db+dbH->l2normTableOffset);
720 timesTable = (double*)(db+dbH->timesTableOffset);
721 669
722 // Check that there is room for at least 1 more file 670 // Check that there is room for at least 1 more file
723 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) 671 if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int))))
724 error("No more room in database","insert failed: reason database is full."); 672 error("No more room in database","insert failed: reason database is full.");
725 673
807 } 755 }
808 } 756 }
809 // CLEAN UP 757 // CLEAN UP
810 munmap(indata,statbuf.st_size); 758 munmap(indata,statbuf.st_size);
811 close(infid); 759 close(infid);
812 munmap(db,O2_DEFAULTDBSIZE);
813 }while(!filesIn->eof()); 760 }while(!filesIn->eof());
814 761
815 // mmap the database file
816 if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE,
817 MAP_SHARED, dbfid, 0)) == (caddr_t) -1)
818 error("mmap error for creating database", "", "mmap");
819
820 if(verbosity) { 762 if(verbosity) {
821 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " 763 cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
822 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; 764 << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl;
823 } 765 }
824 766
825 // Report status 767 // Report status
826 status(dbName); 768 status(dbName);
827
828 munmap(db,O2_DEFAULTDBSIZE);
829 } 769 }
830 770
831 // FIXME: this can't propagate the sequence length argument (used for 771 // FIXME: this can't propagate the sequence length argument (used for
832 // dudCount). See adb__status() definition for the other half of 772 // dudCount). See adb__status() definition for the other half of
833 // this. -- CSR, 2007-10-01 773 // this. -- CSR, 2007-10-01
1889 // Clean up 1829 // Clean up
1890 if(trackOffsetTable) 1830 if(trackOffsetTable)
1891 delete[] trackOffsetTable; 1831 delete[] trackOffsetTable;
1892 if(queryCopy) 1832 if(queryCopy)
1893 delete[] queryCopy; 1833 delete[] queryCopy;
1894 //if(qNorm) 1834 if(qNorm)
1895 //delete qNorm; 1835 delete[] qNorm;
1836 if(sNorm)
1837 delete[] sNorm;
1838 if(sMeanL2)
1839 delete[] sMeanL2;
1896 if(D) 1840 if(D)
1897 delete[] D; 1841 delete[] D;
1898 if(DD) 1842 if(DD)
1899 delete[] DD; 1843 delete[] DD;
1900 if(timesdata) 1844 if(timesdata)
2359 adbQueryResult->Spos[k]=trackSIndexes[k]; 2303 adbQueryResult->Spos[k]=trackSIndexes[k];
2360 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); 2304 sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE);
2361 } 2305 }
2362 } 2306 }
2363 2307
2364
2365 // Clean up 2308 // Clean up
2366 if(trackOffsetTable) 2309 if(trackOffsetTable)
2367 delete[] trackOffsetTable; 2310 delete[] trackOffsetTable;
2368 if(queryCopy) 2311 if(queryCopy)
2369 delete[] queryCopy; 2312 delete[] queryCopy;
2370 //if(qNorm) 2313 if(qNorm)
2371 //delete qNorm; 2314 delete[] qNorm;
2315 if(sNorm)
2316 delete[] sNorm;
2317 if(sMeanL2)
2318 delete[] sMeanL2;
2372 if(D) 2319 if(D)
2373 delete[] D; 2320 delete[] D;
2374 if(DD) 2321 if(DD)
2375 delete[] DD; 2322 delete[] DD;
2376 if(timesdata) 2323 if(timesdata)