mas01cr@0: #include "audioDB.h" mas01cr@0: mas01cr@105: #if defined(O2_DEBUG) mas01cr@104: void sigterm_action(int signal, siginfo_t *info, void *context) { mas01cr@104: exit(128+signal); mas01cr@104: } mas01cr@104: mas01cr@104: void sighup_action(int signal, siginfo_t *info, void *context) { mas01cr@104: // FIXME: reread any configuration files mas01cr@104: } mas01cr@105: #endif mas01cr@0: mas01cr@32: void audioDB::error(const char* a, const char* b, const char *sysFunc) { mas01cr@77: if(isServer) { mas01cr@81: /* FIXME: I think this is leaky -- we never delete err. actually mas01cr@81: deleting it is tricky, though; it gets placed into some mas01cr@81: soap-internal struct with uncertain extent... -- CSR, mas01cr@81: 2007-10-01 */ mas01cr@77: char *err = new char[256]; /* FIXME: overflows */ mas01cr@77: snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : ""); mas01cr@77: /* FIXME: actually we could usefully do with a properly structured mas01cr@77: type, so that we can throw separate faultstring and details. 
mas01cr@77: -- CSR, 2007-10-01 */ mas01cr@77: throw(err); mas01cr@77: } else { mas01cr@77: cerr << a << ": " << b << endl; mas01cr@77: if (sysFunc) { mas01cr@77: perror(sysFunc); mas01cr@77: } mas01cr@77: exit(1); mas01cr@32: } mas01cr@0: } mas01cr@0: mas01cr@76: audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@151: error("No command found"); mas01cr@0: } mas01cr@77: mas01cr@0: if(O2_ACTION(COM_SERVER)) mas01cr@0: startServer(); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01cr@0: if(isClient) mas01cr@0: ws_query(dbName, inFile, (char*)hostport); mas01cr@0: else mas01cr@76: query(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@133: audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01cr@151: 
isServer = 1; // FIXME: Hack mas01cr@97: processArgs(argc, argv); mas01cr@97: assert(O2_ACTION(COM_QUERY)); mas01cr@133: query(dbName, inFile, adbQueryResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@133: audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01cr@151: isServer = 1; // FIXME: Hack mas01cr@97: processArgs(argc, argv); mas01cr@97: assert(O2_ACTION(COM_STATUS)); mas01cr@133: status(dbName, adbStatusResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@97: void audioDB::cleanup() { mas01cr@122: cmdline_parser_free(&args_info); mas01cr@0: if(indata) mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: if(db) mas01cr@176: munmap(db,getpagesize()); mas01cr@175: if(fileTable) mas01cr@178: munmap(fileTable, fileTableLength); mas01cr@175: if(trackTable) mas01cr@178: munmap(trackTable, trackTableLength); mas01cr@175: if(dataBuf) mas01cr@178: munmap(dataBuf, dataBufLength); mas01cr@175: if(timesTable) mas01cr@185: munmap(timesTable, timesTableLength); mas01cr@175: if(l2normTable) mas01cr@178: munmap(l2normTable, l2normTableLength); mas01cr@175: mas01cr@0: if(dbfid>0) mas01cr@0: close(dbfid); mas01cr@0: if(infid>0) mas01cr@0: close(infid); mas01cr@0: if(dbH) mas01cr@0: delete dbH; mas01cr@0: } mas01cr@0: mas01cr@97: audioDB::~audioDB(){ mas01cr@97: cleanup(); mas01cr@97: } mas01cr@97: mas01cr@0: int audioDB::processArgs(const unsigned argc, char* const argv[]){ mas01cr@0: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", 
gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if (cmdline_parser (argc, argv, &args_info) != 0) mas01cr@151: error("Error parsing command line"); mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@0: verbosity=args_info.verbosity_arg; mas01cr@0: if(verbosity<0 || verbosity>10){ mas01cr@0: cerr << "Warning: verbosity out of range, setting to 1" << endl; mas01cr@0: verbosity=1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@129: if(args_info.size_given) { mas01cr@192: if (args_info.size_arg < 50 || args_info.size_arg > 32000) { mas01cr@129: error("Size out of range", ""); mas01cr@129: } mas01cr@192: size = (off_t) args_info.size_arg * 1000000; mas01cr@129: } mas01cr@129: mas01mc@17: if(args_info.radius_given){ mas01mc@17: radius=args_info.radius_arg; mas01mc@17: if(radius<=0 || radius>1000000000){ mas01cr@77: error("radius out of range"); mas01mc@17: } mas01mc@17: else mas01cr@60: if(verbosity>3) { mas01mc@17: cerr << "Setting radius to " << radius << endl; mas01cr@60: } mas01mc@17: } mas01mc@17: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@151: isServer = 1; mas01cr@105: #if defined(O2_DEBUG) mas01cr@104: struct sigaction sa; mas01cr@104: sa.sa_sigaction = sigterm_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGTERM, &sa, NULL); mas01cr@104: sa.sa_sigaction = sighup_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGHUP, &sa, NULL); mas01cr@105: #endif mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@105: if(args_info.client_given){ 
mas01cr@105: command=COM_CLIENT; mas01cr@105: hostport=args_info.client_arg; mas01cr@105: isClient=1; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.NEW_given){ mas01cr@105: command=COM_CREATE; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.STATUS_given){ mas01cr@105: command=COM_STATUS; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.DUMP_given){ mas01cr@105: command=COM_DUMP; mas01cr@105: dbName=args_info.database_arg; mas01cr@131: output = args_info.output_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.L2NORM_given){ mas01cr@105: command=COM_L2NORM; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.INSERT_given){ mas01cr@105: command=COM_INSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.features_arg; mas01cr@105: if(args_info.key_given) mas01cr@105: key=args_info.key_arg; mas01cr@105: if(args_info.times_given){ mas01cr@105: timesFileName=args_info.times_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@105: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: mas01cr@105: if(args_info.BATCHINSERT_given){ mas01cr@105: command=COM_BATCHINSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.featureList_arg; mas01cr@105: if(args_info.keyList_given) mas01cr@105: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@0: mas01cr@105: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ mas01mc@18: trackFileName=args_info.keyList_arg; mas01mc@18: if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) mas01mc@18: error("Could not open 
keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@105: */ mas01cr@105: mas01cr@105: if(args_info.timesList_given){ mas01cr@105: timesFileName=args_info.timesList_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@105: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@105: error("Could not open timesList file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: mas01cr@105: // Query command and arguments mas01cr@105: if(args_info.QUERY_given){ mas01cr@105: command=COM_QUERY; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.features_arg; mas01cr@105: mas01cr@105: if(args_info.keyList_given){ mas01cr@105: trackFileName=args_info.keyList_arg; mas01cr@105: if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) mas01cr@105: error("Could not open keyList file for reading",trackFileName); mas01cr@105: } mas01cr@105: mas01cr@105: if(args_info.times_given){ mas01cr@105: timesFileName=args_info.times_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@105: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@105: mas01cr@105: // query type mas01cr@105: if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) mas01cr@105: queryType=O2_TRACK_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@105: queryType=O2_POINT_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@105: queryType=O2_SEQUENCE_QUERY; mas01cr@105: else mas01cr@105: error("unsupported query type",args_info.QUERY_arg); mas01cr@105: mas01cr@105: if(!args_info.exhaustive_flag){ mas01cr@105: queryPoint = args_info.qpoint_arg; mas01cr@105: usingQueryPoint=1; mas01cr@105: if(queryPoint<0 || queryPoint >10000) 
mas01cr@105: error("queryPoint out of range: 0 <= queryPoint <= 10000"); mas01cr@105: } mas01cr@105: mas01cr@105: pointNN = args_info.pointnn_arg; mas01cr@105: if(pointNN < 1 || pointNN > 1000) { mas01cr@105: error("pointNN out of range: 1 <= pointNN <= 1000"); mas01cr@105: } mas01cr@105: trackNN = args_info.resultlength_arg; mas01cr@105: if(trackNN < 1 || trackNN > 1000) { mas01cr@105: error("resultlength out of range: 1 <= resultlength <= 1000"); mas01cr@105: } mas01cr@105: sequenceLength = args_info.sequencelength_arg; mas01cr@105: if(sequenceLength < 1 || sequenceLength > 1000) { mas01cr@105: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@105: } mas01cr@105: sequenceHop = args_info.sequencehop_arg; mas01cr@105: if(sequenceHop < 1 || sequenceHop > 1000) { mas01cr@105: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01cr@105: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@30: void audioDB::get_lock(int fd, bool exclusive) { mas01cr@30: struct flock lock; mas01cr@30: int status; mas01cr@30: mas01cr@30: lock.l_type = exclusive ? 
F_WRLCK : F_RDLCK; mas01cr@30: lock.l_whence = SEEK_SET; mas01cr@30: lock.l_start = 0; mas01cr@30: lock.l_len = 0; /* "the whole file" */ mas01cr@30: mas01cr@30: retry: mas01cr@30: do { mas01cr@30: status = fcntl(fd, F_SETLKW, &lock); mas01cr@30: } while (status != 0 && errno == EINTR); mas01cr@30: mas01cr@30: if (status) { mas01cr@30: if (errno == EAGAIN) { mas01cr@30: sleep(1); mas01cr@30: goto retry; mas01cr@30: } else { mas01cr@32: error("fcntl lock error", "", "fcntl"); mas01cr@30: } mas01cr@30: } mas01cr@30: } mas01cr@30: mas01cr@30: void audioDB::release_lock(int fd) { mas01cr@30: struct flock lock; mas01cr@30: int status; mas01cr@30: mas01cr@30: lock.l_type = F_UNLCK; mas01cr@30: lock.l_whence = SEEK_SET; mas01cr@30: lock.l_start = 0; mas01cr@30: lock.l_len = 0; mas01cr@30: mas01cr@30: status = fcntl(fd, F_SETLKW, &lock); mas01cr@30: mas01cr@30: if (status) mas01cr@32: error("fcntl unlock error", "", "fcntl"); mas01cr@30: } mas01cr@30: mas01cr@120: /* Make a new database. mas01cr@120: mas01cr@120: The database consists of: mas01cr@120: mas01cr@120: * a header (see dbTableHeader struct definition); mas01cr@120: * keyTable: list of keys of tracks; mas01cr@120: * trackTable: Maps implicit feature index to a feature vector mas01cr@120: matrix (sizes of tracks) mas01cr@120: * featureTable: Lots of doubles; mas01cr@120: * timesTable: time points for each feature vector; mas01cr@120: * l2normTable: squared l2norms for each feature vector. 
mas01cr@120: */ mas01cr@0: /* create(): creates dbName (O_CREAT|O_EXCL, so an existing file is an error), takes the exclusive lock, fills in a fresh header whose table offsets are derived from size and maxfiles, writes the header, then seeks to size-1 and writes a single byte there. */ void audioDB::create(const char* dbName){ mas01cr@31: if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) mas01cr@32: error("Can't create database file", dbName, "open"); mas01cr@30: get_lock(dbfid, 1); mas01cr@0: mas01cr@60: if(verbosity) { mas01cr@0: cerr << "header size:" << O2_HEADERSIZE << endl; mas01cr@60: } mas01cr@0: mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@140: unsigned int maxfiles = (unsigned int) rint((double) O2_MAXFILES * (double) size / (double) O2_DEFAULTDBSIZE); mas01cr@140: mas01cr@0: // Initialize header mas01cr@108: dbH->magic = O2_MAGIC; mas01cr@108: dbH->version = O2_FORMAT_VERSION; mas01cr@108: dbH->numFiles = 0; mas01cr@108: dbH->dim = 0; mas01cr@108: dbH->flags = 0; mas01cr@108: dbH->length = 0; mas01cr@177: dbH->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE); mas01cr@177: dbH->trackTableOffset = ALIGN_PAGE_UP(dbH->fileTableOffset + O2_FILETABLESIZE*maxfiles); mas01cr@177: dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLESIZE*maxfiles); mas01cr@177: dbH->l2normTableOffset = ALIGN_PAGE_DOWN(size - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); mas01cr@177: dbH->timesTableOffset = ALIGN_PAGE_DOWN(dbH->l2normTableOffset - maxfiles*O2_MEANNUMVECTORS*sizeof(double)); mas01cr@129: dbH->dbSize = size; mas01cr@0: mas01cr@173: /* NOTE(review): the result of this header write() is not checked, unlike the write() of the final byte below. */ write(dbfid, dbH, O2_HEADERSIZE); mas01cr@173: mas01cr@173: // go to the location corresponding to the last byte mas01cr@173: if (lseek (dbfid, size - 1, SEEK_SET) == -1) mas01cr@173: error("lseek error in db file", "", "lseek"); mas01cr@173: mas01cr@173: // write a dummy byte at the last location mas01cr@173: if (write (dbfid, "", 1) != 1) mas01cr@173: error("write error", "", "write"); mas01cr@173: mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_CREATE << " " << dbName << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::drop(){ mas01cr@108: // FIXME: drop something? 
Should we even allow this? mas01cr@0: } mas01cr@0: mas01cr@177: /* initDBHeader(): opens dbName (read-write when forWrite is set, read-only otherwise), locks it, reads the header and checks its magic number and format version, then maps the first page as db. When opened for writing, or when dbH->length is non-zero, it also maps the file, track, times and l2norm tables; the dataBuf mapping is commented out. */ void audioDB::initDBHeader(const char* dbName) { mas01cr@106: if ((dbfid = open(dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) { mas01cr@32: error("Can't open database file", dbName, "open"); mas01cr@106: } mas01cr@120: mas01cr@30: get_lock(dbfid, forWrite); mas01cr@0: // Get the database header info mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@106: if(read(dbfid, (char *) dbH, O2_HEADERSIZE) != O2_HEADERSIZE) { mas01cr@106: error("error reading db header", dbName, "read"); mas01cr@106: } mas01cr@0: mas01cr@108: if(dbH->magic == O2_OLD_MAGIC) { mas01cr@108: // FIXME: if anyone ever complains, write the program to convert mas01cr@108: // from the old audioDB format to the new... mas01cr@108: error("database file has old O2 header", dbName); mas01cr@108: } mas01cr@0: mas01cr@106: if(dbH->magic != O2_MAGIC) { mas01cr@106: cerr << "expected: " << O2_MAGIC << ", got: " << dbH->magic << endl; mas01cr@108: error("database file has incorrect header", dbName); mas01cr@108: } mas01cr@108: mas01cr@108: if(dbH->version != O2_FORMAT_VERSION) { mas01cr@108: error("database file has incorect version", dbName); mas01cr@0: } mas01cr@0: mas01cr@128: // FIXME: when changing file format version, remove this workaround. mas01cr@128: if(dbH->dbSize == 0) { mas01cr@128: dbH->dbSize = O2_DEFAULTDBSIZE; mas01cr@128: } mas01cr@128: mas01cr@120: // Make some handy tables with correct types mas01cr@178: #define CHECKED_MMAP(type, var, start, length) \ mas01cr@178: { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? 
PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \ mas01cr@174: if(tmp == (void *) -1) { \ mas01cr@174: error("mmap error for db table", #var, "mmap"); \ mas01cr@174: } \ mas01cr@174: var = (type) tmp; \ mas01cr@174: } mas01cr@174: mas01cr@176: CHECKED_MMAP(char *, db, 0, getpagesize()); mas01cr@176: mas01cr@178: if(forWrite || (dbH->length > 0)) { mas01cr@178: if(forWrite) { mas01cr@178: fileTableLength = dbH->trackTableOffset - dbH->fileTableOffset; mas01cr@178: trackTableLength = dbH->dataOffset - dbH->trackTableOffset; mas01cr@178: dataBufLength = dbH->timesTableOffset - dbH->dataOffset; mas01cr@178: timesTableLength = dbH->l2normTableOffset - dbH->timesTableOffset; mas01cr@178: l2normTableLength = dbH->dbSize - dbH->l2normTableOffset; mas01cr@178: } else { mas01cr@178: fileTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_FILETABLESIZE); mas01cr@178: trackTableLength = ALIGN_PAGE_UP(dbH->numFiles * O2_TRACKTABLESIZE); mas01cr@178: dataBufLength = ALIGN_PAGE_UP(dbH->length); mas01cr@178: timesTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); mas01cr@178: l2normTableLength = ALIGN_PAGE_UP(dbH->length / dbH->dim); mas01cr@177: } mas01cr@178: CHECKED_MMAP(char *, fileTable, dbH->fileTableOffset, fileTableLength); mas01cr@178: CHECKED_MMAP(unsigned *, trackTable, dbH->trackTableOffset, trackTableLength); mas01cr@191: /* mas01cr@191: * No more mmap() for dataBuf mas01cr@191: * mas01cr@191: * FIXME: Actually we do do the mmap() in the two cases where it's mas01cr@191: * still "needed": in pointQuery and in l2norm if dbH->length is mas01cr@191: * non-zero. Removing those cases too (and deleting the dataBuf mas01cr@191: * variable completely) would be cool. 
-- CSR, 2007-11-19 mas01cr@191: * mas01cr@191: * CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); mas01cr@191: */ mas01cr@178: CHECKED_MMAP(double *, timesTable, dbH->timesTableOffset, timesTableLength); mas01cr@178: CHECKED_MMAP(double *, l2normTable, dbH->l2normTableOffset, l2normTableLength); mas01cr@177: } mas01cr@120: } mas01cr@120: mas01cr@169: /* initInputFile(): when inFile is non-null, opens it read-only, fstat()s it into statbuf and reads its leading unsigned value. For an empty database (dim and length both zero) that value becomes dbH->dim; otherwise it is compared with dbH->dim. NOTE(review): the else branch tests dbH->dim == 0 rather than the value just read into test -- confirm that is intended. */ void audioDB::initInputFile (const char *inFile) { mas01cr@169: if (inFile) { mas01cr@169: if ((infid = open(inFile, O_RDONLY)) < 0) { mas01cr@169: error("can't open input file for reading", inFile, "open"); mas01cr@169: } mas01cr@169: mas01cr@169: if (fstat(infid, &statbuf) < 0) { mas01cr@169: error("fstat error finding size of input", inFile, "fstat"); mas01cr@169: } mas01cr@169: mas01cr@169: if(dbH->dim == 0 && dbH->length == 0) { // empty database mas01cr@169: // initialize with input dimensionality mas01cr@169: if(read(infid, &dbH->dim, sizeof(unsigned)) != sizeof(unsigned)) { mas01cr@169: error("short read of input file", inFile); mas01cr@169: } mas01cr@169: if(dbH->dim == 0) { mas01cr@169: error("dimensionality of zero in input file", inFile); mas01cr@169: } mas01cr@169: } else { mas01cr@169: unsigned test; mas01cr@169: if(read(infid, &test, sizeof(unsigned)) != sizeof(unsigned)) { mas01cr@169: error("short read of input file", inFile); mas01cr@169: } mas01cr@169: if(dbH->dim == 0) { mas01cr@169: error("dimensionality of zero in input file", inFile); mas01cr@169: } mas01cr@169: if(dbH->dim != test) { mas01cr@169: cerr << "error: expected dimension: " << dbH->dim << ", got : " << test /* NOTE(review): text appears to be missing at this point in this copy: the rest of initInputFile() and everything up to the return expression of enough_data_space_free() are not visible. */ <timesTableOffset > dbH->dataOffset + dbH->length + size); mas01cr@180: } mas01cr@180: mas01cr@181: /* insert_data_vectors(): seeks to dbH->dataOffset + offset in the database file and writes size bytes from buffer. NOTE(review): the results of lseek() and write() are not checked. */ void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) { mas01cr@181: lseek(dbfid, dbH->dataOffset + offset, SEEK_SET); mas01cr@181: write(dbfid, buffer, size); mas01cr@181: } mas01cr@181: mas01cr@177: /* insert(): opens the database for writing through initTables(), reports an error when the data region cannot hold the new file, warns when the key is already present, and otherwise records the key, the time stamps, the vector count and the vectors, then copies the updated header into the mapped header page. */ void audioDB::insert(const char* dbName, const char* inFile) { mas01cr@177: forWrite = true; 
mas01cr@177: initTables(dbName, inFile); mas01cr@0: mas01cr@0: if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped database","use --times"); mas01cr@0: mas01cr@180: if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { mas01cr@169: error("Insert failed: no more room in database", inFile); mas01cr@180: } mas01cr@180: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: // Linear scan of filenames check for pre-existing feature mas01cr@0: unsigned alreadyInserted=0; mas01cr@0: /* NOTE(review): the loop condition below reads "knumFiles"; text appears to be missing here, presumably a comparison of k with dbH->numFiles. The strncmp() that follows compares only strlen(key) characters, so a stored key that merely begins with key also matches. */ for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: key already exists in database, ignoring: " /* NOTE(review): text appears to be missing here: the end of this warning and the start of the numVectors computation are not visible. */ <dim); mas01cr@0: if(!numVectors){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; mas01cr@60: } mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@128: /* NOTE(review): db is mapped with a length of getpagesize() in initDBHeader(), but is unmapped here with dbH->dbSize. */ munmap(db,dbH->dbSize); mas01cr@0: close(infid); mas01cr@0: return; mas01cr@0: } mas01cr@0: mas01cr@0: /* NOTE(review): strncpy() with a count of strlen(key) copies no terminating NUL. */ strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); mas01cr@0: mas01cr@192: off_t insertoffset = dbH->length;// Store current state mas01cr@0: mas01cr@0: // Check times status and insert times from file mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* timesdata=timesTable+timesoffset; mas01cr@187: mas01cr@188: if(timesoffset + numVectors > timesTableLength) { mas01cr@188: error("out of space for times", key); mas01cr@188: } mas01cr@188: mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: mas01cr@0: // Increment file count mas01cr@0: dbH->numFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: mas01mc@18: // Update track to file index 
map mas01mc@18: memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@181: insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); mas01cr@0: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@190: unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " mas01cr@0: << (statbuf.st_size-sizeof(int)) << " bytes." << endl; mas01cr@60: } mas01cr@0: mas01cr@176: // Copy the header back to the database mas01cr@176: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@176: mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01cr@0: } mas01cr@0: mas01cr@0: /* insertTimeStamps(): does nothing unless usingTimes is set. Sets O2_FLAG_TIMES on a database that has no files and no times flag yet; on a database without the flag that already has files it warns and clears usingTimes. When times are still in use it streams doubles from timesFile into timesdata, counts them, and reports a count that does not fit numVectors through error(). */ void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ mas01cr@0: unsigned numtimes=0; mas01cr@0: if(usingTimes){ mas01cr@0: if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) mas01cr@0: dbH->flags=dbH->flags|O2_FLAG_TIMES; mas01cr@0: else if(!(dbH->flags&O2_FLAG_TIMES)){ mas01cr@0: cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: if(!timesFile->is_open()){ mas01cr@0: if(dbH->flags & O2_FLAG_TIMES){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@128: munmap(db,dbH->dbSize); mas01cr@0: error("problem opening times file on timestamped database",timesFileName); mas01cr@0: } mas01cr@0: else{ mas01cr@0: cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." 
<< endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Process time file mas01cr@0: if(usingTimes){ mas01cr@0: do{ mas01cr@0: *timesFile>>*timesdata++; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: /* NOTE(review): the loop condition below reads "numtimeseof()"; text appears to be missing between the comparison on numtimes and the following eof() test. */ }while(!timesFile->eof() && numtimeseof()){ mas01cr@0: double dummy; mas01cr@0: do{ mas01cr@0: *timesFile>>dummy; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: }while(!timesFile->eof()); mas01cr@0: } mas01cr@0: /* NOTE(review): the condition below reads "numtimesnumVectors+2"; text appears to be missing inside it. */ if(numtimesnumVectors+2){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@128: munmap(db,dbH->dbSize); mas01cr@0: close(infid); mas01cr@0: cerr << "expected " << numVectors << " found " << numtimes << endl; mas01cr@0: error("Times file is incorrect length for features file",inFile); mas01cr@0: } mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "numtimes: " << numtimes << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@120: /* batchinsert(): opens the database for writing, then reads one feature-file name per line from inFile (plus one key per line from the key list when a separate one was given, and one times-file name per line from timesFile when usingTimes is set) and carries out the insert steps for each entry. */ void audioDB::batchinsert(const char* dbName, const char* inFile) { mas01cr@0: mas01cr@177: forWrite = true; mas01cr@177: initDBHeader(dbName); mas01cr@0: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: ifstream *filesIn = 0; mas01cr@0: ifstream *keysIn = 0; mas01cr@0: ifstream* thisTimesFile = 0; mas01cr@0: mas01cr@0: if(!(filesIn = new ifstream(inFile))) mas01cr@0: error("Could not open batch in file", inFile); mas01cr@0: if(key && key!=inFile) mas01cr@0: if(!(keysIn = new ifstream(key))) mas01cr@0: error("Could not open batch key file",key); mas01cr@0: mas01cr@0: if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped database","use --times"); mas01cr@0: mas01cr@0: unsigned totalVectors=0; mas01cr@0: char *thisKey = new char[MAXSTR]; mas01cr@0: char *thisFile = new char[MAXSTR]; mas01cr@0: char *thisTimesFileName = new char[MAXSTR]; mas01cr@120: mas01cr@0: do{ mas01cr@0: filesIn->getline(thisFile,MAXSTR); mas01cr@0: if(key && key!=inFile) mas01cr@0: 
keysIn->getline(thisKey,MAXSTR); mas01cr@0: else mas01cr@0: /* NOTE(review): this assignment drops the only pointer to the buffer allocated for thisKey; none of the three new[] buffers is freed in the visible code. */ thisKey = thisFile; mas01cr@0: if(usingTimes) mas01cr@0: timesFile->getline(thisTimesFileName,MAXSTR); mas01cr@0: mas01cr@0: if(filesIn->eof()) mas01cr@0: break; mas01cr@0: mas01cr@169: initInputFile(thisFile); mas01cr@0: mas01cr@180: if(!enough_data_space_free(statbuf.st_size - sizeof(int))) { mas01cr@169: error("batchinsert failed: no more room in database", thisFile); mas01cr@180: } mas01cr@0: mas01cr@0: // Linear scan of filenames check for pre-existing feature mas01cr@0: unsigned alreadyInserted=0; mas01cr@0: mas01cr@0: /* NOTE(review): the loop condition below reads "knumFiles"; text appears to be missing here, presumably a comparison of k with dbH->numFiles. */ for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: key already exists in database:" << thisKey << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: else{ mas01cr@0: mas01mc@18: // Make a track index table of features to file indexes mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: if(!numVectors){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: else{ mas01cr@0: if(usingTimes){ mas01cr@0: if(timesFile->eof()) mas01cr@0: error("not enough timestamp files in timesList"); mas01cr@0: thisTimesFile=new ifstream(thisTimesFileName,ios::in); mas01cr@0: if(!thisTimesFile->is_open()) mas01cr@0: error("Cannot open timestamp file",thisTimesFileName); mas01cr@192: off_t insertoffset=dbH->length; mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* timesdata=timesTable+timesoffset; mas01cr@188: if(timesoffset + numVectors > timesTableLength) { mas01cr@188: error("out of space for times", key); mas01cr@188: } mas01cr@0: insertTimeStamps(numVectors,thisTimesFile,timesdata); mas01cr@0: 
if(thisTimesFile) mas01cr@0: delete thisTimesFile; mas01cr@0: } mas01cr@0: mas01cr@0: strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); mas01cr@0: mas01cr@192: off_t insertoffset = dbH->length;// Store current state mas01cr@0: mas01cr@0: // Increment file count mas01cr@0: dbH->numFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: mas01mc@18: // Update track to file index map mas01mc@18: memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@181: insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int)); mas01cr@181: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@190: unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: totalVectors+=numVectors; mas01cr@176: mas01cr@176: // Copy the header back to the database mas01cr@176: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@0: } mas01cr@0: } mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01cr@0: }while(!filesIn->eof()); mas01mc@12: mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " mas01cr@0: << totalVectors*dbH->dim*sizeof(double) << " bytes." << endl; mas01cr@60: } mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01cr@0: } mas01cr@0: mas01cr@76: // FIXME: this can't propagate the sequence length argument (used for mas01cr@76: // dudCount). See adb__status() definition for the other half of mas01cr@76: // this. 
-- CSR, 2007-10-01 mas01cr@0: void audioDB::ws_status(const char*dbName, char* hostport){ mas01cr@0: struct soap soap; mas01cr@133: adb__statusResponse adbStatusResponse; mas01cr@0: mas01cr@0: // Query an existing adb database mas01cr@0: soap_init(&soap); mas01cr@133: if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResponse)==SOAP_OK) { mas01cr@133: cout << "numFiles = " << adbStatusResponse.result.numFiles << endl; mas01cr@133: cout << "dim = " << adbStatusResponse.result.dim << endl; mas01cr@133: cout << "length = " << adbStatusResponse.result.length << endl; mas01cr@133: cout << "dudCount = " << adbStatusResponse.result.dudCount << endl; mas01cr@133: cout << "nullCount = " << adbStatusResponse.result.nullCount << endl; mas01cr@133: cout << "flags = " << adbStatusResponse.result.flags << endl; mas01cr@76: } else { mas01cr@0: soap_print_fault(&soap,stderr); mas01cr@76: } mas01cr@0: mas01cr@0: soap_destroy(&soap); mas01cr@0: soap_end(&soap); mas01cr@0: soap_done(&soap); mas01cr@0: } mas01cr@0: mas01mc@18: void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ mas01cr@0: struct soap soap; mas01cr@133: adb__queryResponse adbQueryResponse; mas01cr@0: mas01cr@0: soap_init(&soap); mas01cr@0: if(soap_call_adb__query(&soap,hostport,NULL, mas01mc@18: (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, mas01cr@133: queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResponse)==SOAP_OK){ mas01cr@133: //std::cerr << "result list length:" << adbQueryResponse.result.__sizeRlist << std::endl; mas01cr@133: for(int i=0; inumFiles; k++){ mas01mc@18: if(trackTable[k]numFiles << endl; mas01cr@76: cout << "data dim:" << dbH->dim <dim>0){ mas01cr@76: cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << endl; mas01cr@76: } mas01cr@108: cout << "total bytes:" << dbH->length << " (" << 
(100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; mas01cr@108: cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << mas01cr@108: (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << endl; mas01cr@76: cout << "flags:" << dbH->flags << endl; mas01cr@76: mas01cr@76: cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; mas01cr@76: } else { mas01cr@133: adbStatusResponse->result.numFiles = dbH->numFiles; mas01cr@133: adbStatusResponse->result.dim = dbH->dim; mas01cr@133: adbStatusResponse->result.length = dbH->length; mas01cr@133: adbStatusResponse->result.dudCount = dudCount; mas01cr@133: adbStatusResponse->result.nullCount = nullCount; mas01cr@133: adbStatusResponse->result.flags = dbH->flags; mas01cr@76: } mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::dump(const char* dbName){ mas01cr@131: if(!dbH) { mas01cr@177: initTables(dbName, 0); mas01mc@17: } mas01cr@0: mas01cr@131: if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { mas01cr@131: error("error making output directory", output, "mkdir"); mas01cr@131: } mas01cr@131: mas01cr@131: char *cwd = new char[PATH_MAX]; mas01cr@131: mas01cr@131: if ((getcwd(cwd, PATH_MAX)) == 0) { mas01cr@131: error("error getting working directory", "", "getcwd"); mas01cr@131: } mas01cr@131: mas01cr@131: if((chdir(output)) < 0) { mas01cr@131: error("error changing working directory", output, "chdir"); mas01cr@131: } mas01cr@131: mas01cr@131: int fLfd, tLfd = 0, kLfd; mas01cr@131: FILE *fLFile, *tLFile = 0, *kLFile; mas01cr@131: mas01cr@131: if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { mas01cr@131: error("error creating featureList file", "featureList.txt", "open"); mas01cr@131: } mas01cr@131: int times = dbH->flags & O2_FLAG_TIMES; mas01cr@131: if (times) { mas01cr@131: if ((tLfd = 
open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { mas01cr@131: error("error creating timesList file", "timesList.txt", "open"); mas01cr@131: } mas01cr@131: } mas01cr@131: if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { mas01cr@131: error("error creating keyList file", "keyList.txt", "open"); mas01cr@131: } mas01cr@131: mas01cr@131: /* can these fail? I sincerely hope not. */ mas01cr@131: fLFile = fdopen(fLfd, "w"); mas01cr@131: if (times) { mas01cr@131: tLFile = fdopen(tLfd, "w"); mas01cr@131: } mas01cr@131: kLFile = fdopen(kLfd, "w"); mas01cr@131: mas01cr@131: char *fName = new char[256]; mas01cr@131: int ffd; mas01cr@131: FILE *tFile; mas01cr@131: unsigned pos = 0; mas01cr@184: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@184: double *data_buffer; mas01cr@184: size_t data_buffer_size; mas01cr@131: for(unsigned k = 0; k < dbH->numFiles; k++) { mas01cr@131: fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); mas01cr@131: snprintf(fName, 256, "%05d.features", k); mas01cr@131: if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { mas01cr@131: error("error creating feature file", fName, "open"); mas01cr@131: } mas01cr@131: if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) { mas01cr@131: error("error writing dimensions", fName, "write"); mas01cr@131: } mas01cr@184: mas01cr@184: /* FIXME: this repeated malloc()/free() of data buffers is mas01cr@184: inefficient. 
*/ mas01cr@184: data_buffer_size = trackTable[k] * dbH->dim * sizeof(double); mas01cr@184: mas01cr@184: { mas01cr@184: void *tmp = malloc(data_buffer_size); mas01cr@184: if (tmp == NULL) { mas01cr@184: error("error allocating data buffer"); mas01cr@184: } mas01cr@184: data_buffer = (double *) tmp; mas01cr@184: } mas01cr@184: mas01cr@184: if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) { mas01cr@184: error("error reading data", fName, "read"); mas01cr@184: } mas01cr@184: mas01cr@184: if ((write(ffd, data_buffer, data_buffer_size)) < 0) { mas01cr@131: error("error writing data", fName, "write"); mas01cr@131: } mas01cr@184: mas01cr@184: free(data_buffer); mas01cr@184: mas01cr@131: fprintf(fLFile, "%s\n", fName); mas01cr@131: close(ffd); mas01cr@131: mas01cr@131: if(times) { mas01cr@131: snprintf(fName, 256, "%05d.times", k); mas01cr@131: tFile = fopen(fName, "w"); mas01cr@131: for(unsigned i = 0; i < trackTable[k]; i++) { mas01cr@131: // KLUDGE: specifying 16 digits of precision after the decimal mas01cr@131: // point is (but check this!) sufficient to uniquely identify mas01cr@131: // doubles; however, that will cause ugliness, as that's mas01cr@131: // vastly too many for most values of interest. Moving to %a mas01cr@131: // here and scanf() in the timesFile reading might fix this. mas01cr@131: // -- CSR, 2007-10-19 mas01cr@131: fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); mas01cr@131: } mas01cr@131: fprintf(tLFile, "%s\n", fName); mas01cr@131: } mas01cr@131: mas01cr@131: pos += trackTable[k]; mas01cr@131: cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; mas01cr@131: } mas01cr@131: mas01cr@131: FILE *scriptFile; mas01cr@131: scriptFile = fopen("restore.sh", "w"); mas01cr@131: fprintf(scriptFile, "\ mas01cr@131: #! 
/bin/sh\n\ mas01cr@131: #\n\ mas01cr@131: # usage: AUDIODB=/path/to/audioDB sh ./restore.sh \n\ mas01cr@131: \n\ mas01cr@131: if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ mas01cr@131: if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ mas01cr@192: \"${AUDIODB}\" -d \"$1\" -N --size=%d\n", (int) (dbH->dbSize / 1000000)); mas01cr@131: if(dbH->flags & O2_FLAG_L2NORM) { mas01cr@131: fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); mas01cr@131: } mas01cr@131: fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); mas01cr@131: if(times) { mas01cr@131: fprintf(scriptFile, " -T timesList.txt"); mas01cr@131: } mas01cr@131: fprintf(scriptFile, "\n"); mas01cr@131: fclose(scriptFile); mas01cr@131: mas01cr@131: if((chdir(cwd)) < 0) { mas01cr@131: error("error changing working directory", cwd, "chdir"); mas01cr@131: } mas01cr@131: mas01cr@131: fclose(fLFile); mas01cr@131: if(times) { mas01cr@131: fclose(tLFile); mas01cr@131: } mas01cr@131: fclose(kLFile); mas01cr@131: delete[] fName; mas01cr@131: mas01cr@0: status(dbName); mas01cr@0: } mas01cr@0: mas01cr@177: void audioDB::l2norm(const char* dbName) { mas01cr@177: forWrite = true; mas01cr@177: initTables(dbName, 0); mas01cr@0: if(dbH->length>0){ mas01cr@192: /* FIXME: should probably be uint64_t */ mas01cr@0: unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@191: CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); mas01cr@0: unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append mas01cr@0: } mas01cr@0: // Update database flags mas01cr@0: dbH->flags = dbH->flags|O2_FLAG_L2NORM; mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: } mas01cr@0: mas01cr@133: void audioDB::query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ mas01cr@0: switch(queryType){ mas01cr@105: case O2_POINT_QUERY: mas01cr@133: pointQuery(dbName, inFile, adbQueryResponse); mas01cr@0: break; mas01cr@105: case 
O2_SEQUENCE_QUERY: mas01mc@17: if(radius==0) mas01cr@133: trackSequenceQueryNN(dbName, inFile, adbQueryResponse); mas01mc@17: else mas01cr@133: trackSequenceQueryRad(dbName, inFile, adbQueryResponse); mas01cr@0: break; mas01cr@105: case O2_TRACK_QUERY: mas01cr@133: trackPointQuery(dbName, inFile, adbQueryResponse); mas01cr@0: break; mas01cr@0: default: mas01cr@0: error("unrecognized queryType in query()"); mas01cr@0: mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: //return ordinal position of key in keyTable mas01cr@0: unsigned audioDB::getKeyPos(char* key){ mas01cr@0: for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) mas01cr@0: return k; mas01cr@0: error("Key not found",key); mas01cr@0: return O2_ERR_KEYNOTFOUND; mas01cr@0: } mas01cr@0: mas01cr@0: // Basic point query engine mas01cr@177: void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { mas01cr@0: mas01cr@177: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@190: mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@191: CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01cr@0: assert(pointNN>0 && 
pointNN<=O2_MAXNN); mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: for(unsigned k=0; klength/(dbH->dim*sizeof(double)); mas01cr@0: double meanQdur = 0; mas01cr@0: double* timesdata = 0; mas01cr@0: double* dbdurs = 0; mas01cr@0: mas01cr@0: if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: j=1; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: while(j--){ // query mas01cr@0: data=dataBuf; mas01cr@0: k=totalVecs; // number of database vectors mas01cr@0: while(k--){ // database mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01cr@0: && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l >= n+1 ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: 
distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01cr@0: sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv2, NULL); mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@60: } mas01cr@0: mas01cr@133: if(adbQueryResponse==0){ mas01cr@0: // Output answer mas01cr@0: // Loop over nearest neighbours mas01cr@0: for(k=0; k < pointNN; k++){ mas01cr@0: // Scan for key mas01mc@18: unsigned cumTrack=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01mc@18: cumTrack+=trackTable[l]; mas01mc@18: if(sIndexes[k]result.__sizeRlist=listLen; mas01cr@133: adbQueryResponse->result.__sizeDist=listLen; mas01cr@133: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@133: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@133: adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@133: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@133: adbQueryResponse->result.Qpos = new unsigned int[listLen]; mas01cr@133: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@133: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@133: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@133: adbQueryResponse->result.Dist[k]=distances[k]; mas01cr@133: adbQueryResponse->result.Qpos[k]=qIndexes[k]; mas01mc@18: unsigned cumTrack=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01mc@18: cumTrack+=trackTable[l]; mas01mc@18: if(sIndexes[k]result.Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@133: adbQueryResponse->result.Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; mas01cr@0: } mas01cr@0: } mas01cr@0: 
mas01cr@0: // Clean up mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(dbdurs) mas01cr@0: delete dbdurs; mas01cr@0: } mas01cr@0: mas01mc@18: // trackPointQuery mas01mc@18: // return the trackNN closest tracks to the query track mas01mc@18: // uses average of pointNN points per track mas01cr@177: void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse) { mas01cr@177: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@189: double* data; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: unsigned j=numVectors; // number of query points mas01mc@18: unsigned k,l,n, track, trackOffset=0, processedTracks=0; mas01cr@0: double thisDist; mas01cr@0: mas01cr@0: for(k=0; kflags & 
O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumFiles]; mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: } mas01cr@0: mas01mc@18: // build track offset table mas01cr@187: off_t *trackOffsetTable = new off_t[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01cr@187: off_t trackIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey[MAXSTR]; mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@187: mas01cr@187: size_t data_buffer_size = 0; mas01cr@187: double *data_buffer = 0; mas01cr@187: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@0: mas01mc@18: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01cr@187: mas01cr@187: trackOffset = trackOffsetTable[track]; // numDoubles offset mas01cr@187: mas01cr@187: // get trackID from file if using a control file mas01cr@187: if(trackFile) { mas01cr@187: trackFile->getline(nextKey,MAXSTR); mas01cr@187: if(!trackFile->eof()) { mas01cr@187: track = getKeyPos(nextKey); mas01cr@187: trackOffset = 
trackOffsetTable[track]; mas01cr@187: lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); mas01cr@187: } else { mas01cr@187: break; mas01cr@0: } mas01cr@0: } mas01cr@187: mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@187: mas01cr@60: if(verbosity>7) { mas01mc@18: cerr << track << "." << trackOffset/(dbH->dim) << "." << trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@0: mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; mas01cr@0: else mas01cr@0: usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); mas01cr@0: if(usingQueryPoint) mas01cr@0: j=1; mas01cr@0: else mas01cr@0: j=numVectors; mas01cr@187: mas01cr@187: if (trackTable[track] * sizeof(double) * dbH->dim > data_buffer_size) { mas01cr@187: if(data_buffer) { mas01cr@187: free(data_buffer); mas01cr@187: } mas01cr@187: { mas01cr@187: data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; mas01cr@187: void *tmp = malloc(data_buffer_size); mas01cr@187: if (tmp == NULL) { mas01cr@187: error("error allocating data buffer"); mas01cr@187: } mas01cr@187: data_buffer = (double *) tmp; mas01cr@187: } mas01cr@187: } mas01cr@187: mas01cr@187: read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); mas01cr@187: mas01cr@0: while(j--){ mas01mc@18: k=trackTable[track]; // number of vectors in track mas01cr@187: data=data_buffer; // data for track mas01cr@0: while(k--){ mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l > n ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: 
qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01mc@18: sIndexes[n]=trackTable[track]-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01mc@18: } // track mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } // query mas01mc@18: // Take the average of this track's distance mas01mc@18: // Test the track distances mas01cr@0: thisDist=0; mas01cr@66: for (n = 0; n < pointNN; n++) { mas01cr@66: if (distances[n] == -DBL_MAX) break; mas01cr@66: thisDist += distances[n]; mas01cr@66: } mas01cr@66: thisDist /= n; mas01cr@66: mas01mc@18: n=trackNN; mas01cr@0: while(n--){ mas01mc@18: if(thisDist>=trackDistances[n]){ mas01mc@18: if((n==0 || thisDist<=trackDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@74: for( l=trackNN-1 ; l > n ; l--){ mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01cr@0: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: trackIDs[n]=track; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: for(unsigned k=0; k1) { mas01mc@18: cerr << endl << "processed tracks :" << processedTracks mas01cr@0: << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@60: } mas01cr@0: mas01cr@133: if(adbQueryResponse==0){ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr<result.__sizeRlist=listLen; mas01cr@133: adbQueryResponse->result.__sizeDist=listLen; mas01cr@133: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@133: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@133: 
adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@133: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@133: adbQueryResponse->result.Qpos = new unsigned int[listLen]; mas01cr@133: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@133: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@133: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@133: adbQueryResponse->result.Dist[k]=trackDistances[k]; mas01cr@133: adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; mas01cr@133: adbQueryResponse->result.Spos[k]=trackSIndexes[k]; mas01cr@133: sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@18: delete trackOffsetTable; mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(meanDBdur) mas01cr@0: delete meanDBdur; mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: mas01mc@20: // k nearest-neighbor (k-NN) search between query and target tracks mas01mc@20: // efficient implementation based on matched filter mas01mc@20: // assumes normed shingles mas01mc@20: // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track mas01cr@133: void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ mas01cr@0: mas01cr@177: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: double qMeanL2; mas01cr@0: double* sMeanL2; mas01cr@0: mas01cr@0: 
unsigned USE_THRESH=0; mas01cr@0: double SILENCE_THRESH=0; mas01cr@0: double DIFF_THRESH=0; mas01cr@0: mas01cr@0: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01cr@55: error("Database must be L2 normed for sequence query","use -L2NORM"); mas01cr@55: mas01cr@55: if(numVectors1) { mas01cr@0: cerr << "performing norms ... "; cerr.flush(); mas01cr@60: } mas01cr@0: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01mc@20: mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: sNorm = new double[dbVectors]; mas01cr@0: sMeanL2=new double[dbH->numFiles]; mas01cr@0: assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01mc@20: mas01cr@0: // Make norm measurements relative to sequenceLength mas01cr@0: unsigned w = sequenceLength-1; mas01cr@0: unsigned i,j; mas01cr@0: double* ps; mas01cr@0: double tmp1,tmp2; mas01mc@20: mas01cr@0: // Copy the L2 norm values to core to avoid disk random access later on mas01cr@0: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01cr@125: double* qnPtr = qNorm; mas01cr@0: double* snPtr = sNorm; mas01cr@0: for(i=0; inumFiles; i++){ mas01mc@20: if(trackTable[i]>=sequenceLength){ mas01cr@0: tmp1=*snPtr; mas01cr@0: j=1; mas01cr@0: w=sequenceLength-1; mas01cr@0: while(w--) mas01cr@0: *snPtr+=snPtr[j++]; mas01cr@0: ps = snPtr+1; mas01mc@18: w=trackTable[i]-sequenceLength; // +1 - 1 mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01mc@20: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01cr@0: tmp1=tmp2; mas01cr@0: ps++; mas01cr@0: } mas01mc@20: ps = snPtr; mas01mc@20: w=trackTable[i]-sequenceLength+1; mas01mc@20: while(w--){ mas01mc@20: *ps=sqrt(*ps); mas01mc@20: ps++; mas01mc@20: } mas01cr@0: } mas01mc@18: snPtr+=trackTable[i]; mas01cr@0: } mas01cr@0: mas01cr@0: double* pn = 
sMeanL2; mas01cr@0: w=dbH->numFiles; mas01cr@0: while(w--) mas01cr@0: *pn++=0.0; mas01cr@0: ps=sNorm; mas01mc@18: unsigned processedTracks=0; mas01cr@0: for(i=0; inumFiles; i++){ mas01mc@18: if(trackTable[i]>sequenceLength-1){ mas01cr@57: w = trackTable[i]-sequenceLength+1; mas01cr@0: pn = sMeanL2+i; mas01mc@20: *pn=0; mas01cr@0: while(w--) mas01mc@20: if(*ps>0) mas01mc@20: *pn+=*ps++; mas01cr@57: *pn/=trackTable[i]-sequenceLength+1; mas01cr@0: SILENCE_THRESH+=*pn; mas01mc@18: processedTracks++; mas01cr@0: } mas01mc@18: ps = sNorm + trackTable[i]; mas01cr@0: } mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "processedTracks: " << processedTracks << endl; mas01cr@60: } mas01mc@20: mas01mc@18: SILENCE_THRESH/=processedTracks; mas01cr@0: USE_THRESH=1; // Turn thresholding on mas01mc@20: DIFF_THRESH=SILENCE_THRESH; // mean shingle power mas01mc@20: SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE mas01cr@60: if(verbosity>4) { mas01mc@20: cerr << "silence thresh: " << SILENCE_THRESH; mas01cr@60: } mas01cr@0: w=sequenceLength-1; mas01cr@0: i=1; mas01cr@125: tmp1=*qnPtr; mas01cr@0: while(w--) mas01cr@125: *qnPtr+=qnPtr[i++]; mas01cr@125: ps = qnPtr+1; mas01mc@20: w=numVectors-sequenceLength; // +1 -1 mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01mc@20: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01cr@0: tmp1=tmp2; mas01mc@20: ps++; mas01mc@20: } mas01cr@125: ps = qnPtr; mas01mc@20: qMeanL2 = 0; mas01mc@20: w=numVectors-sequenceLength+1; mas01mc@20: while(w--){ mas01mc@20: *ps=sqrt(*ps); mas01mc@20: qMeanL2+=*ps++; mas01cr@0: } mas01cr@0: qMeanL2 /= numVectors-sequenceLength+1; mas01mc@20: mas01cr@60: if(verbosity>1) { mas01cr@60: cerr << "done." << endl; mas01cr@60: } mas01cr@0: mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "matching tracks..." 
<< endl; mas01cr@60: } mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: mas01mc@18: unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01cr@0: double thisDist; mas01cr@0: mas01cr@0: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: assert(timesdata); mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; k1) { mas01cr@0: cerr << "mean query file duration: " << meanQdur << endl; mas01cr@60: } mas01cr@0: meanDBdur = new double[dbH->numFiles]; mas01cr@0: assert(meanDBdur); mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01cr@0: error("queryPoint > numVectors-wL+1 in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@125: qnPtr=qnPtr+queryPoint; mas01cr@0: numVectors=wL; mas01cr@0: } mas01cr@0: mas01mc@20: double ** D = 0; // Differences query and target mas01cr@0: double ** DD = 
0; // Matched filter distance mas01cr@0: mas01cr@0: D = new double*[numVectors]; mas01cr@0: assert(D); mas01cr@0: DD = new double*[numVectors]; mas01cr@0: assert(DD); mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01mc@18: processedTracks=0; mas01mc@18: unsigned successfulTracks=0; mas01cr@0: mas01cr@0: double* qp; mas01cr@0: double* sp; mas01cr@0: double* dp; mas01cr@0: mas01mc@18: // build track offset table mas01cr@187: off_t *trackOffsetTable = new off_t[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01cr@187: off_t trackIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey [MAXSTR]; mas01mc@20: mas01mc@20: // chi^2 statistics mas01mc@20: double sampleCount = 0; mas01mc@20: double sampleSum = 0; mas01mc@20: double logSampleSum = 0; mas01mc@20: double minSample = 1e9; mas01mc@20: double maxSample = 0; mas01mc@20: mas01mc@20: // Track loop mas01cr@179: size_t data_buffer_size = 0; mas01cr@179: double *data_buffer = 0; mas01cr@179: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@179: mas01cr@179: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++) { mas01cr@0: mas01cr@187: trackOffset = trackOffsetTable[track]; // numDoubles offset mas01cr@187: mas01mc@18: // get trackID from file if using a control file mas01cr@187: if(trackFile) { mas01cr@187: trackFile->getline(nextKey,MAXSTR); mas01cr@187: if(!trackFile->eof()) { mas01cr@187: track = getKeyPos(nextKey); mas01cr@187: trackOffset = trackOffsetTable[track]; mas01cr@187: lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); mas01cr@187: } else { mas01cr@187: break; mas01cr@0: } mas01cr@0: } mas01mc@12: mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@0: mas01cr@57: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01cr@0: mas01cr@60: if(verbosity>7) { mas01mc@18: 
cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@0: mas01mc@20: // Sum products matrix mas01cr@0: for(j=0; jdim > data_buffer_size) { mas01cr@179: if(data_buffer) { mas01cr@179: free(data_buffer); mas01cr@179: } mas01cr@179: { mas01cr@179: data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; mas01cr@179: void *tmp = malloc(data_buffer_size); mas01cr@179: if (tmp == NULL) { mas01cr@179: error("error allocating data buffer"); mas01cr@179: } mas01cr@179: data_buffer = (double *) tmp; mas01cr@179: } mas01cr@179: } mas01cr@179: mas01cr@179: read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); mas01cr@179: mas01mc@20: // Dot product mas01cr@0: for(j=0; jdim; mas01cr@179: sp=data_buffer+k*dbH->dim; mas01cr@0: DD[j][k]=0.0; // Initialize matched filter array mas01cr@0: dp=&D[j][k]; // point to correlation cell j,k mas01cr@0: *dp=0.0; // initialize correlation cell mas01cr@0: l=dbH->dim; // size of vectors mas01cr@0: while(l--) mas01cr@0: *dp+=*qp++**sp++; mas01cr@0: } mas01cr@0: mas01cr@0: // Matched Filter mas01cr@0: // HOP SIZE == 1 mas01cr@0: double* spd; mas01cr@0: if(HOP_SIZE==1){ // HOP_SIZE = shingleHop mas01cr@0: for(w=0; w3 && usingTimes) { mas01mc@18: cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01cr@0: cerr << "within duration tolerance." 
<< endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: // Search for minimum distance by shingles (concatenated vectors) mas01cr@53: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@53: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01cr@125: thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@60: if(verbosity>10) { mas01cr@125: cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; mas01cr@60: } mas01mc@20: // Gather chi^2 statistics mas01mc@20: if(thisDistmaxSample) mas01mc@20: maxSample=thisDist; mas01mc@20: if(thisDist>1e-9){ mas01mc@20: sampleCount++; mas01mc@20: sampleSum+=thisDist; mas01mc@20: logSampleSum+=log(thisDist); mas01mc@20: } mas01mc@20: mas01cr@125: // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); mas01cr@0: // Power test mas01cr@0: if(!USE_THRESH || mas01cr@0: // Threshold on mean L2 of Q and S sequences mas01cr@125: (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && mas01cr@0: // Are both query and target windows above mean energy? 
mas01cr@125: (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) mas01mc@20: thisDist=thisDist; // Computed above mas01cr@0: else mas01mc@20: thisDist=1000000.0; mas01mc@20: mas01mc@20: // k-NN match algorithm mas01cr@58: m=pointNN; mas01mc@20: while(m--){ mas01mc@20: if(thisDist<=distances[m]) mas01mc@20: if(m==0 || thisDist>=distances[m-1]){ mas01cr@0: // Shuffle distances up the list mas01cr@0: for(l=pointNN-1; l>m; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[m]=thisDist; mas01cr@0: if(usingQueryPoint) mas01cr@0: qIndexes[m]=queryPoint; mas01cr@0: else mas01cr@0: qIndexes[m]=j; mas01cr@0: sIndexes[m]=k; mas01cr@0: break; mas01mc@20: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Calculate the mean of the N-Best matches mas01cr@0: thisDist=0.0; mas01cr@53: for(m=0; m3) { mas01mc@18: cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; mas01cr@60: } mas01mc@12: mas01mc@20: mas01mc@18: // All the track stuff goes here mas01cr@58: n=trackNN; mas01cr@0: while(n--){ mas01mc@20: if(thisDist<=trackDistances[n]){ mas01mc@20: if((n==0 || thisDist>=trackDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01mc@18: for( l=trackNN-1 ; l > n ; l--){ mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01cr@0: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: successfulTracks++; mas01mc@18: trackIDs[n]=track; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } // Duration match mas01mc@20: mas01mc@18: // Clean up current track mas01cr@0: if(D!=NULL){ mas01cr@0: for(j=0; j1) { mas01mc@18: cerr << endl << "processed 
tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01cr@0: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01mc@20: cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01mc@20: << " minSample: " << minSample << " maxSample: " << maxSample << endl; mas01mc@20: } mas01cr@133: if(adbQueryResponse==0){ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr<result.__sizeRlist=listLen; mas01cr@133: adbQueryResponse->result.__sizeDist=listLen; mas01cr@133: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@133: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@133: adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@133: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@133: adbQueryResponse->result.Qpos = new unsigned int[listLen]; mas01cr@133: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@133: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@133: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@133: adbQueryResponse->result.Dist[k]=trackDistances[k]; mas01cr@133: adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; mas01cr@133: adbQueryResponse->result.Spos[k]=trackSIndexes[k]; mas01cr@133: sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@20: delete[] trackOffsetTable; mas01cr@0: if(queryCopy) mas01mc@20: delete[] queryCopy; mas01cr@121: if(qNorm) mas01cr@121: delete[] qNorm; mas01cr@121: if(sNorm) mas01cr@121: delete[] sNorm; mas01cr@121: if(sMeanL2) mas01cr@121: delete[] sMeanL2; mas01cr@0: if(D) mas01cr@0: delete[] D; mas01cr@0: if(DD) mas01cr@0: delete[] DD; mas01cr@0: if(timesdata) mas01mc@20: delete[] timesdata; mas01cr@0: if(meanDBdur) mas01mc@20: delete[] meanDBdur; mas01cr@0: 
mas01cr@0: mas01cr@0: } mas01cr@0: mas01mc@20: // Radius search between query and target tracks mas01mc@20: // efficient implementation based on matched filter mas01mc@20: // assumes normed shingles mas01mc@20: // outputs count of retrieved shingles, max retreived = one shingle per query shingle per track mas01cr@133: void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse){ mas01mc@17: mas01cr@177: initTables(dbName, inFile); mas01mc@17: mas01mc@17: // For each input vector, find the closest pointNN matching output vectors and report mas01mc@17: // we use stdout in this stub version mas01mc@17: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01mc@17: double* query = (double*)(indata+sizeof(int)); mas01mc@17: double* queryCopy = 0; mas01mc@17: mas01mc@17: double qMeanL2; mas01mc@17: double* sMeanL2; mas01mc@17: mas01mc@17: unsigned USE_THRESH=0; mas01mc@17: double SILENCE_THRESH=0; mas01mc@17: double DIFF_THRESH=0; mas01mc@17: mas01mc@17: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01mc@17: error("Database must be L2 normed for sequence query","use -l2norm"); mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "performing norms ... 
"; cerr.flush(); mas01cr@60: } mas01mc@17: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01mc@18: mas01mc@17: // Make a copy of the query mas01mc@17: queryCopy = new double[numVectors*dbH->dim]; mas01mc@17: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01mc@17: qNorm = new double[numVectors]; mas01mc@17: sNorm = new double[dbVectors]; mas01mc@17: sMeanL2=new double[dbH->numFiles]; mas01mc@17: assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); mas01mc@17: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01mc@17: query = queryCopy; mas01mc@18: mas01mc@17: // Make norm measurements relative to sequenceLength mas01mc@17: unsigned w = sequenceLength-1; mas01mc@17: unsigned i,j; mas01mc@17: double* ps; mas01mc@17: double tmp1,tmp2; mas01mc@18: mas01mc@17: // Copy the L2 norm values to core to avoid disk random access later on mas01mc@17: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01mc@17: double* snPtr = sNorm; mas01cr@125: double* qnPtr = qNorm; mas01mc@17: for(i=0; inumFiles; i++){ mas01mc@18: if(trackTable[i]>=sequenceLength){ mas01mc@17: tmp1=*snPtr; mas01mc@17: j=1; mas01mc@17: w=sequenceLength-1; mas01mc@17: while(w--) mas01mc@17: *snPtr+=snPtr[j++]; mas01mc@17: ps = snPtr+1; mas01mc@18: w=trackTable[i]-sequenceLength; // +1 - 1 mas01mc@17: while(w--){ mas01mc@17: tmp2=*ps; mas01mc@17: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01mc@17: tmp1=tmp2; mas01mc@17: ps++; mas01mc@17: } mas01mc@17: ps = snPtr; mas01mc@18: w=trackTable[i]-sequenceLength+1; mas01mc@17: while(w--){ mas01mc@17: *ps=sqrt(*ps); mas01mc@17: ps++; mas01mc@17: } mas01mc@17: } mas01mc@18: snPtr+=trackTable[i]; mas01mc@17: } mas01mc@17: mas01mc@17: double* pn = sMeanL2; mas01mc@17: w=dbH->numFiles; mas01mc@17: while(w--) mas01mc@17: *pn++=0.0; mas01mc@17: ps=sNorm; mas01mc@18: unsigned processedTracks=0; mas01mc@17: for(i=0; inumFiles; i++){ mas01mc@18: if(trackTable[i]>sequenceLength-1){ mas01cr@70: w = trackTable[i]-sequenceLength+1; 
mas01mc@17: pn = sMeanL2+i; mas01mc@17: *pn=0; mas01mc@17: while(w--) mas01mc@17: if(*ps>0) mas01mc@17: *pn+=*ps++; mas01cr@70: *pn/=trackTable[i]-sequenceLength+1; mas01mc@17: SILENCE_THRESH+=*pn; mas01mc@18: processedTracks++; mas01mc@17: } mas01mc@18: ps = sNorm + trackTable[i]; mas01mc@17: } mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "processedTracks: " << processedTracks << endl; mas01cr@60: } mas01mc@17: mas01mc@18: SILENCE_THRESH/=processedTracks; mas01mc@17: USE_THRESH=1; // Turn thresholding on mas01mc@18: DIFF_THRESH=SILENCE_THRESH; // mean shingle power mas01mc@17: SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE mas01cr@60: if(verbosity>4) { mas01mc@17: cerr << "silence thresh: " << SILENCE_THRESH; mas01cr@60: } mas01mc@17: w=sequenceLength-1; mas01mc@17: i=1; mas01cr@125: tmp1=*qnPtr; mas01mc@17: while(w--) mas01cr@125: *qnPtr+=qnPtr[i++]; mas01cr@125: ps = qnPtr+1; mas01mc@17: w=numVectors-sequenceLength; // +1 -1 mas01mc@17: while(w--){ mas01mc@17: tmp2=*ps; mas01mc@17: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01mc@17: tmp1=tmp2; mas01mc@17: ps++; mas01mc@17: } mas01cr@125: ps = qnPtr; mas01mc@17: qMeanL2 = 0; mas01mc@17: w=numVectors-sequenceLength+1; mas01mc@17: while(w--){ mas01mc@17: *ps=sqrt(*ps); mas01mc@17: qMeanL2+=*ps++; mas01mc@17: } mas01mc@17: qMeanL2 /= numVectors-sequenceLength+1; mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "done." << endl; mas01cr@60: } mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "matching tracks..." 
<< endl; mas01cr@60: } mas01mc@17: mas01mc@17: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01mc@17: mas01mc@17: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01mc@17: mas01mc@17: double distances[pointNN]; mas01mc@17: unsigned qIndexes[pointNN]; mas01mc@17: unsigned sIndexes[pointNN]; mas01mc@17: mas01mc@17: mas01cr@59: unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01mc@17: double thisDist; mas01mc@17: mas01mc@17: for(k=0; kflags & O2_FLAG_TIMES)){ mas01mc@17: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01mc@17: usingTimes=0; mas01mc@17: } mas01mc@17: mas01mc@17: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01mc@17: cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; mas01mc@17: mas01mc@17: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01mc@17: timesdata = new double[numVectors]; mas01mc@17: assert(timesdata); mas01mc@17: insertTimeStamps(numVectors, timesFile, timesdata); mas01mc@17: // Calculate durations of points mas01mc@17: for(k=0; k1) { mas01mc@17: cerr << "mean query file duration: " << meanQdur << endl; mas01cr@60: } mas01mc@17: meanDBdur = new double[dbH->numFiles]; mas01mc@17: assert(meanDBdur); mas01mc@17: for(k=0; knumFiles; k++){ mas01mc@17: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01mc@17: error("queryPoint > numVectors-wL+1 in query"); mas01mc@17: else{ mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01mc@17: query=query+queryPoint*dbH->dim; mas01cr@125: qnPtr=qnPtr+queryPoint; mas01mc@17: numVectors=wL; mas01mc@17: } mas01mc@17: mas01mc@17: double ** D = 0; // Differences query and target mas01mc@17: double ** DD = 0; // Matched filter distance mas01mc@17: mas01mc@17: D = new double*[numVectors]; mas01mc@17: assert(D); mas01mc@17: DD = new double*[numVectors]; mas01mc@17: assert(DD); mas01mc@17: mas01mc@17: gettimeofday(&tv1, NULL); mas01mc@18: processedTracks=0; mas01mc@18: unsigned successfulTracks=0; mas01mc@17: mas01mc@17: double* qp; mas01mc@17: double* sp; mas01mc@17: double* dp; mas01mc@17: mas01mc@18: // build track offset table mas01cr@187: off_t *trackOffsetTable = new off_t[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01cr@187: off_t trackIndexOffset; mas01mc@17: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01mc@17: } mas01mc@17: mas01mc@17: char nextKey [MAXSTR]; mas01mc@17: mas01mc@17: // chi^2 statistics mas01mc@17: double sampleCount = 0; mas01mc@17: double sampleSum = 0; mas01mc@17: double logSampleSum = 0; mas01mc@17: double minSample = 1e9; mas01mc@17: double maxSample = 0; 
mas01mc@17: mas01mc@17: // Track loop mas01cr@179: size_t data_buffer_size = 0; mas01cr@179: double *data_buffer = 0; mas01cr@179: lseek(dbfid, dbH->dataOffset, SEEK_SET); mas01cr@179: mas01mc@18: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01mc@17: mas01cr@187: trackOffset = trackOffsetTable[track]; // numDoubles offset mas01cr@187: mas01mc@18: // get trackID from file if using a control file mas01cr@187: if(trackFile) { mas01cr@187: trackFile->getline(nextKey,MAXSTR); mas01cr@187: if(!trackFile->eof()) { mas01cr@187: track = getKeyPos(nextKey); mas01cr@187: trackOffset = trackOffsetTable[track]; mas01cr@187: lseek(dbfid, dbH->dataOffset + trackOffset * sizeof(double), SEEK_SET); mas01cr@187: } else { mas01cr@187: break; mas01mc@17: } mas01mc@17: } mas01mc@17: mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01mc@17: mas01cr@70: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01mc@17: mas01cr@60: if(verbosity>7) { mas01mc@18: cerr << track << "." << trackIndexOffset << "." 
<< trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@60: mas01mc@17: // Sum products matrix mas01mc@17: for(j=0; jdim > data_buffer_size) { mas01cr@179: if(data_buffer) { mas01cr@179: free(data_buffer); mas01cr@179: } mas01cr@179: { mas01cr@179: data_buffer_size = trackTable[track] * sizeof(double) * dbH->dim; mas01cr@179: void *tmp = malloc(data_buffer_size); mas01cr@179: if (tmp == NULL) { mas01cr@179: error("error allocating data buffer"); mas01cr@179: } mas01cr@179: data_buffer = (double *) tmp; mas01cr@179: } mas01cr@179: } mas01cr@179: mas01cr@179: read(dbfid, data_buffer, trackTable[track] * sizeof(double) * dbH->dim); mas01cr@179: mas01mc@17: // Dot product mas01mc@17: for(j=0; jdim; mas01cr@179: sp=data_buffer+k*dbH->dim; mas01mc@17: DD[j][k]=0.0; // Initialize matched filter array mas01mc@17: dp=&D[j][k]; // point to correlation cell j,k mas01mc@17: *dp=0.0; // initialize correlation cell mas01mc@17: l=dbH->dim; // size of vectors mas01mc@17: while(l--) mas01mc@17: *dp+=*qp++**sp++; mas01mc@17: } mas01cr@179: mas01mc@17: // Matched Filter mas01mc@17: // HOP SIZE == 1 mas01mc@17: double* spd; mas01mc@17: if(HOP_SIZE==1){ // HOP_SIZE = shingleHop mas01mc@17: for(w=0; w3 && usingTimes) { mas01mc@18: cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; mas01mc@17: cerr.flush(); mas01mc@17: } mas01mc@17: mas01mc@17: if(!usingTimes || mas01mc@17: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01mc@17: cerr << "within duration tolerance." 
<< endl; mas01mc@17: cerr.flush(); mas01mc@17: } mas01mc@17: mas01mc@17: // Search for minimum distance by shingles (concatenated vectors) mas01cr@70: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@70: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01cr@125: thisDist=2-(2/(qnPtr[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@60: if(verbosity>10) { mas01cr@125: cerr << thisDist << " " << qnPtr[j] << " " << sNorm[trackIndexOffset+k] << endl; mas01cr@60: } mas01mc@17: // Gather chi^2 statistics mas01mc@17: if(thisDistmaxSample) mas01mc@17: maxSample=thisDist; mas01mc@17: if(thisDist>1e-9){ mas01mc@17: sampleCount++; mas01mc@17: sampleSum+=thisDist; mas01mc@17: logSampleSum+=log(thisDist); mas01mc@17: } mas01mc@17: mas01cr@125: // diffL2 = fabs(qnPtr[j] - sNorm[trackIndexOffset+k]); mas01mc@17: // Power test mas01mc@17: if(!USE_THRESH || mas01mc@17: // Threshold on mean L2 of Q and S sequences mas01cr@125: (USE_THRESH && qnPtr[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && mas01mc@17: // Are both query and target windows above mean energy? mas01cr@125: (qnPtr[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) mas01mc@17: thisDist=thisDist; // Computed above mas01mc@17: else mas01mc@17: thisDist=1000000.0; mas01mc@17: if(thisDist>=0 && thisDist<=radius){ mas01mc@17: distances[0]++; // increment count mas01mc@18: break; // only need one track point per query point mas01mc@17: } mas01mc@17: } mas01mc@17: // How many points were below threshold ? mas01mc@17: thisDist=distances[0]; mas01mc@17: mas01mc@17: // Let's see the distances then... 
mas01cr@60: if(verbosity>3) { mas01mc@18: cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; mas01cr@60: } mas01mc@17: mas01mc@18: // All the track stuff goes here mas01mc@18: n=trackNN; mas01mc@17: while(n--){ mas01mc@18: if(thisDist>trackDistances[n]){ mas01mc@18: if((n==0 || thisDist<=trackDistances[n-1])){ mas01mc@17: // Copy all values above up the queue mas01mc@18: for( l=trackNN-1 ; l > n ; l--){ mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01mc@17: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: successfulTracks++; mas01mc@18: trackIDs[n]=track; mas01mc@17: break; mas01mc@17: } mas01mc@17: } mas01mc@17: else mas01mc@17: break; mas01mc@17: } mas01mc@17: } // Duration match mas01mc@17: mas01mc@18: // Clean up current track mas01mc@17: if(D!=NULL){ mas01mc@17: for(j=0; j1) { mas01mc@18: cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01mc@17: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01mc@17: cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01mc@17: << " minSample: " << minSample << " maxSample: " << maxSample << endl; mas01mc@17: } mas01mc@17: mas01cr@133: if(adbQueryResponse==0){ mas01cr@60: if(verbosity>1) { mas01mc@17: cerr<result.__sizeRlist=listLen; mas01cr@133: adbQueryResponse->result.__sizeDist=listLen; mas01cr@133: adbQueryResponse->result.__sizeQpos=listLen; mas01cr@133: adbQueryResponse->result.__sizeSpos=listLen; mas01cr@133: adbQueryResponse->result.Rlist= new char*[listLen]; mas01cr@133: adbQueryResponse->result.Dist = new double[listLen]; mas01cr@133: adbQueryResponse->result.Qpos = new unsigned 
int[listLen]; mas01cr@133: adbQueryResponse->result.Spos = new unsigned int[listLen]; mas01cr@133: for(k=0; k<(unsigned)adbQueryResponse->result.__sizeRlist; k++){ mas01cr@133: adbQueryResponse->result.Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@133: adbQueryResponse->result.Dist[k]=trackDistances[k]; mas01cr@133: adbQueryResponse->result.Qpos[k]=trackQIndexes[k]; mas01cr@133: adbQueryResponse->result.Spos[k]=trackSIndexes[k]; mas01cr@133: sprintf(adbQueryResponse->result.Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01mc@17: } mas01mc@17: } mas01mc@17: mas01mc@17: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@18: delete[] trackOffsetTable; mas01mc@17: if(queryCopy) mas01mc@17: delete[] queryCopy; mas01cr@121: if(qNorm) mas01cr@121: delete[] qNorm; mas01cr@121: if(sNorm) mas01cr@121: delete[] sNorm; mas01cr@121: if(sMeanL2) mas01cr@121: delete[] sMeanL2; mas01mc@17: if(D) mas01mc@17: delete[] D; mas01mc@17: if(DD) mas01mc@17: delete[] DD; mas01mc@17: if(timesdata) mas01mc@17: delete[] timesdata; mas01mc@17: if(meanDBdur) mas01mc@17: delete[] meanDBdur; mas01mc@17: mas01mc@17: mas01mc@17: } mas01mc@17: mas01cr@0: // Unit norm block of features mas01cr@0: void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ mas01cr@0: unsigned d; mas01cr@59: double L2, *p; mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "norming " << n << " vectors...";cerr.flush(); mas01cr@60: } mas01cr@0: while(n--){ mas01cr@0: p=X; mas01cr@0: L2=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: L2+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01mc@17: /* L2=sqrt(L2);*/ mas01cr@0: if(qNorm) mas01cr@0: *qNorm++=L2; mas01mc@17: /* mas01cr@0: oneOverL2 = 1.0/L2; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *X*=oneOverL2; mas01cr@0: X++; mas01mc@17: */ mas01mc@17: X+=dim; mas01cr@0: } mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "done..." 
<< endl; mas01cr@60: } mas01cr@0: } mas01cr@0: mas01cr@0: // Unit norm block of features mas01cr@0: void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ mas01cr@0: unsigned d; mas01cr@59: double *p; mas01cr@0: unsigned nn = n; mas01cr@0: mas01cr@0: assert(l2normTable); mas01cr@0: mas01cr@0: if( !append && (dbH->flags & O2_FLAG_L2NORM) ) mas01cr@0: error("Database is already L2 normed", "automatic norm on insert is enabled"); mas01cr@0: mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "norming " << n << " vectors...";cerr.flush(); mas01cr@60: } mas01cr@0: mas01cr@0: double* l2buf = new double[n]; mas01cr@0: double* l2ptr = l2buf; mas01cr@0: assert(l2buf); mas01cr@0: assert(X); mas01cr@0: mas01cr@0: while(nn--){ mas01cr@0: p=X; mas01cr@0: *l2ptr=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *l2ptr+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01mc@17: l2ptr++; mas01mc@17: X+=dim; mas01cr@0: } mas01cr@0: unsigned offset; mas01cr@84: if(append) { mas01cr@84: // FIXME: a hack, a very palpable hack: the vectors have already mas01cr@84: // been inserted, and dbH->length has already been updated. We mas01cr@84: // need to subtract off again the number of vectors that we've mas01cr@84: // inserted this time... mas01cr@84: offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors mas01cr@84: } else { mas01cr@0: offset=0; mas01cr@84: } mas01cr@0: memcpy(l2normTable+offset, l2buf, n*sizeof(double)); mas01cr@0: if(l2buf) mas01mc@17: delete[] l2buf; mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "done..." 
<< endl; mas01cr@60: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Start an audioDB server on the host mas01cr@0: void audioDB::startServer(){ mas01cr@0: struct soap soap; mas01cr@0: int m, s; // master and slave sockets mas01cr@0: soap_init(&soap); mas01cr@92: // FIXME: largely this use of SO_REUSEADDR is to make writing (and mas01cr@92: // running) test cases more convenient, so that multiple test runs mas01cr@92: // in close succession don't fail because of a bin() error. mas01cr@92: // Investigate whether there are any potential drawbacks in this, mas01cr@92: // and also whether there's a better way to write the tests. -- mas01cr@92: // CSR, 2007-10-03 mas01cr@92: soap.bind_flags |= SO_REUSEADDR; mas01cr@0: m = soap_bind(&soap, NULL, port, 100); mas01cr@0: if (m < 0) mas01cr@0: soap_print_fault(&soap, stderr); mas01cr@0: else mas01cr@0: { mas01cr@0: fprintf(stderr, "Socket connection successful: master socket = %d\n", m); mas01cr@0: for (int i = 1; ; i++) mas01cr@0: { mas01cr@0: s = soap_accept(&soap); mas01cr@0: if (s < 0) mas01cr@0: { mas01cr@0: soap_print_fault(&soap, stderr); mas01cr@0: break; mas01cr@0: } mas01cr@75: fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i, mas01cr@0: (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s); mas01cr@0: if (soap_serve(&soap) != SOAP_OK) // process RPC request mas01cr@0: soap_print_fault(&soap, stderr); // print error mas01cr@0: fprintf(stderr, "request served\n"); mas01cr@0: soap_destroy(&soap); // clean up class instances mas01cr@0: soap_end(&soap); // clean up everything and close socket mas01cr@0: } mas01cr@0: } mas01cr@0: soap_done(&soap); // close master socket and detach environment mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // web services mas01cr@0: mas01cr@0: // SERVER SIDE mas01cr@133: int adb__status(struct soap* soap, xsd__string dbName, adb__statusResponse &adbStatusResponse){ mas01cr@21: char* const 
argv[]={"audioDB",COM_STATUS,"-d",dbName}; mas01cr@21: const unsigned argc = 4; mas01cr@77: try { mas01cr@133: audioDB(argc, argv, &adbStatusResponse); mas01cr@77: return SOAP_OK; mas01cr@77: } catch(char *err) { mas01cr@77: soap_receiver_fault(soap, err, ""); mas01cr@77: return SOAP_FAULT; mas01cr@77: } mas01cr@0: } mas01cr@0: mas01cr@0: // Literal translation of command line to web service mas01cr@0: mas01cr@133: int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResponse &adbQueryResponse){ mas01cr@0: char queryType[256]; mas01cr@0: for(int k=0; k<256; k++) mas01cr@0: queryType[k]='\0'; mas01cr@105: if(qType == O2_POINT_QUERY) mas01cr@0: strncpy(queryType, "point", strlen("point")); mas01cr@105: else if (qType == O2_SEQUENCE_QUERY) mas01cr@0: strncpy(queryType, "sequence", strlen("sequence")); mas01cr@105: else if(qType == O2_TRACK_QUERY) mas01mc@18: strncpy(queryType,"track", strlen("track")); mas01cr@0: else mas01cr@0: strncpy(queryType, "", strlen("")); mas01cr@0: mas01cr@0: if(pointNN==0) mas01cr@0: pointNN=10; mas01mc@18: if(trackNN==0) mas01mc@18: trackNN=10; mas01cr@0: if(seqLen==0) mas01cr@0: seqLen=16; mas01cr@0: mas01cr@0: char qPosStr[256]; mas01cr@0: sprintf(qPosStr, "%d", qPos); mas01cr@0: char pointNNStr[256]; mas01cr@0: sprintf(pointNNStr,"%d",pointNN); mas01mc@18: char trackNNStr[256]; mas01mc@18: sprintf(trackNNStr,"%d",trackNN); mas01cr@0: char seqLenStr[256]; mas01cr@0: sprintf(seqLenStr,"%d",seqLen); mas01cr@0: mas01cr@0: const char* argv[] ={ mas01cr@0: "./audioDB", mas01cr@0: COM_QUERY, mas01cr@0: queryType, // Need to pass a parameter mas01cr@0: COM_DATABASE, mas01cr@166: ENSURE_STRING(dbName), mas01cr@0: COM_FEATURES, mas01cr@166: ENSURE_STRING(qKey), mas01cr@0: COM_KEYLIST, mas01cr@166: ENSURE_STRING(keyList), mas01cr@0: COM_TIMES, mas01cr@166: ENSURE_STRING(timesFileName), 
mas01cr@0: COM_QPOINT, mas01cr@0: qPosStr, mas01cr@0: COM_POINTNN, mas01cr@0: pointNNStr, mas01mc@18: COM_TRACKNN, mas01mc@18: trackNNStr, // Need to pass a parameter mas01cr@0: COM_SEQLEN, mas01cr@0: seqLenStr mas01cr@0: }; mas01cr@0: mas01cr@0: const unsigned argc = 19; mas01cr@79: try { mas01cr@133: audioDB(argc, (char* const*)argv, &adbQueryResponse); mas01cr@79: return SOAP_OK; mas01cr@79: } catch (char *err) { mas01cr@79: soap_receiver_fault(soap, err, ""); mas01cr@79: return SOAP_FAULT; mas01cr@79: } mas01cr@0: } mas01cr@0: mas01cr@0: int main(const unsigned argc, char* const argv[]){ mas01cr@0: audioDB(argc, argv); mas01cr@0: }