mas01cr@0: #include "audioDB.h" mas01cr@0: mas01cr@0: #define O2_DEBUG mas01cr@0: mas01cr@32: void audioDB::error(const char* a, const char* b, const char *sysFunc) { mas01cr@77: if(isServer) { mas01cr@81: /* FIXME: I think this is leaky -- we never delete err. actually mas01cr@81: deleting it is tricky, though; it gets placed into some mas01cr@81: soap-internal struct with uncertain extent... -- CSR, mas01cr@81: 2007-10-01 */ mas01cr@77: char *err = new char[256]; /* FIXME: overflows */ mas01cr@77: snprintf(err, 255, "%s: %s\n%s", a, b, sysFunc ? strerror(errno) : ""); mas01cr@77: /* FIXME: actually we could usefully do with a properly structured mas01cr@77: type, so that we can throw separate faultstring and details. mas01cr@77: -- CSR, 2007-10-01 */ mas01cr@77: throw(err); mas01cr@77: } else { mas01cr@77: cerr << a << ": " << b << endl; mas01cr@77: if (sysFunc) { mas01cr@77: perror(sysFunc); mas01cr@77: } mas01cr@77: exit(1); mas01cr@32: } mas01cr@0: } mas01cr@0: mas01cr@76: #define O2_AUDIODB_INITIALIZERS \ mas01cr@76: dim(0), \ mas01cr@76: dbName(0), \ mas01cr@76: inFile(0), \ mas01cr@76: key(0), \ mas01cr@76: trackFileName(0), \ mas01cr@76: trackFile(0), \ mas01cr@76: command(0), \ mas01cr@76: timesFileName(0), \ mas01cr@76: timesFile(0), \ mas01cr@76: dbfid(0), \ mas01cr@76: infid(0), \ mas01cr@76: db(0), \ mas01cr@76: indata(0), \ mas01cr@76: dbH(0), \ mas01cr@76: fileTable(0), \ mas01cr@76: trackTable(0), \ mas01cr@76: dataBuf(0), \ mas01cr@76: l2normTable(0), \ mas01cr@76: qNorm(0), \ mas01cr@76: timesTable(0), \ mas01cr@76: verbosity(1), \ mas01cr@76: queryType(O2_FLAG_POINT_QUERY), \ mas01cr@76: pointNN(O2_DEFAULT_POINTNN), \ mas01cr@76: trackNN(O2_DEFAULT_TRACKNN), \ mas01cr@76: sequenceLength(16), \ mas01cr@76: sequenceHop(1), \ mas01cr@76: queryPoint(0), \ mas01cr@76: usingQueryPoint(0), \ mas01cr@76: usingTimes(0), \ mas01cr@76: isClient(0), \ mas01cr@76: isServer(0), \ mas01cr@76: port(0), \ mas01cr@76: timesTol(0.1), \ mas01cr@76: radius(0) 
mas01cr@76: mas01cr@76: audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(1); mas01cr@0: } mas01cr@77: mas01cr@0: if(O2_ACTION(COM_SERVER)) mas01cr@0: startServer(); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01cr@0: if(isClient) mas01cr@0: ws_query(dbName, inFile, (char*)hostport); mas01cr@0: else mas01cr@76: query(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@76: audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@76: processArgs(argc, argv); mas01cr@77: isServer = 1; // FIXME: Hack mas01cr@76: assert(O2_ACTION(COM_QUERY)); mas01cr@76: query(dbName, inFile, adbQueryResult); mas01cr@76: } mas01cr@76: mas01cr@76: audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResult 
*adbStatusResult): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@76: processArgs(argc, argv); mas01cr@77: isServer = 1; // FIXME: Hack mas01cr@76: assert(O2_ACTION(COM_STATUS)); mas01cr@76: status(dbName, adbStatusResult); mas01cr@76: } mas01cr@76: mas01cr@0: audioDB::~audioDB(){ mas01cr@0: // Clean up mas01cr@0: if(indata) mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: if(db) mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: if(dbfid>0) mas01cr@0: close(dbfid); mas01cr@0: if(infid>0) mas01cr@0: close(infid); mas01cr@0: if(dbH) mas01cr@0: delete dbH; mas01cr@0: } mas01cr@0: mas01cr@0: int audioDB::processArgs(const unsigned argc, char* const argv[]){ mas01cr@0: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if (cmdline_parser (argc, argv, &args_info) != 0) mas01cr@0: exit(1) ; mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@0: verbosity=args_info.verbosity_arg; mas01cr@0: if(verbosity<0 || verbosity>10){ mas01cr@0: cerr << "Warning: verbosity out of range, setting to 1" << endl; mas01cr@0: verbosity=1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01mc@17: if(args_info.radius_given){ mas01mc@17: radius=args_info.radius_arg; mas01mc@17: if(radius<=0 || radius>1000000000){ mas01cr@77: error("radius out of range"); mas01mc@17: } mas01mc@17: else mas01cr@60: if(verbosity>3) { mas01mc@17: cerr << "Setting radius to " << radius << endl; mas01cr@60: } mas01mc@17: } mas01mc@17: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: 
command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@0: isServer=1; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@0: if(args_info.client_given){ mas01cr@0: command=COM_CLIENT; mas01cr@0: hostport=args_info.client_arg; mas01cr@0: isClient=1; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.NEW_given){ mas01cr@0: command=COM_CREATE; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.STATUS_given){ mas01cr@0: command=COM_STATUS; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.DUMP_given){ mas01cr@0: command=COM_DUMP; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.L2NORM_given){ mas01cr@0: command=COM_L2NORM; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.INSERT_given){ mas01cr@0: command=COM_INSERT; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.features_arg; mas01cr@0: if(args_info.key_given) mas01cr@0: key=args_info.key_arg; mas01cr@0: if(args_info.times_given){ mas01cr@0: timesFileName=args_info.times_arg; mas01cr@0: if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open times file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: return 0; mas01cr@0: } mas01mc@10: mas01cr@0: if(args_info.BATCHINSERT_given){ mas01cr@0: command=COM_BATCHINSERT; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.featureList_arg; mas01cr@0: if(args_info.keyList_given) mas01cr@0: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@0: mas01cr@0: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ 
mas01mc@18: trackFileName=args_info.keyList_arg; mas01mc@18: if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) mas01mc@18: error("Could not open keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@0: */ mas01cr@0: mas01cr@0: if(args_info.timesList_given){ mas01cr@0: timesFileName=args_info.timesList_arg; mas01cr@0: if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open timesList file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // Query command and arguments mas01cr@0: if(args_info.QUERY_given){ mas01cr@0: command=COM_QUERY; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.features_arg; mas01cr@0: mas01cr@0: if(args_info.keyList_given){ mas01mc@18: trackFileName=args_info.keyList_arg; mas01mc@18: if(strlen(trackFileName)>0 && !(trackFile = new ifstream(trackFileName,ios::in))) mas01mc@18: error("Could not open keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.times_given){ mas01cr@0: timesFileName=args_info.times_arg; mas01cr@0: if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open times file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // query type mas01mc@18: if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) mas01mc@18: queryType=O2_FLAG_TRACK_QUERY; mas01cr@0: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@0: queryType=O2_FLAG_POINT_QUERY; mas01cr@0: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@0: queryType=O2_FLAG_SEQUENCE_QUERY; mas01cr@0: else mas01cr@0: error("unsupported query type",args_info.QUERY_arg); mas01cr@0: mas01cr@0: if(!args_info.exhaustive_flag){ mas01cr@0: queryPoint = 
args_info.qpoint_arg; mas01cr@0: usingQueryPoint=1; mas01cr@0: if(queryPoint<0 || queryPoint >10000) mas01cr@0: error("queryPoint out of range: 0 <= queryPoint <= 10000"); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: pointNN=args_info.pointnn_arg; mas01cr@0: if(pointNN<1 || pointNN >1000) mas01cr@0: error("pointNN out of range: 1 <= pointNN <= 1000"); mas01cr@0: mas01cr@0: mas01cr@0: mas01mc@18: trackNN=args_info.resultlength_arg; mas01mc@18: if(trackNN<1 || trackNN >10000) mas01cr@0: error("resultlength out of range: 1 <= resultlength <= 1000"); mas01cr@0: mas01cr@0: mas01cr@0: sequenceLength=args_info.sequencelength_arg; mas01cr@0: if(sequenceLength<1 || sequenceLength >1000) mas01cr@0: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@0: mas01cr@0: sequenceHop=args_info.sequencehop_arg; mas01cr@0: if(sequenceHop<1 || sequenceHop >1000) mas01cr@0: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01cr@0: mas01cr@0: return 0; mas01cr@0: } mas01cr@0: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@0: /* Make a new database mas01cr@0: mas01cr@0: The database consists of: mas01cr@0: mas01cr@0: header mas01cr@0: --------------------------------------------------------------------------------- mas01cr@0: | magic 4 bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | mas01cr@0: --------------------------------------------------------------------------------- mas01cr@0: mas01cr@0: mas01mc@18: keyTable : list of keys of tracks mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | key 256 bytes | mas01cr@0: -------------------------------------------------------------------------- mas01cr@85: O2_MAXFILES*O2_FILENAMELENGTH mas01cr@0: mas01mc@18: trackTable : Maps implicit feature index to a feature vector matrix mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | numVectors (4 bytes) | mas01cr@0: 
-------------------------------------------------------------------------- mas01cr@85: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) mas01cr@0: mas01cr@0: featureTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | v1 v2 v3 ... vd (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@85: O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) mas01cr@0: mas01cr@0: timesTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | timestamp (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@85: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) mas01cr@0: mas01cr@0: l2normTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | nm (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@85: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) mas01cr@0: mas01cr@0: */ mas01cr@0: mas01cr@30: void audioDB::get_lock(int fd, bool exclusive) { mas01cr@30: struct flock lock; mas01cr@30: int status; mas01cr@30: mas01cr@30: lock.l_type = exclusive ? 
F_WRLCK : F_RDLCK; mas01cr@30: lock.l_whence = SEEK_SET; mas01cr@30: lock.l_start = 0; mas01cr@30: lock.l_len = 0; /* "the whole file" */ mas01cr@30: mas01cr@30: retry: mas01cr@30: do { mas01cr@30: status = fcntl(fd, F_SETLKW, &lock); mas01cr@30: } while (status != 0 && errno == EINTR); mas01cr@30: mas01cr@30: if (status) { mas01cr@30: if (errno == EAGAIN) { mas01cr@30: sleep(1); mas01cr@30: goto retry; mas01cr@30: } else { mas01cr@32: error("fcntl lock error", "", "fcntl"); mas01cr@30: } mas01cr@30: } mas01cr@30: } mas01cr@30: mas01cr@30: void audioDB::release_lock(int fd) { mas01cr@30: struct flock lock; mas01cr@30: int status; mas01cr@30: mas01cr@30: lock.l_type = F_UNLCK; mas01cr@30: lock.l_whence = SEEK_SET; mas01cr@30: lock.l_start = 0; mas01cr@30: lock.l_len = 0; mas01cr@30: mas01cr@30: status = fcntl(fd, F_SETLKW, &lock); mas01cr@30: mas01cr@30: if (status) mas01cr@32: error("fcntl unlock error", "", "fcntl"); mas01cr@30: } mas01cr@30: mas01cr@0: void audioDB::create(const char* dbName){ mas01cr@31: if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) mas01cr@32: error("Can't create database file", dbName, "open"); mas01cr@30: get_lock(dbfid, 1); mas01cr@0: mas01cr@0: // go to the location corresponding to the last byte mas01cr@0: if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1) mas01cr@32: error("lseek error in db file", "", "lseek"); mas01cr@0: mas01cr@0: // write a dummy byte at the last location mas01cr@0: if (write (dbfid, "", 1) != 1) mas01cr@32: error("write error", "", "write"); mas01cr@0: mas01cr@0: // mmap the output file mas01cr@60: if(verbosity) { mas01cr@0: cerr << "header size:" << O2_HEADERSIZE << endl; mas01cr@60: } mas01cr@0: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01cr@0: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01cr@32: error("mmap error for creating database", "", "mmap"); mas01cr@0: mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); 
mas01cr@0: mas01cr@0: // Initialize header mas01cr@0: dbH->magic=O2_MAGIC; mas01cr@0: dbH->numFiles=0; mas01cr@0: dbH->length=0; mas01cr@0: dbH->dim=0; mas01cr@0: dbH->flags=0; //O2_FLAG_L2NORM; mas01cr@0: mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_CREATE << " " << dbName << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: void audioDB::drop(){ mas01cr@0: mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: // initTables - memory map files passed as arguments mas01cr@0: // Precondition: database has already been created mas01cr@27: void audioDB::initTables(const char* dbName, bool forWrite, const char* inFile=0){ mas01cr@27: if ((dbfid = open (dbName, forWrite ? O_RDWR : O_RDONLY)) < 0) mas01cr@32: error("Can't open database file", dbName, "open"); mas01cr@30: get_lock(dbfid, forWrite); mas01cr@30: mas01cr@0: // open the input file mas01cr@0: if (inFile && (infid = open (inFile, O_RDONLY)) < 0) mas01cr@32: error("can't open input file for reading", inFile, "open"); mas01cr@0: mas01cr@0: // find size of input file mas01cr@0: if (inFile && fstat (infid,&statbuf) < 0) mas01cr@32: error("fstat error finding size of input", "", "fstat"); mas01cr@0: mas01cr@0: // Get the database header info mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@0: if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) mas01cr@0: error("error reading db header"); mas01cr@0: mas01cr@0: fileTableOffset = O2_HEADERSIZE; mas01mc@18: trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; mas01mc@18: dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; mas01cr@0: l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: mas01cr@0: if(dbH->magic!=O2_MAGIC){ mas01cr@0: cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; 
mas01cr@0: error("database file has incorrect header",dbName); mas01cr@0: } mas01cr@0: mas01cr@0: if(inFile) mas01cr@0: if(dbH->dim==0 && dbH->length==0) // empty database mas01cr@0: read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality mas01cr@0: else { mas01cr@0: unsigned test; mas01cr@0: read(infid,&test,sizeof(unsigned)); mas01cr@0: if(dbH->dim!=test){ mas01cr@0: cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped database","use --times"); mas01cr@0: mas01cr@0: // Check that there is room for at least 1 more file mas01cr@0: if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) mas01cr@0: error("No more room in database","insert failed: reason database is full."); mas01cr@0: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: // Linear scan of filenames check for pre-existing feature mas01cr@0: unsigned alreadyInserted=0; mas01cr@0: for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: key already exists in database, ignoring: " <dim); mas01cr@0: if(!numVectors){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; mas01cr@60: } mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: close(infid); mas01cr@0: return; mas01cr@0: } mas01cr@0: mas01cr@0: strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); mas01cr@0: mas01cr@0: unsigned insertoffset = dbH->length;// Store current state mas01cr@0: mas01cr@0: // Check times status and insert times from file mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* 
timesdata=timesTable+timesoffset; mas01cr@0: assert(timesdata+numVectorsnumFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Copy the header back to the database mas01cr@0: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@0: mas01mc@18: // Update track to file index map mas01mc@18: //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); mas01mc@18: memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@0: // Update the feature database mas01cr@0: memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " mas01cr@0: << (statbuf.st_size-sizeof(int)) << " bytes." 
<< endl; mas01cr@60: } mas01cr@0: mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ mas01cr@0: unsigned numtimes=0; mas01cr@0: if(usingTimes){ mas01cr@0: if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) mas01cr@0: dbH->flags=dbH->flags|O2_FLAG_TIMES; mas01cr@0: else if(!(dbH->flags&O2_FLAG_TIMES)){ mas01cr@0: cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: if(!timesFile->is_open()){ mas01cr@0: if(dbH->flags & O2_FLAG_TIMES){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: error("problem opening times file on timestamped database",timesFileName); mas01cr@0: } mas01cr@0: else{ mas01cr@0: cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." 
<< endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Process time file mas01cr@0: if(usingTimes){ mas01cr@0: do{ mas01cr@0: *timesFile>>*timesdata++; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: }while(!timesFile->eof() && numtimeseof()){ mas01cr@0: double dummy; mas01cr@0: do{ mas01cr@0: *timesFile>>dummy; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: }while(!timesFile->eof()); mas01cr@0: } mas01cr@0: if(numtimesnumVectors+2){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: close(infid); mas01cr@0: cerr << "expected " << numVectors << " found " << numtimes << endl; mas01cr@0: error("Times file is incorrect length for features file",inFile); mas01cr@0: } mas01cr@60: if(verbosity>2) { mas01cr@0: cerr << "numtimes: " << numtimes << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::batchinsert(const char* dbName, const char* inFile){ mas01cr@0: mas01cr@0: if ((dbfid = open (dbName, O_RDWR)) < 0) mas01cr@32: error("Can't open database file", dbName, "open"); mas01cr@30: get_lock(dbfid, 1); mas01cr@0: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: ifstream *filesIn = 0; mas01cr@0: ifstream *keysIn = 0; mas01cr@0: ifstream* thisTimesFile = 0; mas01cr@0: mas01cr@0: if(!(filesIn = new ifstream(inFile))) mas01cr@0: error("Could not open batch in file", inFile); mas01cr@0: if(key && key!=inFile) mas01cr@0: if(!(keysIn = new ifstream(key))) mas01cr@0: error("Could not open batch key file",key); mas01cr@0: mas01cr@0: // Get the database header info mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@0: if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) mas01cr@0: error("error reading db header"); mas01cr@0: mas01cr@0: if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped 
database","use --times"); mas01cr@0: mas01cr@0: fileTableOffset = O2_HEADERSIZE; mas01mc@18: trackTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; mas01mc@18: dataoffset = trackTableOffset + O2_TRACKTABLESIZE*O2_MAXFILES; mas01cr@0: l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: mas01cr@0: if(dbH->magic!=O2_MAGIC){ mas01cr@0: cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; mas01cr@0: error("database file has incorrect header",dbName); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: unsigned totalVectors=0; mas01cr@0: char *thisKey = new char[MAXSTR]; mas01cr@0: char *thisFile = new char[MAXSTR]; mas01cr@0: char *thisTimesFileName = new char[MAXSTR]; mas01cr@0: mas01cr@0: do{ mas01cr@0: filesIn->getline(thisFile,MAXSTR); mas01cr@0: if(key && key!=inFile) mas01cr@0: keysIn->getline(thisKey,MAXSTR); mas01cr@0: else mas01cr@0: thisKey = thisFile; mas01cr@0: if(usingTimes) mas01cr@0: timesFile->getline(thisTimesFileName,MAXSTR); mas01cr@0: mas01cr@0: if(filesIn->eof()) mas01cr@0: break; mas01cr@0: mas01cr@0: // open the input file mas01cr@0: if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0) mas01cr@32: error("can't open feature file for reading", thisFile, "open"); mas01cr@0: mas01cr@0: // find size of input file mas01cr@0: if (thisFile && fstat (infid,&statbuf) < 0) mas01cr@32: error("fstat error finding size of input", "", "fstat"); mas01cr@0: mas01mc@11: // mmap the database file mas01mc@11: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01mc@11: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01cr@32: error("mmap error for batchinsert into database", "", "mmap"); mas01mc@11: mas01mc@11: // Make some handy tables with correct types mas01mc@11: fileTable= (char*)(db+fileTableOffset); mas01mc@18: trackTable = (unsigned*)(db+trackTableOffset); mas01mc@11: dataBuf = 
(double*)(db+dataoffset); mas01mc@11: l2normTable = (double*)(db+l2normTableOffset); mas01mc@11: timesTable = (double*)(db+timesTableOffset); mas01mc@11: mas01cr@0: // Check that there is room for at least 1 more file mas01cr@0: if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) mas01cr@0: error("No more room in database","insert failed: reason database is full."); mas01cr@0: mas01cr@0: if(thisFile) mas01cr@0: if(dbH->dim==0 && dbH->length==0) // empty database mas01cr@0: read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality mas01cr@0: else { mas01cr@0: unsigned test; mas01cr@0: read(infid,&test,sizeof(unsigned)); mas01cr@0: if(dbH->dim!=test){ mas01cr@0: cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <numFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: key already exists in database:" << thisKey << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: else{ mas01cr@0: mas01mc@18: // Make a track index table of features to file indexes mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: if(!numVectors){ mas01cr@60: if(verbosity) { mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; mas01cr@60: } mas01cr@0: } mas01cr@0: else{ mas01cr@0: if(usingTimes){ mas01cr@0: if(timesFile->eof()) mas01cr@0: error("not enough timestamp files in timesList"); mas01cr@0: thisTimesFile=new ifstream(thisTimesFileName,ios::in); mas01cr@0: if(!thisTimesFile->is_open()) mas01cr@0: error("Cannot open timestamp file",thisTimesFileName); mas01cr@0: unsigned insertoffset=dbH->length; mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* timesdata=timesTable+timesoffset; mas01cr@0: 
assert(timesdata+numVectorsnumFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); mas01cr@0: mas01cr@0: unsigned insertoffset = dbH->length;// Store current state mas01cr@0: mas01cr@0: // Increment file count mas01cr@0: dbH->numFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: // Copy the header back to the database mas01cr@0: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@0: mas01mc@18: // Update track to file index map mas01mc@18: //memcpy (db+trackTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); mas01mc@18: memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@0: // Update the feature database mas01cr@0: memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: totalVectors+=numVectors; mas01cr@0: } mas01cr@0: } mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01mc@11: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: }while(!filesIn->eof()); mas01mc@12: mas01mc@12: // mmap the database file mas01mc@12: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01mc@12: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01cr@32: error("mmap error for creating database", "", "mmap"); mas01cr@0: mas01cr@60: if(verbosity) { mas01cr@0: cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " mas01cr@0: << totalVectors*dbH->dim*sizeof(double) << " bytes." 
<< endl; mas01cr@60: } mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01mc@12: mas01mc@12: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: } mas01cr@0: mas01cr@76: // FIXME: this can't propagate the sequence length argument (used for mas01cr@76: // dudCount). See adb__status() definition for the other half of mas01cr@76: // this. -- CSR, 2007-10-01 mas01cr@0: void audioDB::ws_status(const char*dbName, char* hostport){ mas01cr@0: struct soap soap; mas01cr@76: adb__statusResult adbStatusResult; mas01cr@0: mas01cr@0: // Query an existing adb database mas01cr@0: soap_init(&soap); mas01cr@76: if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK) { mas01cr@76: cout << "numFiles = " << adbStatusResult.numFiles << endl; mas01cr@76: cout << "dim = " << adbStatusResult.dim << endl; mas01cr@76: cout << "length = " << adbStatusResult.length << endl; mas01cr@76: cout << "dudCount = " << adbStatusResult.dudCount << endl; mas01cr@76: cout << "nullCount = " << adbStatusResult.nullCount << endl; mas01cr@76: cout << "flags = " << adbStatusResult.flags << endl; mas01cr@76: } else { mas01cr@0: soap_print_fault(&soap,stderr); mas01cr@76: } mas01cr@0: mas01cr@0: soap_destroy(&soap); mas01cr@0: soap_end(&soap); mas01cr@0: soap_done(&soap); mas01cr@0: } mas01cr@0: mas01mc@18: void audioDB::ws_query(const char*dbName, const char *trackKey, const char* hostport){ mas01cr@0: struct soap soap; mas01cr@0: adb__queryResult adbQueryResult; mas01cr@0: mas01cr@0: soap_init(&soap); mas01cr@0: if(soap_call_adb__query(&soap,hostport,NULL, mas01mc@18: (char*)dbName,(char*)trackKey,(char*)trackFileName,(char*)timesFileName, mas01mc@18: queryType, queryPoint, pointNN, trackNN, sequenceLength, adbQueryResult)==SOAP_OK){ mas01cr@0: //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; mas01cr@0: for(int i=0; inumFiles; k++){ mas01mc@18: if(trackTable[k]numFiles << endl; mas01cr@76: cout << "data dim:" << dbH->dim <dim>0){ mas01cr@76: 
cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<length))/(sizeof(double)*dbH->dim) << endl; mas01cr@76: } mas01cr@76: cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl; mas01cr@76: cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" << mas01cr@76: (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl; mas01cr@76: cout << "flags:" << dbH->flags << endl; mas01cr@76: mas01cr@76: cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << endl; mas01cr@76: } else { mas01cr@76: adbStatusResult->numFiles = dbH->numFiles; mas01cr@76: adbStatusResult->dim = dbH->dim; mas01cr@76: adbStatusResult->length = dbH->length; mas01cr@76: adbStatusResult->dudCount = dudCount; mas01cr@76: adbStatusResult->nullCount = nullCount; mas01cr@76: adbStatusResult->flags = dbH->flags; mas01cr@76: } mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::dump(const char* dbName){ mas01cr@0: if(!dbH) mas01cr@27: initTables(dbName, 0, 0); mas01cr@0: mas01mc@17: for(unsigned k=0, j=0; knumFiles; k++){ mas01mc@18: cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; mas01mc@18: j+=trackTable[k]; mas01mc@17: } mas01cr@0: mas01cr@0: status(dbName); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::l2norm(const char* dbName){ mas01cr@50: initTables(dbName, true, 0); mas01cr@0: if(dbH->length>0){ mas01cr@0: unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@0: unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append mas01cr@0: } mas01cr@0: // Update database flags mas01cr@0: dbH->flags = dbH->flags|O2_FLAG_L2NORM; mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: mas01cr@0: void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: switch(queryType){ mas01cr@0: case O2_FLAG_POINT_QUERY: mas01cr@0: 
pointQuery(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01cr@0: case O2_FLAG_SEQUENCE_QUERY: mas01mc@17: if(radius==0) mas01mc@20: trackSequenceQueryNN(dbName, inFile, adbQueryResult); mas01mc@17: else mas01mc@20: trackSequenceQueryRad(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01mc@18: case O2_FLAG_TRACK_QUERY: mas01mc@18: trackPointQuery(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01cr@0: default: mas01cr@0: error("unrecognized queryType in query()"); mas01cr@0: mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: //return ordinal position of key in keyTable mas01cr@0: unsigned audioDB::getKeyPos(char* key){ mas01cr@0: for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) mas01cr@0: return k; mas01cr@0: error("Key not found",key); mas01cr@0: return O2_ERR_KEYNOTFOUND; mas01cr@0: } mas01cr@0: mas01cr@0: // Basic point query engine mas01cr@0: void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: mas01cr@27: initTables(dbName, 0, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01cr@0: assert(pointNN>0 && 
pointNN<=O2_MAXNN); mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: for(unsigned k=0; klength/(dbH->dim*sizeof(double)); mas01cr@0: double meanQdur = 0; mas01cr@0: double* timesdata = 0; mas01cr@0: double* dbdurs = 0; mas01cr@0: mas01cr@0: if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: j=1; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: while(j--){ // query mas01cr@0: data=dataBuf; mas01cr@0: k=totalVecs; // number of database vectors mas01cr@0: while(k--){ // database mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01cr@0: && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l >= n+1 ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: 
distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01cr@0: sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv2, NULL); mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@60: } mas01cr@0: mas01cr@0: if(adbQueryResult==0){ mas01cr@0: // Output answer mas01cr@0: // Loop over nearest neighbours mas01cr@0: for(k=0; k < pointNN; k++){ mas01cr@0: // Scan for key mas01mc@18: unsigned cumTrack=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01mc@18: cumTrack+=trackTable[l]; mas01mc@18: if(sIndexes[k]__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@86: adbQueryResult->Qpos = new unsigned int[listLen]; mas01cr@86: adbQueryResult->Spos = new unsigned int[listLen]; mas01cr@59: for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@0: adbQueryResult->Dist[k]=distances[k]; mas01cr@0: adbQueryResult->Qpos[k]=qIndexes[k]; mas01mc@18: unsigned cumTrack=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01mc@18: cumTrack+=trackTable[l]; mas01mc@18: if(sIndexes[k]Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01mc@18: adbQueryResult->Spos[k]=sIndexes[k]+trackTable[l]-cumTrack; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Clean up mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) 
mas01cr@0: delete timesdata; mas01cr@0: if(dbdurs) mas01cr@0: delete dbdurs; mas01cr@0: } mas01cr@0: mas01mc@18: // trackPointQuery mas01mc@18: // return the trackNN closest tracks to the query track mas01mc@18: // uses average of pointNN points per track mas01mc@18: void audioDB::trackPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@27: initTables(dbName, 0, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: unsigned j=numVectors; // number of query points mas01mc@18: unsigned k,l,n, track, trackOffset=0, processedTracks=0; mas01cr@0: double thisDist; mas01cr@0: mas01cr@0: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: 
mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumFiles]; mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: } mas01cr@0: mas01mc@18: // build track offset table mas01mc@18: unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01mc@18: unsigned trackIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey[MAXSTR]; mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: mas01mc@18: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01mc@18: if(trackFile){ mas01mc@18: if(!trackFile->eof()){ mas01mc@18: trackFile->getline(nextKey,MAXSTR); mas01mc@18: track=getKeyPos(nextKey); mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01mc@18: trackOffset=trackOffsetTable[track]; // numDoubles offset mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@60: if(verbosity>7) { mas01mc@18: cerr << track << "." << trackOffset/(dbH->dim) << "." 
<< trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@0: mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; mas01cr@0: else mas01cr@0: usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); mas01cr@0: if(usingQueryPoint) mas01cr@0: j=1; mas01cr@0: else mas01cr@0: j=numVectors; mas01cr@0: while(j--){ mas01mc@18: k=trackTable[track]; // number of vectors in track mas01mc@18: data=dataBuf+trackOffset; // data for track mas01cr@0: while(k--){ mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l > n ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01mc@18: sIndexes[n]=trackTable[track]-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01mc@18: } // track mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } // query mas01mc@18: // Take the average of this track's distance mas01mc@18: // Test the track distances mas01cr@0: thisDist=0; mas01cr@66: for (n = 0; n < pointNN; n++) { mas01cr@66: if (distances[n] == -DBL_MAX) break; mas01cr@66: thisDist += distances[n]; mas01cr@66: } mas01cr@66: thisDist /= n; mas01cr@66: mas01mc@18: n=trackNN; mas01cr@0: while(n--){ mas01mc@18: if(thisDist>=trackDistances[n]){ mas01mc@18: if((n==0 || thisDist<=trackDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@74: for( l=trackNN-1 ; l > n ; l--){ 
mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01cr@0: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: trackIDs[n]=track; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: for(unsigned k=0; k1) { mas01mc@18: cerr << endl << "processed tracks :" << processedTracks mas01cr@0: << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@60: } mas01cr@0: mas01cr@0: if(adbQueryResult==0){ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr<__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@86: adbQueryResult->Qpos = new unsigned int[listLen]; mas01cr@86: adbQueryResult->Spos = new unsigned int[listLen]; mas01cr@59: for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01mc@18: adbQueryResult->Dist[k]=trackDistances[k]; mas01mc@18: adbQueryResult->Qpos[k]=trackQIndexes[k]; mas01mc@18: adbQueryResult->Spos[k]=trackSIndexes[k]; mas01mc@18: sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@18: delete trackOffsetTable; mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(meanDBdur) mas01cr@0: delete meanDBdur; mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: mas01mc@20: // k nearest-neighbor 
(k-NN) search between query and target tracks mas01mc@20: // efficient implementation based on matched filter mas01mc@20: // assumes normed shingles mas01mc@20: // outputs distances of retrieved shingles, max retreived = pointNN shingles per per track mas01mc@20: void audioDB::trackSequenceQueryNN(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: mas01cr@27: initTables(dbName, 0, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: double qMeanL2; mas01cr@0: double* sMeanL2; mas01cr@0: mas01cr@0: unsigned USE_THRESH=0; mas01cr@0: double SILENCE_THRESH=0; mas01cr@0: double DIFF_THRESH=0; mas01cr@0: mas01cr@0: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01cr@55: error("Database must be L2 normed for sequence query","use -L2NORM"); mas01cr@55: mas01cr@55: if(numVectors1) { mas01cr@0: cerr << "performing norms ... 
"; cerr.flush(); mas01cr@60: } mas01cr@0: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01mc@20: mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: sNorm = new double[dbVectors]; mas01cr@0: sMeanL2=new double[dbH->numFiles]; mas01cr@0: assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01mc@20: mas01cr@0: // Make norm measurements relative to sequenceLength mas01cr@0: unsigned w = sequenceLength-1; mas01cr@0: unsigned i,j; mas01cr@0: double* ps; mas01cr@0: double tmp1,tmp2; mas01mc@20: mas01cr@0: // Copy the L2 norm values to core to avoid disk random access later on mas01cr@0: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01cr@0: double* snPtr = sNorm; mas01cr@0: for(i=0; inumFiles; i++){ mas01mc@20: if(trackTable[i]>=sequenceLength){ mas01cr@0: tmp1=*snPtr; mas01cr@0: j=1; mas01cr@0: w=sequenceLength-1; mas01cr@0: while(w--) mas01cr@0: *snPtr+=snPtr[j++]; mas01cr@0: ps = snPtr+1; mas01mc@18: w=trackTable[i]-sequenceLength; // +1 - 1 mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01mc@20: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01cr@0: tmp1=tmp2; mas01cr@0: ps++; mas01cr@0: } mas01mc@20: ps = snPtr; mas01mc@20: w=trackTable[i]-sequenceLength+1; mas01mc@20: while(w--){ mas01mc@20: *ps=sqrt(*ps); mas01mc@20: ps++; mas01mc@20: } mas01cr@0: } mas01mc@18: snPtr+=trackTable[i]; mas01cr@0: } mas01cr@0: mas01cr@0: double* pn = sMeanL2; mas01cr@0: w=dbH->numFiles; mas01cr@0: while(w--) mas01cr@0: *pn++=0.0; mas01cr@0: ps=sNorm; mas01mc@18: unsigned processedTracks=0; mas01cr@0: for(i=0; inumFiles; i++){ mas01mc@18: if(trackTable[i]>sequenceLength-1){ mas01cr@57: w = trackTable[i]-sequenceLength+1; mas01cr@0: pn = sMeanL2+i; mas01mc@20: *pn=0; mas01cr@0: while(w--) 
mas01mc@20: if(*ps>0) mas01mc@20: *pn+=*ps++; mas01cr@57: *pn/=trackTable[i]-sequenceLength+1; mas01cr@0: SILENCE_THRESH+=*pn; mas01mc@18: processedTracks++; mas01cr@0: } mas01mc@18: ps = sNorm + trackTable[i]; mas01cr@0: } mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "processedTracks: " << processedTracks << endl; mas01cr@60: } mas01mc@20: mas01mc@18: SILENCE_THRESH/=processedTracks; mas01cr@0: USE_THRESH=1; // Turn thresholding on mas01mc@20: DIFF_THRESH=SILENCE_THRESH; // mean shingle power mas01mc@20: SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE mas01cr@60: if(verbosity>4) { mas01mc@20: cerr << "silence thresh: " << SILENCE_THRESH; mas01cr@60: } mas01cr@0: w=sequenceLength-1; mas01cr@0: i=1; mas01cr@0: tmp1=*qNorm; mas01cr@0: while(w--) mas01cr@0: *qNorm+=qNorm[i++]; mas01cr@0: ps = qNorm+1; mas01mc@20: w=numVectors-sequenceLength; // +1 -1 mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01mc@20: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01cr@0: tmp1=tmp2; mas01mc@20: ps++; mas01mc@20: } mas01mc@20: ps = qNorm; mas01mc@20: qMeanL2 = 0; mas01mc@20: w=numVectors-sequenceLength+1; mas01mc@20: while(w--){ mas01mc@20: *ps=sqrt(*ps); mas01mc@20: qMeanL2+=*ps++; mas01cr@0: } mas01cr@0: qMeanL2 /= numVectors-sequenceLength+1; mas01mc@20: mas01cr@60: if(verbosity>1) { mas01cr@60: cerr << "done." << endl; mas01cr@60: } mas01cr@0: mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "matching tracks..." 
<< endl; mas01cr@60: } mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: mas01mc@18: unsigned k,l,m,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01cr@0: double thisDist; mas01cr@0: mas01cr@0: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." << endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: assert(timesdata); mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; k1) { mas01cr@0: cerr << "mean query file duration: " << meanQdur << endl; mas01cr@60: } mas01cr@0: meanDBdur = new double[dbH->numFiles]; mas01cr@0: assert(meanDBdur); mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01cr@0: error("queryPoint > numVectors-wL+1 in query"); mas01cr@0: else{ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: qNorm=qNorm+queryPoint; mas01cr@0: numVectors=wL; mas01cr@0: } mas01cr@0: mas01mc@20: double ** D = 0; // Differences query and target mas01cr@0: double ** DD = 0; 
// Matched filter distance mas01cr@0: mas01cr@0: D = new double*[numVectors]; mas01cr@0: assert(D); mas01cr@0: DD = new double*[numVectors]; mas01cr@0: assert(DD); mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01mc@18: processedTracks=0; mas01mc@18: unsigned successfulTracks=0; mas01cr@0: mas01cr@0: double* qp; mas01cr@0: double* sp; mas01cr@0: double* dp; mas01cr@0: mas01mc@18: // build track offset table mas01mc@18: unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01mc@18: unsigned trackIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey [MAXSTR]; mas01mc@20: mas01mc@20: // chi^2 statistics mas01mc@20: double sampleCount = 0; mas01mc@20: double sampleSum = 0; mas01mc@20: double logSampleSum = 0; mas01mc@20: double minSample = 1e9; mas01mc@20: double maxSample = 0; mas01mc@20: mas01mc@20: // Track loop mas01mc@18: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01cr@0: mas01mc@18: // get trackID from file if using a control file mas01mc@18: if(trackFile){ mas01mc@18: if(!trackFile->eof()){ mas01mc@18: trackFile->getline(nextKey,MAXSTR); mas01mc@18: track=getKeyPos(nextKey); mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01mc@12: mas01mc@18: trackOffset=trackOffsetTable[track]; // numDoubles offset mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01cr@0: mas01cr@57: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01cr@0: mas01cr@60: if(verbosity>7) { mas01mc@18: cerr << track << "." << trackIndexOffset << "." 
<< trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@0: mas01mc@20: // Sum products matrix mas01cr@0: for(j=0; jdim; mas01mc@18: sp=dataBuf+trackOffset+k*dbH->dim; mas01cr@0: DD[j][k]=0.0; // Initialize matched filter array mas01cr@0: dp=&D[j][k]; // point to correlation cell j,k mas01cr@0: *dp=0.0; // initialize correlation cell mas01cr@0: l=dbH->dim; // size of vectors mas01cr@0: while(l--) mas01cr@0: *dp+=*qp++**sp++; mas01cr@0: } mas01cr@0: mas01cr@0: // Matched Filter mas01cr@0: // HOP SIZE == 1 mas01cr@0: double* spd; mas01cr@0: if(HOP_SIZE==1){ // HOP_SIZE = shingleHop mas01cr@0: for(w=0; w3 && usingTimes) { mas01mc@18: cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01cr@0: cerr << "within duration tolerance." << endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: // Search for minimum distance by shingles (concatenated vectors) mas01cr@53: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@53: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01mc@20: thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@60: if(verbosity>10) { mas01mc@20: cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; mas01cr@60: } mas01mc@20: // Gather chi^2 statistics mas01mc@20: if(thisDistmaxSample) mas01mc@20: maxSample=thisDist; mas01mc@20: if(thisDist>1e-9){ mas01mc@20: sampleCount++; mas01mc@20: sampleSum+=thisDist; mas01mc@20: logSampleSum+=log(thisDist); mas01mc@20: } mas01mc@20: mas01mc@20: // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]); mas01cr@0: // Power test mas01cr@0: if(!USE_THRESH || mas01cr@0: // Threshold on mean L2 of Q and S sequences mas01mc@20: (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && mas01cr@0: // Are both query and target windows above mean 
energy? mas01mc@20: (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) mas01mc@20: thisDist=thisDist; // Computed above mas01cr@0: else mas01mc@20: thisDist=1000000.0; mas01mc@20: mas01mc@20: // k-NN match algorithm mas01cr@58: m=pointNN; mas01mc@20: while(m--){ mas01mc@20: if(thisDist<=distances[m]) mas01mc@20: if(m==0 || thisDist>=distances[m-1]){ mas01cr@0: // Shuffle distances up the list mas01cr@0: for(l=pointNN-1; l>m; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[m]=thisDist; mas01cr@0: if(usingQueryPoint) mas01cr@0: qIndexes[m]=queryPoint; mas01cr@0: else mas01cr@0: qIndexes[m]=j; mas01cr@0: sIndexes[m]=k; mas01cr@0: break; mas01mc@20: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Calculate the mean of the N-Best matches mas01cr@0: thisDist=0.0; mas01cr@53: for(m=0; m3) { mas01mc@18: cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; mas01cr@60: } mas01mc@12: mas01mc@20: mas01mc@18: // All the track stuff goes here mas01cr@58: n=trackNN; mas01cr@0: while(n--){ mas01mc@20: if(thisDist<=trackDistances[n]){ mas01mc@20: if((n==0 || thisDist>=trackDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01mc@18: for( l=trackNN-1 ; l > n ; l--){ mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01cr@0: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: successfulTracks++; mas01mc@18: trackIDs[n]=track; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } // Duration match mas01mc@20: mas01mc@18: // Clean up current track mas01cr@0: if(D!=NULL){ mas01cr@0: for(j=0; j1) { mas01mc@18: cerr << endl << 
"processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01cr@0: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01mc@20: cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01mc@20: << " minSample: " << minSample << " maxSample: " << maxSample << endl; mas01mc@20: } mas01cr@0: if(adbQueryResult==0){ mas01cr@60: if(verbosity>1) { mas01cr@0: cerr<__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@86: adbQueryResult->Qpos = new unsigned int[listLen]; mas01cr@86: adbQueryResult->Spos = new unsigned int[listLen]; mas01cr@59: for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01mc@18: adbQueryResult->Dist[k]=trackDistances[k]; mas01mc@18: adbQueryResult->Qpos[k]=trackQIndexes[k]; mas01mc@18: adbQueryResult->Spos[k]=trackSIndexes[k]; mas01mc@18: sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@20: delete[] trackOffsetTable; mas01cr@0: if(queryCopy) mas01mc@20: delete[] queryCopy; mas01cr@0: //if(qNorm) mas01cr@0: //delete qNorm; mas01cr@0: if(D) mas01cr@0: delete[] D; mas01cr@0: if(DD) mas01cr@0: delete[] DD; mas01cr@0: if(timesdata) mas01mc@20: delete[] timesdata; mas01cr@0: if(meanDBdur) mas01mc@20: delete[] meanDBdur; mas01cr@0: mas01cr@0: mas01cr@0: } mas01cr@0: mas01mc@20: // Radius search between query and target tracks mas01mc@20: // efficient implementation based on matched filter mas01mc@20: // assumes normed shingles mas01mc@20: // outputs count of retrieved shingles, 
max retreived = one shingle per query shingle per track
/*
 * audioDB::trackSequenceQueryRad -- radius-based sequence (shingle) query.
 *
 * Parameters:
 *   dbName          database name, handed to initTables()
 *   inFile          query feature file, handed to initTables()
 *   adbQueryResult  when non-null, receives the ranked result lists
 *                   (Rlist, Dist, Qpos, Spos, each allocated with new[]);
 *                   the null case takes a separate branch whose body is not
 *                   legible in this listing.
 *
 * Outline of what the visible code does:
 *   1. Calls error() unless dbH->flags has O2_FLAG_L2NORM set.
 *   2. Copies the query vectors, calls unitNorm() to fill qNorm, and copies
 *      l2normTable into sNorm. Both arrays are then turned into running sums
 *      over windows of sequenceLength entries and square-rooted in place, so
 *      each entry describes the shingle starting at that position.
 *   3. Accumulates a per-track mean of the positive shingle norms in sMeanL2,
 *      averages those over the processed tracks, stores that in DIFF_THRESH
 *      and one fifth of it in SILENCE_THRESH; qMeanL2 is the mean query
 *      shingle norm.
 *   4. For every track (taken in index order, or located with getKeyPos()
 *      from keys read out of trackFile when trackFile is set) that holds at
 *      least sequenceLength vectors, shingle pairs (j,k) are visited in
 *      steps of HOP_SIZE and scored as
 *          thisDist = 2 - (2/(qNorm[j]*sNorm[trackIndexOffset+k])) * DD[j][k]
 *      Pairs that fail the power test are given the distance 1000000.0.
 *      For a given j the scan over k stops at the first pair with
 *      0 <= thisDist <= radius, and distances[0] is incremented.
 *   5. distances[0] is then used as the track's score. Tracks are inserted
 *      into trackDistances/trackIDs/trackQIndexes/trackSIndexes so that
 *      scores stay in descending order, keeping at most trackNN entries.
 *
 * NOTE(review): tokens of the form "mas01mc@17:" look like version-control
 *   annotate prefixes, and several spans of this listing appear to have been
 *   lost (for example the loop headers reading "for(i=0; inumFiles; i++)" and
 *   "for(k=0; kflags & O2_FLAG_TIMES)){"). Recover the text from the
 *   repository before changing any logic here.
 * NOTE(review): "SILENCE_THRESH/=processedTracks" divides by zero when no
 *   track reaches sequenceLength vectors -- confirm callers rule that out.
 * NOTE(review): in the sMeanL2 loop, "if(*ps>0) *pn+=*ps++;" advances ps only
 *   for positive entries, and "ps = sNorm + trackTable[i]" is not a running
 *   offset (compare "snPtr+=trackTable[i]" in the loop before it) -- confirm
 *   that this is intended.
 * NOTE(review): no per-track reset of distances[0] is visible in this
 *   listing -- confirm against the repository.
 */
mas01mc@20: void audioDB::trackSequenceQueryRad(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01mc@17: mas01cr@27: initTables(dbName, 0, inFile); mas01mc@17: mas01mc@17: // For each input vector, find the closest pointNN matching output vectors and report mas01mc@17: // we use stdout in this stub version mas01mc@17: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01mc@17: double* query = (double*)(indata+sizeof(int)); mas01mc@17: double* queryCopy = 0; mas01mc@17: mas01mc@17: double qMeanL2; mas01mc@17: double* sMeanL2; mas01mc@17: mas01mc@17: unsigned USE_THRESH=0; mas01mc@17: double SILENCE_THRESH=0; mas01mc@17: double DIFF_THRESH=0; mas01mc@17: mas01mc@17: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01mc@17: error("Database must be L2 normed for sequence query","use -l2norm"); mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "performing norms ... "; cerr.flush(); mas01cr@60: } mas01mc@17: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01mc@18: mas01mc@17: // Make a copy of the query mas01mc@17: queryCopy = new double[numVectors*dbH->dim]; mas01mc@17: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01mc@17: qNorm = new double[numVectors]; mas01mc@17: sNorm = new double[dbVectors]; mas01mc@17: sMeanL2=new double[dbH->numFiles]; mas01mc@17: assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); mas01mc@17: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01mc@17: query = queryCopy; mas01mc@18: mas01mc@17: // Make norm measurements relative to sequenceLength mas01mc@17: unsigned w = sequenceLength-1; mas01mc@17: unsigned i,j; mas01mc@17: double* ps; mas01mc@17: double tmp1,tmp2; mas01mc@18: mas01mc@17: // Copy the L2 norm values to core to avoid disk random access later on mas01mc@17: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01mc@17: double* snPtr = sNorm; mas01mc@17: for(i=0;
inumFiles; i++){ mas01mc@18: if(trackTable[i]>=sequenceLength){ mas01mc@17: tmp1=*snPtr; mas01mc@17: j=1; mas01mc@17: w=sequenceLength-1; mas01mc@17: while(w--) mas01mc@17: *snPtr+=snPtr[j++]; mas01mc@17: ps = snPtr+1; mas01mc@18: w=trackTable[i]-sequenceLength; // +1 - 1 mas01mc@17: while(w--){ mas01mc@17: tmp2=*ps; mas01mc@17: *ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01mc@17: tmp1=tmp2; mas01mc@17: ps++; mas01mc@17: } mas01mc@17: ps = snPtr; mas01mc@18: w=trackTable[i]-sequenceLength+1; mas01mc@17: while(w--){ mas01mc@17: *ps=sqrt(*ps); mas01mc@17: ps++; mas01mc@17: } mas01mc@17: } mas01mc@18: snPtr+=trackTable[i]; mas01mc@17: } mas01mc@17: mas01mc@17: double* pn = sMeanL2; mas01mc@17: w=dbH->numFiles; mas01mc@17: while(w--) mas01mc@17: *pn++=0.0; mas01mc@17: ps=sNorm; mas01mc@18: unsigned processedTracks=0; mas01mc@17: for(i=0; inumFiles; i++){ mas01mc@18: if(trackTable[i]>sequenceLength-1){ mas01cr@70: w = trackTable[i]-sequenceLength+1; mas01mc@17: pn = sMeanL2+i; mas01mc@17: *pn=0; mas01mc@17: while(w--) mas01mc@17: if(*ps>0) mas01mc@17: *pn+=*ps++; mas01cr@70: *pn/=trackTable[i]-sequenceLength+1; mas01mc@17: SILENCE_THRESH+=*pn; mas01mc@18: processedTracks++; mas01mc@17: } mas01mc@18: ps = sNorm + trackTable[i]; mas01mc@17: } mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "processedTracks: " << processedTracks << endl; mas01cr@60: } mas01mc@17: mas01mc@18: SILENCE_THRESH/=processedTracks; mas01mc@17: USE_THRESH=1; // Turn thresholding on mas01mc@18: DIFF_THRESH=SILENCE_THRESH; // mean shingle power mas01mc@17: SILENCE_THRESH/=5; // 20% of the mean shingle power is SILENCE mas01cr@60: if(verbosity>4) { mas01mc@17: cerr << "silence thresh: " << SILENCE_THRESH; mas01cr@60: } mas01mc@17: w=sequenceLength-1; mas01mc@17: i=1; mas01mc@17: tmp1=*qNorm; mas01mc@17: while(w--) mas01mc@17: *qNorm+=qNorm[i++]; mas01mc@17: ps = qNorm+1; mas01mc@17: w=numVectors-sequenceLength; // +1 -1 mas01mc@17: while(w--){ mas01mc@17: tmp2=*ps; mas01mc@17:
*ps=*(ps-1)-tmp1+*(ps+sequenceLength-1); mas01mc@17: tmp1=tmp2; mas01mc@17: ps++; mas01mc@17: } mas01mc@17: ps = qNorm; mas01mc@17: qMeanL2 = 0; mas01mc@17: w=numVectors-sequenceLength+1; mas01mc@17: while(w--){ mas01mc@17: *ps=sqrt(*ps); mas01mc@17: qMeanL2+=*ps++; mas01mc@17: } mas01mc@17: qMeanL2 /= numVectors-sequenceLength+1; mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "done." << endl; mas01cr@60: } mas01mc@17: mas01cr@60: if(verbosity>1) { mas01mc@18: cerr << "matching tracks..." << endl; mas01cr@60: } mas01mc@17: mas01mc@17: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01mc@18: assert(trackNN>0 && trackNN<=O2_MAXNN); mas01mc@17: mas01mc@17: // Make temporary dynamic memory for results mas01mc@18: double trackDistances[trackNN]; mas01mc@18: unsigned trackIDs[trackNN]; mas01mc@18: unsigned trackQIndexes[trackNN]; mas01mc@18: unsigned trackSIndexes[trackNN]; mas01mc@17: mas01mc@17: double distances[pointNN]; mas01mc@17: unsigned qIndexes[pointNN]; mas01mc@17: unsigned sIndexes[pointNN]; mas01mc@17: mas01mc@17: mas01cr@59: unsigned k,l,n,track,trackOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01mc@17: double thisDist; mas01mc@17: mas01mc@17: for(k=0; kflags & O2_FLAG_TIMES)){ mas01mc@17: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01mc@17: usingTimes=0; mas01mc@17: } mas01mc@17: mas01mc@17: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01mc@17: cerr << "warning: no timestamps given for query. Ignoring database timestamps."
<< endl; mas01mc@17: mas01mc@17: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01mc@17: timesdata = new double[numVectors]; mas01mc@17: assert(timesdata); mas01mc@17: insertTimeStamps(numVectors, timesFile, timesdata); mas01mc@17: // Calculate durations of points mas01mc@17: for(k=0; k1) { mas01mc@17: cerr << "mean query file duration: " << meanQdur << endl; mas01cr@60: } mas01mc@17: meanDBdur = new double[dbH->numFiles]; mas01mc@17: assert(meanDBdur); mas01mc@17: for(k=0; knumFiles; k++){ mas01mc@17: meanDBdur[k]=0.0; mas01mc@18: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01mc@17: error("queryPoint > numVectors-wL+1 in query"); mas01mc@17: else{ mas01cr@60: if(verbosity>1) { mas01mc@17: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@60: } mas01mc@17: query=query+queryPoint*dbH->dim; mas01mc@17: qNorm=qNorm+queryPoint; mas01mc@17: numVectors=wL; mas01mc@17: } mas01mc@17: mas01mc@17: double ** D = 0; // Differences query and target mas01mc@17: double ** DD = 0; // Matched filter distance mas01mc@17: mas01mc@17: D = new double*[numVectors]; mas01mc@17: assert(D); mas01mc@17: DD = new double*[numVectors]; mas01mc@17: assert(DD); mas01mc@17: mas01mc@17: gettimeofday(&tv1, NULL); mas01mc@18: processedTracks=0; mas01mc@18: unsigned successfulTracks=0; mas01mc@17: mas01mc@17: double* qp; mas01mc@17: double* sp; mas01mc@17: double* dp; mas01mc@17: mas01mc@18: // build track offset table mas01mc@18: unsigned *trackOffsetTable = new unsigned[dbH->numFiles]; mas01mc@18: unsigned cumTrack=0; mas01mc@18: unsigned trackIndexOffset; mas01mc@17: for(k=0; knumFiles;k++){ mas01mc@18: trackOffsetTable[k]=cumTrack; mas01mc@18: cumTrack+=trackTable[k]*dbH->dim; mas01mc@17: } mas01mc@17: mas01mc@17: char nextKey [MAXSTR]; mas01mc@17: mas01mc@17: // chi^2 statistics mas01mc@17: double sampleCount = 0; mas01mc@17: double sampleSum = 0; mas01mc@17: double logSampleSum = 0; mas01mc@17: double minSample = 1e9; mas01mc@17: double maxSample = 0;
mas01mc@17: mas01mc@17: // Track loop mas01mc@18: for(processedTracks=0, track=0 ; processedTracks < dbH->numFiles ; track++, processedTracks++){ mas01mc@17: mas01mc@18: // get trackID from file if using a control file mas01mc@18: if(trackFile){ mas01mc@18: if(!trackFile->eof()){ mas01mc@18: trackFile->getline(nextKey,MAXSTR); mas01mc@18: track=getKeyPos(nextKey); mas01mc@17: } mas01mc@17: else mas01mc@17: break; mas01mc@17: } mas01mc@17: mas01mc@18: trackOffset=trackOffsetTable[track]; // numDoubles offset mas01mc@18: trackIndexOffset=trackOffset/dbH->dim; // numVectors offset mas01mc@17: mas01cr@70: if(sequenceLength<=trackTable[track]){ // test for short sequences mas01mc@17: mas01cr@60: if(verbosity>7) { mas01mc@18: cerr << track << "." << trackIndexOffset << "." << trackTable[track] << " | ";cerr.flush(); mas01cr@60: } mas01cr@60: mas01mc@17: // Sum products matrix mas01mc@17: for(j=0; jdim; mas01mc@18: sp=dataBuf+trackOffset+k*dbH->dim; mas01mc@17: DD[j][k]=0.0; // Initialize matched filter array mas01mc@17: dp=&D[j][k]; // point to correlation cell j,k mas01mc@17: *dp=0.0; // initialize correlation cell mas01mc@17: l=dbH->dim; // size of vectors mas01mc@17: while(l--) mas01mc@17: *dp+=*qp++**sp++; mas01mc@17: } mas01mc@17: mas01mc@17: // Matched Filter mas01mc@17: // HOP SIZE == 1 mas01mc@17: double* spd; mas01mc@17: if(HOP_SIZE==1){ // HOP_SIZE = shingleHop mas01mc@17: for(w=0; w3 && usingTimes) { mas01mc@18: cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[track] << endl; mas01mc@17: cerr.flush(); mas01mc@17: } mas01mc@17: mas01mc@17: if(!usingTimes || mas01mc@17: (usingTimes mas01mc@18: && fabs(meanDBdur[track]-meanQdur)3 && usingTimes) { mas01mc@17: cerr << "within duration tolerance."
<< endl; mas01mc@17: cerr.flush(); mas01mc@17: } mas01mc@17: mas01mc@17: // Search for minimum distance by shingles (concatenated vectors) mas01cr@70: for(j=0;j<=numVectors-wL;j+=HOP_SIZE) mas01cr@70: for(k=0;k<=trackTable[track]-wL;k+=HOP_SIZE){ mas01mc@18: thisDist=2-(2/(qNorm[j]*sNorm[trackIndexOffset+k]))*DD[j][k]; mas01cr@60: if(verbosity>10) { mas01mc@18: cerr << thisDist << " " << qNorm[j] << " " << sNorm[trackIndexOffset+k] << endl; mas01cr@60: } mas01mc@17: // Gather chi^2 statistics mas01mc@17: if(thisDistmaxSample) mas01mc@17: maxSample=thisDist; mas01mc@17: if(thisDist>1e-9){ mas01mc@17: sampleCount++; mas01mc@17: sampleSum+=thisDist; mas01mc@17: logSampleSum+=log(thisDist); mas01mc@17: } mas01mc@17: mas01mc@18: // diffL2 = fabs(qNorm[j] - sNorm[trackIndexOffset+k]); mas01mc@17: // Power test mas01mc@17: if(!USE_THRESH || mas01mc@17: // Threshold on mean L2 of Q and S sequences mas01mc@18: (USE_THRESH && qNorm[j]>SILENCE_THRESH && sNorm[trackIndexOffset+k]>SILENCE_THRESH && mas01mc@17: // Are both query and target windows above mean energy? mas01mc@18: (qNorm[j]>qMeanL2*.25 && sNorm[trackIndexOffset+k]>sMeanL2[track]*.25))) // && diffL2 < DIFF_THRESH ))) mas01mc@17: thisDist=thisDist; // Computed above mas01mc@17: else mas01mc@17: thisDist=1000000.0; mas01mc@17: if(thisDist>=0 && thisDist<=radius){ mas01mc@17: distances[0]++; // increment count mas01mc@18: break; // only need one track point per query point mas01mc@17: } mas01mc@17: } mas01mc@17: // How many points were below threshold ? mas01mc@17: thisDist=distances[0]; mas01mc@17: mas01mc@17: // Let's see the distances then...
mas01cr@60: if(verbosity>3) { mas01mc@18: cerr << fileTable+track*O2_FILETABLESIZE << " " << thisDist << endl; mas01cr@60: } mas01mc@17: mas01mc@18: // All the track stuff goes here mas01mc@18: n=trackNN; mas01mc@17: while(n--){ mas01mc@18: if(thisDist>trackDistances[n]){ mas01mc@18: if((n==0 || thisDist<=trackDistances[n-1])){ mas01mc@17: // Copy all values above up the queue mas01mc@18: for( l=trackNN-1 ; l > n ; l--){ mas01mc@18: trackDistances[l]=trackDistances[l-1]; mas01mc@18: trackQIndexes[l]=trackQIndexes[l-1]; mas01mc@18: trackSIndexes[l]=trackSIndexes[l-1]; mas01mc@18: trackIDs[l]=trackIDs[l-1]; mas01mc@17: } mas01mc@18: trackDistances[n]=thisDist; mas01mc@18: trackQIndexes[n]=qIndexes[0]; mas01mc@18: trackSIndexes[n]=sIndexes[0]; mas01mc@18: successfulTracks++; mas01mc@18: trackIDs[n]=track; mas01mc@17: break; mas01mc@17: } mas01mc@17: } mas01mc@17: else mas01mc@17: break; mas01mc@17: } mas01mc@17: } // Duration match mas01mc@17: mas01mc@18: // Clean up current track mas01mc@17: if(D!=NULL){ mas01mc@17: for(j=0; j1) { mas01mc@18: cerr << endl << "processed tracks :" << processedTracks << " matched tracks: " << successfulTracks << " elapsed time:" mas01mc@17: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01mc@17: cerr << "sampleCount: " << sampleCount << " sampleSum: " << sampleSum << " logSampleSum: " << logSampleSum mas01mc@17: << " minSample: " << minSample << " maxSample: " << maxSample << endl; mas01mc@17: } mas01mc@17: mas01mc@17: if(adbQueryResult==0){ mas01cr@60: if(verbosity>1) { mas01mc@17: cerr<__sizeRlist=listLen; mas01mc@17: adbQueryResult->__sizeDist=listLen; mas01mc@17: adbQueryResult->__sizeQpos=listLen; mas01mc@17: adbQueryResult->__sizeSpos=listLen; mas01mc@17: adbQueryResult->Rlist= new char*[listLen]; mas01mc@17: adbQueryResult->Dist = new double[listLen]; mas01cr@86: adbQueryResult->Qpos = new unsigned int[listLen]; mas01cr@86: adbQueryResult->Spos = new unsigned int[listLen];
mas01cr@59: for(k=0; k<(unsigned)adbQueryResult->__sizeRlist; k++){ mas01mc@17: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01mc@18: adbQueryResult->Dist[k]=trackDistances[k]; mas01mc@18: adbQueryResult->Qpos[k]=trackQIndexes[k]; mas01mc@18: adbQueryResult->Spos[k]=trackSIndexes[k]; mas01mc@18: sprintf(adbQueryResult->Rlist[k], "%s", fileTable+trackIDs[k]*O2_FILETABLESIZE); mas01mc@17: } mas01mc@17: } mas01mc@17: mas01mc@17: mas01mc@17: // Clean up mas01mc@18: if(trackOffsetTable) mas01mc@18: delete[] trackOffsetTable; mas01mc@17: if(queryCopy) mas01mc@17: delete[] queryCopy; mas01mc@17: //if(qNorm) mas01mc@17: //delete qNorm; mas01mc@17: if(D) mas01mc@17: delete[] D; mas01mc@17: if(DD) mas01mc@17: delete[] DD; mas01mc@17: if(timesdata) mas01mc@17: delete[] timesdata; mas01mc@17: if(meanDBdur) mas01mc@17: delete[] meanDBdur; mas01mc@17: mas01mc@17: mas01mc@17: }

// Unit norm block of features
//
// Computes the sum of squares of each of the n vectors of length dim stored
// back to back in X. Despite the name, X itself is never rescaled and no
// square root is taken: the value stored is the squared L2 norm.
//
//   X      n*dim doubles, read only
//   dim    number of components per vector
//   n      number of vectors
//   qNorm  when non-null, receives n sums of squares (one per vector);
//          when null the sums are computed and discarded
void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){
  if(verbosity>2) {
    cerr << "norming " << n << " vectors...";cerr.flush();
  }

  for(unsigned v=0; v<n; v++, X+=dim){
    double sumSq=0.0;
    for(unsigned c=0; c<dim; c++)
      sumSq+=X[c]*X[c];
    if(qNorm)
      qNorm[v]=sumSq;
  }

  if(verbosity>2) {
    cerr << "done..." << endl;
  }
}

// Unit norm block of features
//
// Computes the sum of squares of each of the n vectors in X and copies the
// results into l2normTable. As with unitNorm(), X is left unmodified and the
// stored values are squared norms.
//
//   X       n*dim doubles, read only
//   dim     number of components per vector
//   n       number of vectors
//   append  zero: results are written from the start of l2normTable, and
//           error() is called if the database already carries
//           O2_FLAG_L2NORM; non-zero: results are written after the entries
//           for the vectors that were already in the database
void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
  assert(l2normTable);

  if( !append && (dbH->flags & O2_FLAG_L2NORM) )
    error("Database is already L2 normed", "automatic norm on insert is enabled");

  if(verbosity>2) {
    cerr << "norming " << n << " vectors...";cerr.flush();
  }

  // The sums are staged in a scratch buffer and copied across in one go.
  double* l2buf = new double[n];
  assert(l2buf);
  assert(X);

  const double* vec = X;
  for(unsigned v=0; v<n; v++, vec+=dim){
    l2buf[v]=0.0;
    for(unsigned c=0; c<dim; c++)
      l2buf[v]+=vec[c]*vec[c];
  }

  unsigned offset=0;
  if(append) {
    // FIXME: a hack, a very palpable hack: the vectors have already
    // been inserted, and dbH->length has already been updated.  We
    // need to subtract off again the number of vectors that we've
    // inserted this time...
    offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
  }

  memcpy(l2normTable+offset, l2buf, n*sizeof(double));
  delete[] l2buf;

  if(verbosity>2) {
    cerr << "done..." << endl;
  }
}


// Start an audioDB server on the host
//
// Binds a gSOAP master socket to `port` and then serves one request per
// accepted connection until soap_accept() fails. A failed bind is reported
// with soap_print_fault() and the function returns without serving.
void audioDB::startServer(){
  struct soap soap;
  soap_init(&soap);

  const int master = soap_bind(&soap, NULL, port, 100); // master socket
  if (master < 0) {
    soap_print_fault(&soap, stderr);
  } else {
    fprintf(stderr, "Socket connection successful: master socket = %d\n", master);
    for (int i = 1; ; i++) {
      const int slave = soap_accept(&soap); // slave socket
      if (slave < 0) {
        soap_print_fault(&soap, stderr);
        break;
      }
      // Pin the type to unsigned long so that the arguments match the %lu
      // conversions whatever integer type the gSOAP header gives soap.ip.
      const unsigned long ip = soap.ip;
      fprintf(stderr, "%d: accepted connection from IP=%lu.%lu.%lu.%lu socket=%d\n", i,
              (ip >> 24)&0xFF, (ip >> 16)&0xFF, (ip >> 8)&0xFF, ip&0xFF, slave);
      if (soap_serve(&soap) != SOAP_OK) // process RPC request
        soap_print_fault(&soap, stderr); // print error
      fprintf(stderr, "request served\n");
      soap_destroy(&soap); // clean up class instances
      soap_end(&soap); // clean up everything and close socket
    }
  }
  soap_done(&soap); // close master socket and detach environment
}


// web services

// SERVER SIDE

// Web-service entry point for the status command.
//
// Builds the argument vector for COM_STATUS on dbName and constructs a
// temporary audioDB with it, handing over adbStatusResult to be filled in.
// Returns SOAP_OK on success. If a char* is thrown, it is installed as the
// receiver fault string and SOAP_FAULT is returned.
//
// NOTE(review): the thrown buffer is presumably the one audioDB::error()
// allocates with new[] in server mode; it is not released here.
int adb__status(struct soap* soap, xsd__string dbName, adb__statusResult &adbStatusResult){
  // const char* elements: string literals must not be bound to char*.
  const char* argv[]={"audioDB",COM_STATUS,"-d",dbName};
  const unsigned argc = sizeof(argv)/sizeof(argv[0]);
  try {
    audioDB(argc, const_cast<char* const*>(argv), &adbStatusResult);
    return SOAP_OK;
  } catch(char *err) {
    soap_receiver_fault(soap, err, "");
    return SOAP_FAULT;
  }
}

// Literal translation of command line to web service
//
// Web-service entry point for queries. Translates the request fields into
// the equivalent command line and constructs a temporary audioDB with it,
// handing over adbQueryResult to be filled in.
//
//   qType          O2_FLAG_POINT_QUERY, O2_FLAG_SEQUENCE_QUERY or
//                  O2_FLAG_TRACK_QUERY; any other value is passed on as an
//                  empty query-type argument
//   keyList, timesFileName
//                  may be null, in which case an empty string is passed
//   pointNN, trackNN, seqLen
//                  zero selects the defaults 10, 10 and 16 respectively
//
// Returns SOAP_OK on success. If a char* is thrown, it is installed as the
// receiver fault string and SOAP_FAULT is returned.
//
// NOTE(review): the thrown buffer is the one audioDB::error() allocates
// with new[] in server mode; it is not released here.
int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int trackNN, xsd__int seqLen, adb__queryResult &adbQueryResult){
  const char* queryType = "";
  if(qType == O2_FLAG_POINT_QUERY)
    queryType = "point";
  else if(qType == O2_FLAG_SEQUENCE_QUERY)
    queryType = "sequence";
  else if(qType == O2_FLAG_TRACK_QUERY)
    queryType = "track";

  if(pointNN==0)
    pointNN=10;
  if(trackNN==0)
    trackNN=10;
  if(seqLen==0)
    seqLen=16;

  // Numeric options travel as text, exactly as on a real command line.
  char qPosStr[32];
  snprintf(qPosStr, sizeof(qPosStr), "%d", qPos);
  char pointNNStr[32];
  snprintf(pointNNStr, sizeof(pointNNStr), "%d", pointNN);
  char trackNNStr[32];
  snprintf(trackNNStr, sizeof(trackNNStr), "%d", trackNN);
  char seqLenStr[32];
  snprintf(seqLenStr, sizeof(seqLenStr), "%d", seqLen);

  const char* argv[] ={
    "./audioDB",
    COM_QUERY,
    queryType, // Need to pass a parameter
    COM_DATABASE,
    dbName,
    COM_FEATURES,
    qKey,
    COM_KEYLIST,
    keyList==0?"":keyList,
    COM_TIMES,
    timesFileName==0?"":timesFileName,
    COM_QPOINT,
    qPosStr,
    COM_POINTNN,
    pointNNStr,
    COM_TRACKNN,
    trackNNStr, // Need to pass a parameter
    COM_SEQLEN,
    seqLenStr
  };

  // Derived from the array so the count cannot drift from its contents.
  const unsigned argc = sizeof(argv)/sizeof(argv[0]);
  try {
    audioDB(argc, const_cast<char* const*>(argv), &adbQueryResult);
    return SOAP_OK;
  } catch (char *err) {
    soap_receiver_fault(soap, err, "");
    return SOAP_FAULT;
  }
}

mas01cr@0: int main(const unsigned argc, char* const argv[]){ mas01cr@0: audioDB(argc, argv); mas01cr@0: }