mas01cr@0: /* audioDB.cpp mas01cr@0: mas01cr@0: audioDB version 1.0 mas01cr@0: mas01cr@0: A feature vector database management system for content-based retrieval. mas01cr@0: mas01cr@0: Usage: audioDB [OPTIONS]... mas01cr@0: mas01cr@0: --full-help Print help, including hidden options, and exit mas01cr@0: -V, --version Print version and exit mas01cr@0: -H, --help print help on audioDB usage and exit. mas01cr@0: -v, --verbosity=detail level of detail of operational information. mas01cr@0: (default=`1') mas01cr@0: mas01cr@0: Database Setup: mas01cr@0: All database operations require a database argument. mas01cr@0: mas01cr@0: Database commands are UPPER CASE. Command options are lower case. mas01cr@0: mas01cr@0: -d, --database=filename database file required by Database commands. mas01cr@0: -N, --NEW make a new (initially empty) database. mas01cr@0: -S, --STATUS output database information to stdout. mas01cr@0: -D, --DUMP output all entries: index key size. mas01cr@0: -L, --L2NORM unit norm vectors and norm all future inserts. mas01cr@0: mas01cr@0: Database Insertion: mas01cr@0: The following commands insert feature files, with optional keys and mas01cr@0: timestamps. mas01cr@0: mas01cr@0: -I, --INSERT add feature vectors to an existing database. mas01cr@0: -U, --UPDATE replace inserted vectors associated with key mas01cr@0: with new input vectors. mas01cr@0: -f, --features=filename binary series of vectors file {int sz:ieee mas01cr@0: double[][sz]:eof}. mas01cr@0: -t, --times=filename list of time points (ascii) for feature vectors. mas01cr@0: -k, --key=identifier unique identifier associated with features. mas01cr@0: mas01cr@0: -B, --BATCHINSERT add feature vectors named in a --featureList mas01cr@0: file (with optional keys in a --keyList file) mas01cr@0: to the named database. 
mas01cr@0: -F, --featureList=filename text file containing list of binary feature mas01cr@0: vector files to process mas01cr@0: -T, --timesList=filename text file containing list of ascii --times for mas01cr@0: each --features file in --featureList. mas01cr@0: -K, --keyList=filename text file containing list of unique identifiers mas01cr@0: associated with --features. mas01cr@0: mas01cr@0: Database Search: mas01cr@0: These commands control the retrieval behaviour. mas01cr@0: mas01cr@0: -Q, --QUERY=searchtype content-based search on --database using mas01cr@0: --features as a query. Optionally restrict the mas01cr@0: search to those segments identified in a mas01cr@0: --keyList. (possible values="point", mas01cr@0: "segment", "sequence") mas01cr@0: -p, --qpoint=position ordinal position of query start point in mas01cr@0: --features file. (default=`0') mas01cr@0: -e, --exhaustive exhaustive search: iterate through all query mas01cr@0: vectors in search. Overrides --qpoint. mas01cr@0: (default=off) mas01cr@0: -n, --pointnn=numpoints number of point nearest neighbours to use in mas01cr@0: retrieval. (default=`10') mas01cr@0: -R, --radius=DOUBLE radius search, returns all mas01cr@0: points/segments/sequences inside given radius. mas01cr@0: (default=`1.0') mas01cr@0: -x, --expandfactor=DOUBLE time compress/expand factor of result length to mas01cr@0: query length [1.0 .. 100.0]. (default=`1.1') mas01cr@0: -o, --rotate rotate query vectors for rotationally invariant mas01cr@0: search. (default=off) mas01cr@0: -r, --resultlength=length maximum length of the result list. mas01cr@0: (default=`10') mas01cr@0: -l, --sequencelength=length length of sequences for sequence search. mas01cr@0: (default=`16') mas01cr@0: -h, --sequencehop=hop hop size of sequence window for sequence search. 
mas01cr@0: (default=`1') mas01cr@0: mas01cr@0: Web Services: mas01cr@0: These commands enable the database process to establish a connection via the mas01cr@0: internet and operate as separate client and server processes. mas01cr@0: mas01cr@0: -s, --SERVER=port run as standalone web service on named port. mas01cr@0: (default=`80011') mas01cr@0: -c, --client=hostname:port run as a client using named host service. mas01cr@0: mas01cr@0: Copyright (C) 2007 Michael Casey, Goldsmiths, University of London mas01cr@0: mas01cr@0: outputs: mas01cr@0: mas01cr@0: key1 distance1 qpos1 spos1 mas01cr@0: key2 distance2 qpos2 spos2 mas01cr@0: ... mas01cr@0: keyN distanceN qposN sposN mas01cr@0: mas01cr@0: */ mas01cr@0: mas01cr@0: #include "audioDB.h" mas01cr@0: mas01cr@0: #define O2_DEBUG mas01cr@0: mas01cr@0: void audioDB::error(const char* a, const char* b){ mas01cr@0: cerr << a << ":" << b << endl; mas01cr@0: exit(1); mas01cr@0: } mas01cr@0: mas01cr@0: audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResult *adbQueryResult): mas01cr@0: dim(0), mas01cr@0: dbName(0), mas01cr@0: inFile(0), mas01cr@0: key(0), mas01cr@0: segFile(0), mas01cr@0: segFileName(0), mas01cr@0: timesFile(0), mas01cr@0: timesFileName(0), mas01cr@0: usingTimes(0), mas01cr@0: command(0), mas01cr@0: dbfid(0), mas01cr@0: db(0), mas01cr@0: dbH(0), mas01cr@0: infid(0), mas01cr@0: indata(0), mas01cr@0: queryType(O2_FLAG_POINT_QUERY), mas01cr@0: verbosity(1), mas01cr@0: pointNN(O2_DEFAULT_POINTNN), mas01cr@0: segNN(O2_DEFAULT_SEGNN), mas01cr@0: segTable(0), mas01cr@0: fileTable(0), mas01cr@0: dataBuf(0), mas01cr@0: l2normTable(0), mas01cr@0: timesTable(0), mas01cr@0: qNorm(0), mas01cr@0: sequenceLength(16), mas01cr@0: sequenceHop(1), mas01cr@0: queryPoint(0), mas01cr@0: usingQueryPoint(0), mas01cr@0: isClient(0), mas01cr@0: isServer(0), mas01cr@0: port(0), mas01mc@11: timesTol(0.1){ mas01cr@0: mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: 
cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(1); mas01cr@0: } mas01cr@0: mas01cr@0: if(O2_ACTION(COM_SERVER)) mas01cr@0: startServer(); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01cr@0: if(isClient) mas01cr@0: ws_query(dbName, inFile, (char*)hostport); mas01cr@0: else mas01cr@0: query(dbName, inFile, adbQueryResult); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@0: audioDB::~audioDB(){ mas01cr@0: // Clean up mas01cr@0: if(indata) mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: if(db) mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: if(dbfid>0) mas01cr@0: close(dbfid); mas01cr@0: if(infid>0) mas01cr@0: close(infid); mas01cr@0: if(dbH) mas01cr@0: delete dbH; mas01cr@0: } mas01cr@0: mas01cr@0: int audioDB::processArgs(const unsigned argc, char* const argv[]){ mas01cr@0: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", 
gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if (cmdline_parser (argc, argv, &args_info) != 0) mas01cr@0: exit(1) ; mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@0: verbosity=args_info.verbosity_arg; mas01cr@0: if(verbosity<0 || verbosity>10){ mas01cr@0: cerr << "Warning: verbosity out of range, setting to 1" << endl; mas01cr@0: verbosity=1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@0: isServer=1; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@0: if(args_info.client_given){ mas01cr@0: command=COM_CLIENT; mas01cr@0: hostport=args_info.client_arg; mas01cr@0: isClient=1; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.NEW_given){ mas01cr@0: command=COM_CREATE; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.STATUS_given){ mas01cr@0: command=COM_STATUS; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.DUMP_given){ mas01cr@0: command=COM_DUMP; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.L2NORM_given){ mas01cr@0: command=COM_L2NORM; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.INSERT_given){ mas01cr@0: command=COM_INSERT; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.features_arg; mas01cr@0: 
if(args_info.key_given) mas01cr@0: key=args_info.key_arg; mas01cr@0: if(args_info.times_given){ mas01cr@0: timesFileName=args_info.times_arg; mas01cr@0: if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open times file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: return 0; mas01cr@0: } mas01mc@10: mas01cr@0: if(args_info.BATCHINSERT_given){ mas01cr@0: command=COM_BATCHINSERT; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.featureList_arg; mas01cr@0: if(args_info.keyList_given) mas01cr@0: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@0: mas01cr@0: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ mas01cr@0: segFileName=args_info.keyList_arg; mas01cr@0: if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) mas01cr@0: error("Could not open keyList file for reading",segFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@0: */ mas01cr@0: mas01cr@0: if(args_info.timesList_given){ mas01cr@0: timesFileName=args_info.timesList_arg; mas01cr@0: if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open timesList file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // Query command and arguments mas01cr@0: if(args_info.QUERY_given){ mas01cr@0: command=COM_QUERY; mas01cr@0: dbName=args_info.database_arg; mas01cr@0: inFile=args_info.features_arg; mas01cr@0: mas01cr@0: if(args_info.keyList_given){ mas01cr@0: segFileName=args_info.keyList_arg; mas01cr@0: if(strlen(segFileName)>0 && !(segFile = new ifstream(segFileName,ios::in))) mas01cr@0: error("Could not open keyList file for reading",segFileName); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.times_given){ mas01cr@0: timesFileName=args_info.times_arg; mas01cr@0: 
if(strlen(timesFileName)>0){ mas01cr@0: if(!(timesFile = new ifstream(timesFileName,ios::in))) mas01cr@0: error("Could not open times file for reading", timesFileName); mas01cr@0: usingTimes=1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // query type mas01cr@0: if(strncmp(args_info.QUERY_arg, "segment", MAXSTR)==0) mas01cr@0: queryType=O2_FLAG_SEG_QUERY; mas01cr@0: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@0: queryType=O2_FLAG_POINT_QUERY; mas01cr@0: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@0: queryType=O2_FLAG_SEQUENCE_QUERY; mas01cr@0: else mas01cr@0: error("unsupported query type",args_info.QUERY_arg); mas01cr@0: mas01cr@0: if(!args_info.exhaustive_flag){ mas01cr@0: queryPoint = args_info.qpoint_arg; mas01cr@0: usingQueryPoint=1; mas01cr@0: if(queryPoint<0 || queryPoint >10000) mas01cr@0: error("queryPoint out of range: 0 <= queryPoint <= 10000"); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: pointNN=args_info.pointnn_arg; mas01cr@0: if(pointNN<1 || pointNN >1000) mas01cr@0: error("pointNN out of range: 1 <= pointNN <= 1000"); mas01cr@0: mas01cr@0: mas01cr@0: mas01cr@0: segNN=args_info.resultlength_arg; mas01cr@0: if(segNN<1 || segNN >1000) mas01cr@0: error("resultlength out of range: 1 <= resultlength <= 1000"); mas01cr@0: mas01cr@0: mas01cr@0: sequenceLength=args_info.sequencelength_arg; mas01cr@0: if(sequenceLength<1 || sequenceLength >1000) mas01cr@0: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@0: mas01cr@0: sequenceHop=args_info.sequencehop_arg; mas01cr@0: if(sequenceHop<1 || sequenceHop >1000) mas01cr@0: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01cr@0: mas01cr@0: return 0; mas01cr@0: } mas01cr@0: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@0: /* Make a new database mas01cr@0: mas01cr@0: The database consists of: mas01cr@0: mas01cr@0: header mas01cr@0: --------------------------------------------------------------------------------- mas01cr@0: | magic 4 
bytes| numFiles 4 bytes | dim 4 bytes | length 4 bytes |flags 4 bytes | mas01cr@0: --------------------------------------------------------------------------------- mas01cr@0: mas01cr@0: mas01cr@0: keyTable : list of keys of segments mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | key 256 bytes | mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: O2_MAXFILES*O2_FILENAMELENGTH mas01cr@0: mas01cr@0: segTable : Maps implicit feature index to a feature vector matrix mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | numVectors (4 bytes) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(INT) mas01cr@0: mas01cr@0: featureTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | v1 v2 v3 ... vd (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: O2_MAXFILES * O2_MEANNUMFEATURES * DIM * sizeof(DOUBLE) mas01cr@0: mas01cr@0: timesTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | timestamp (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) mas01cr@0: mas01cr@0: l2normTable mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: | nm (double) | mas01cr@0: -------------------------------------------------------------------------- mas01cr@0: O2_MAXFILES * O2_MEANNUMFEATURES * sizeof(DOUBLE) mas01cr@0: mas01cr@0: */ mas01cr@0: mas01cr@0: void audioDB::create(const char* dbName){ mas01cr@8: if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) mas01cr@8: error("Can't open database file", dbName); 
mas01cr@0: mas01cr@0: // go to the location corresponding to the last byte mas01cr@0: if (lseek (dbfid, O2_DEFAULTDBSIZE - 1, SEEK_SET) == -1) mas01cr@0: error("lseek error in db file"); mas01cr@0: mas01cr@0: // write a dummy byte at the last location mas01cr@0: if (write (dbfid, "", 1) != 1) mas01cr@0: error("write error"); mas01cr@0: mas01cr@0: // mmap the output file mas01cr@0: if(verbosity) mas01cr@0: cerr << "header size:" << O2_HEADERSIZE << endl; mas01cr@0: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01cr@0: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01cr@0: error("mmap error for creating database"); mas01cr@0: mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@0: // Initialize header mas01cr@0: dbH->magic=O2_MAGIC; mas01cr@0: dbH->numFiles=0; mas01cr@0: dbH->length=0; mas01cr@0: dbH->dim=0; mas01cr@0: dbH->flags=0; //O2_FLAG_L2NORM; mas01cr@0: mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: if(verbosity) mas01cr@0: cerr << COM_CREATE << " " << dbName << endl; mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: void audioDB::drop(){ mas01cr@0: mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: // initTables - memory map files passed as arguments mas01cr@0: // Precondition: database has already been created mas01cr@0: void audioDB::initTables(const char* dbName, const char* inFile=0){ mas01cr@0: if ((dbfid = open (dbName, O_RDWR)) < 0) mas01cr@0: error("Can't open database file:", dbName); mas01cr@0: mas01cr@0: // open the input file mas01cr@0: if (inFile && (infid = open (inFile, O_RDONLY)) < 0) mas01cr@9: error("can't open input file for reading", inFile); mas01cr@0: mas01cr@0: // find size of input file mas01cr@0: if (inFile && fstat (infid,&statbuf) < 0) mas01cr@0: error("fstat error finding size of input"); mas01cr@0: mas01cr@0: // Get the database header info mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@0: 
if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) mas01cr@0: error("error reading db header"); mas01cr@0: mas01cr@0: fileTableOffset = O2_HEADERSIZE; mas01cr@0: segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; mas01cr@0: dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; mas01cr@0: l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: mas01cr@0: if(dbH->magic!=O2_MAGIC){ mas01cr@0: cerr << "expected: " << O2_MAGIC << ", got:" << dbH->magic << endl; mas01cr@0: error("database file has incorrect header",dbName); mas01cr@0: } mas01cr@0: mas01cr@0: if(inFile) mas01cr@0: if(dbH->dim==0 && dbH->length==0) // empty database mas01cr@0: read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality mas01cr@0: else { mas01cr@0: unsigned test; mas01cr@0: read(infid,&test,sizeof(unsigned)); mas01cr@0: if(dbH->dim!=test){ mas01cr@0: cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped database","use --times"); mas01cr@0: mas01cr@0: // Check that there is room for at least 1 more file mas01cr@0: if((char*)timesTable<((char*)dataBuf+dbH->length+statbuf.st_size-sizeof(int))) mas01cr@0: error("No more room in database","insert failed: reason database is full."); mas01cr@0: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: // Linear scan of filenames check for pre-existing feature mas01cr@0: unsigned alreadyInserted=0; mas01cr@0: for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@0: if(verbosity) mas01cr@0: cerr << "Warning: key already exists in database, ignoring: " <dim); mas01cr@0: if(!numVectors){ mas01cr@0: 
if(verbosity) mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << key << endl; mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: close(infid); mas01cr@0: return; mas01cr@0: } mas01cr@0: mas01cr@0: strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key)); mas01cr@0: mas01cr@0: unsigned insertoffset = dbH->length;// Store current state mas01cr@0: mas01cr@0: // Check times status and insert times from file mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* timesdata=timesTable+timesoffset; mas01cr@0: assert(timesdata+numVectorsnumFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Copy the header back to the database mas01cr@0: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@0: mas01cr@0: // Update segment to file index map mas01cr@0: //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); mas01cr@0: memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@0: // Update the feature database mas01cr@0: memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01cr@0: if(verbosity) mas01cr@0: cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors " mas01cr@0: << (statbuf.st_size-sizeof(int)) << " bytes." 
<< endl; mas01cr@0: mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::insertTimeStamps(unsigned numVectors, ifstream* timesFile, double* timesdata){ mas01cr@0: unsigned numtimes=0; mas01cr@0: if(usingTimes){ mas01cr@0: if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) mas01cr@0: dbH->flags=dbH->flags|O2_FLAG_TIMES; mas01cr@0: else if(!(dbH->flags&O2_FLAG_TIMES)){ mas01cr@0: cerr << "Warning: timestamp file used with non time-stamped database: ignoring timestamps" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: if(!timesFile->is_open()){ mas01cr@0: if(dbH->flags & O2_FLAG_TIMES){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: error("problem opening times file on timestamped database",timesFileName); mas01cr@0: } mas01cr@0: else{ mas01cr@0: cerr << "Warning: problem opening times file. But non-timestamped database, so ignoring times file." 
<< endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Process time file mas01cr@0: if(usingTimes){ mas01cr@0: do{ mas01cr@0: *timesFile>>*timesdata++; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: }while(!timesFile->eof() && numtimeseof()){ mas01cr@0: double dummy; mas01cr@0: do{ mas01cr@0: *timesFile>>dummy; mas01cr@0: if(timesFile->eof()) mas01cr@0: break; mas01cr@0: numtimes++; mas01cr@0: }while(!timesFile->eof()); mas01cr@0: } mas01cr@0: if(numtimesnumVectors+2){ mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: close(infid); mas01cr@0: cerr << "expected " << numVectors << " found " << numtimes << endl; mas01cr@0: error("Times file is incorrect length for features file",inFile); mas01cr@0: } mas01cr@0: if(verbosity>2) mas01cr@0: cerr << "numtimes: " << numtimes << endl; mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::batchinsert(const char* dbName, const char* inFile){ mas01cr@0: mas01cr@0: if ((dbfid = open (dbName, O_RDWR)) < 0) mas01cr@0: error("Can't open database file:", dbName); mas01cr@0: mas01cr@0: if(!key) mas01cr@0: key=inFile; mas01cr@0: ifstream *filesIn = 0; mas01cr@0: ifstream *keysIn = 0; mas01cr@0: ifstream* thisTimesFile = 0; mas01cr@0: mas01cr@0: if(!(filesIn = new ifstream(inFile))) mas01cr@0: error("Could not open batch in file", inFile); mas01cr@0: if(key && key!=inFile) mas01cr@0: if(!(keysIn = new ifstream(key))) mas01cr@0: error("Could not open batch key file",key); mas01cr@0: mas01cr@0: // Get the database header info mas01cr@0: dbH = new dbTableHeaderT(); mas01cr@0: assert(dbH); mas01cr@0: mas01cr@0: if(read(dbfid,(char*)dbH,sizeof(dbTableHeaderT))!=sizeof(dbTableHeaderT)) mas01cr@0: error("error reading db header"); mas01cr@0: mas01cr@0: if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: error("Must use timestamps with timestamped database","use --times"); mas01cr@0: mas01cr@0: 
fileTableOffset = O2_HEADERSIZE; mas01cr@0: segTableOffset = fileTableOffset + O2_FILETABLESIZE*O2_MAXFILES; mas01cr@0: dataoffset = segTableOffset + O2_SEGTABLESIZE*O2_MAXFILES; mas01cr@0: l2normTableOffset = O2_DEFAULTDBSIZE - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: timesTableOffset = l2normTableOffset - O2_MAXFILES*O2_MEANNUMVECTORS*sizeof(double); mas01cr@0: mas01cr@0: if(dbH->magic!=O2_MAGIC){ mas01cr@0: cerr << "expected:" << O2_MAGIC << ", got:" << dbH->magic << endl; mas01cr@0: error("database file has incorrect header",dbName); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: unsigned totalVectors=0; mas01cr@0: char *thisKey = new char[MAXSTR]; mas01cr@0: char *thisFile = new char[MAXSTR]; mas01cr@0: char *thisTimesFileName = new char[MAXSTR]; mas01cr@0: mas01cr@0: do{ mas01cr@0: filesIn->getline(thisFile,MAXSTR); mas01cr@0: if(key && key!=inFile) mas01cr@0: keysIn->getline(thisKey,MAXSTR); mas01cr@0: else mas01cr@0: thisKey = thisFile; mas01cr@0: if(usingTimes) mas01cr@0: timesFile->getline(thisTimesFileName,MAXSTR); mas01cr@0: mas01cr@0: if(filesIn->eof()) mas01cr@0: break; mas01cr@0: mas01cr@0: // open the input file mas01cr@0: if (thisFile && (infid = open (thisFile, O_RDONLY)) < 0) mas01cr@0: error("can't open feature file for reading", thisFile); mas01cr@0: mas01cr@0: // find size of input file mas01cr@0: if (thisFile && fstat (infid,&statbuf) < 0) mas01cr@0: error("fstat error finding size of input"); mas01cr@0: mas01mc@11: // mmap the database file mas01mc@11: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01mc@11: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01mc@11: error("mmap error for creating database"); mas01mc@11: mas01mc@11: // Make some handy tables with correct types mas01mc@11: fileTable= (char*)(db+fileTableOffset); mas01mc@11: segTable = (unsigned*)(db+segTableOffset); mas01mc@11: dataBuf = (double*)(db+dataoffset); mas01mc@11: l2normTable = (double*)(db+l2normTableOffset); mas01mc@11: timesTable = 
(double*)(db+timesTableOffset); mas01mc@11: mas01cr@0: // Check that there is room for at least 1 more file mas01cr@0: if((char*)timesTable<((char*)dataBuf+(dbH->length+statbuf.st_size-sizeof(int)))) mas01cr@0: error("No more room in database","insert failed: reason database is full."); mas01cr@0: mas01cr@0: if(thisFile) mas01cr@0: if(dbH->dim==0 && dbH->length==0) // empty database mas01cr@0: read(infid,&dbH->dim,sizeof(unsigned)); // initialize with input dimensionality mas01cr@0: else { mas01cr@0: unsigned test; mas01cr@0: read(infid,&test,sizeof(unsigned)); mas01cr@0: if(dbH->dim!=test){ mas01cr@0: cerr << "error: expected dimension: " << dbH->dim << ", got :" << test <numFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey))==0){ mas01cr@0: alreadyInserted=1; mas01cr@0: break; mas01cr@0: } mas01cr@0: mas01cr@0: if(alreadyInserted){ mas01cr@0: if(verbosity) mas01cr@0: cerr << "Warning: key already exists in database:" << thisKey << endl; mas01cr@0: } mas01cr@0: else{ mas01cr@0: mas01cr@0: // Make a segment index table of features to file indexes mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: if(!numVectors){ mas01cr@0: if(verbosity) mas01cr@0: cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << endl; mas01cr@0: } mas01cr@0: else{ mas01cr@0: if(usingTimes){ mas01cr@0: if(timesFile->eof()) mas01cr@0: error("not enough timestamp files in timesList"); mas01cr@0: thisTimesFile=new ifstream(thisTimesFileName,ios::in); mas01cr@0: if(!thisTimesFile->is_open()) mas01cr@0: error("Cannot open timestamp file",thisTimesFileName); mas01cr@0: unsigned insertoffset=dbH->length; mas01cr@0: unsigned timesoffset=insertoffset/(dbH->dim*sizeof(double)); mas01cr@0: double* timesdata=timesTable+timesoffset; mas01cr@0: assert(timesdata+numVectorsnumFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey)); mas01cr@0: mas01cr@0: unsigned insertoffset = dbH->length;// Store current 
state mas01cr@0: mas01cr@0: // Increment file count mas01cr@0: dbH->numFiles++; mas01cr@0: mas01cr@0: // Update Header information mas01cr@0: dbH->length+=(statbuf.st_size-sizeof(int)); mas01cr@0: // Copy the header back to the database mas01cr@0: memcpy (db, dbH, sizeof(dbTableHeaderT)); mas01cr@0: mas01cr@0: // Update segment to file index map mas01cr@0: //memcpy (db+segTableOffset+(dbH->numFiles-1)*sizeof(unsigned), &numVectors, sizeof(unsigned)); mas01cr@0: memcpy (segTable+dbH->numFiles-1, &numVectors, sizeof(unsigned)); mas01cr@0: mas01cr@0: // Update the feature database mas01cr@0: memcpy (db+dataoffset+insertoffset, indata+sizeof(int), statbuf.st_size-sizeof(int)); mas01cr@0: mas01cr@0: // Norm the vectors on input if the database is already L2 normed mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: unitNormAndInsertL2((double*)(db+dataoffset+insertoffset), dbH->dim, numVectors, 1); // append mas01cr@0: mas01cr@0: totalVectors+=numVectors; mas01cr@0: } mas01cr@0: } mas01cr@0: // CLEAN UP mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: close(infid); mas01mc@11: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: }while(!filesIn->eof()); mas01mc@12: mas01mc@12: // mmap the database file mas01mc@12: if ((db = (char*) mmap(0, O2_DEFAULTDBSIZE, PROT_READ | PROT_WRITE, mas01mc@12: MAP_SHARED, dbfid, 0)) == (caddr_t) -1) mas01mc@12: error("mmap error for creating database"); mas01cr@0: mas01cr@0: if(verbosity) mas01cr@0: cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors " mas01cr@0: << totalVectors*dbH->dim*sizeof(double) << " bytes." 
<< endl; mas01cr@0: mas01cr@0: // Report status mas01cr@0: status(dbName); mas01mc@12: mas01mc@12: munmap(db,O2_DEFAULTDBSIZE); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::ws_status(const char*dbName, char* hostport){ mas01cr@0: struct soap soap; mas01cr@0: int adbStatusResult; mas01cr@0: mas01cr@0: // Query an existing adb database mas01cr@0: soap_init(&soap); mas01cr@0: if(soap_call_adb__status(&soap,hostport,NULL,(char*)dbName,adbStatusResult)==SOAP_OK) mas01cr@0: std::cout << "result = " << adbStatusResult << std::endl; mas01cr@0: else mas01cr@0: soap_print_fault(&soap,stderr); mas01cr@0: mas01cr@0: soap_destroy(&soap); mas01cr@0: soap_end(&soap); mas01cr@0: soap_done(&soap); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::ws_query(const char*dbName, const char *segKey, const char* hostport){ mas01cr@0: struct soap soap; mas01cr@0: adb__queryResult adbQueryResult; mas01cr@0: mas01cr@0: soap_init(&soap); mas01cr@0: if(soap_call_adb__query(&soap,hostport,NULL, mas01cr@0: (char*)dbName,(char*)segKey,(char*)segFileName,(char*)timesFileName, mas01cr@0: queryType, queryPoint, pointNN, segNN, sequenceLength, adbQueryResult)==SOAP_OK){ mas01cr@0: //std::cerr << "result list length:" << adbQueryResult.__sizeRlist << std::endl; mas01cr@0: for(int i=0; inumFiles << endl; mas01cr@0: cout << "data dim:" << dbH->dim <dim>0){ mas01cr@0: cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<length))/(sizeof(double)*dbH->dim) << endl; mas01cr@0: } mas01cr@0: cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(timesTableOffset-dataoffset) << "%)" << endl; mas01cr@0: cout << "bytes available:" << timesTableOffset-(dataoffset+dbH->length) << " (" << mas01cr@0: (100.0*(timesTableOffset-(dataoffset+dbH->length)))/(timesTableOffset-dataoffset) << "%)" << endl; mas01cr@0: cout << "flags:" << dbH->flags << endl; mas01cr@0: mas01cr@0: unsigned dudCount=0; mas01cr@0: unsigned nullCount=0; mas01cr@0: for(unsigned k=0; knumFiles; k++){ mas01cr@0: 
if(segTable[k]numFiles; k++) mas01cr@0: cout << fileTable+k*O2_FILETABLESIZE << " " << segTable[k] << endl; mas01cr@0: mas01cr@0: status(dbName); mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::l2norm(const char* dbName){ mas01cr@0: initTables(dbName,0); mas01cr@0: if(dbH->length>0){ mas01cr@0: unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@0: unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append mas01cr@0: } mas01cr@0: // Update database flags mas01cr@0: dbH->flags = dbH->flags|O2_FLAG_L2NORM; mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: mas01cr@0: void audioDB::query(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: switch(queryType){ mas01cr@0: case O2_FLAG_POINT_QUERY: mas01cr@0: pointQuery(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01cr@0: case O2_FLAG_SEQUENCE_QUERY: mas01cr@0: segSequenceQuery(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01cr@0: case O2_FLAG_SEG_QUERY: mas01cr@0: segPointQuery(dbName, inFile, adbQueryResult); mas01cr@0: break; mas01cr@0: default: mas01cr@0: error("unrecognized queryType in query()"); mas01cr@0: mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: //return ordinal position of key in keyTable mas01cr@0: unsigned audioDB::getKeyPos(char* key){ mas01cr@0: for(unsigned k=0; knumFiles; k++) mas01cr@0: if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key))==0) mas01cr@0: return k; mas01cr@0: error("Key not found",key); mas01cr@0: return O2_ERR_KEYNOTFOUND; mas01cr@0: } mas01cr@0: mas01cr@0: // Basic point query engine mas01cr@0: void audioDB::pointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: mas01cr@0: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = 
(statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: for(unsigned k=0; klength/(dbH->dim*sizeof(double)); mas01cr@0: double meanQdur = 0; mas01cr@0: double* timesdata = 0; mas01cr@0: double* dbdurs = 0; mas01cr@0: mas01cr@0: if(usingTimes && !(dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: j=1; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: while(j--){ // query mas01cr@0: data=dataBuf; mas01cr@0: k=totalVecs; // number of database vectors mas01cr@0: while(k--){ // database mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01cr@0: && fabs(dbdurs[totalVecs-k-1]-timesdata[numVectors-j-1])=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l >= n+1 ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01cr@0: sIndexes[n]=dbH->length/(sizeof(double)*dbH->dim)-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: gettimeofday(&tv2, NULL); mas01cr@0: if(verbosity>1) mas01cr@0: cerr << endl << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@0: mas01cr@0: if(adbQueryResult==0){ mas01cr@0: // Output answer mas01cr@0: // Loop over nearest neighbours mas01cr@0: for(k=0; k < 
pointNN; k++){ mas01cr@0: // Scan for key mas01cr@0: unsigned cumSeg=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01cr@0: cumSeg+=segTable[l]; mas01cr@0: if(sIndexes[k]__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@0: adbQueryResult->Qpos = new int[listLen]; mas01cr@0: adbQueryResult->Spos = new int[listLen]; mas01cr@0: for(k=0; k__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@0: adbQueryResult->Dist[k]=distances[k]; mas01cr@0: adbQueryResult->Qpos[k]=qIndexes[k]; mas01cr@0: unsigned cumSeg=0; mas01cr@0: for(l=0 ; lnumFiles; l++){ mas01cr@0: cumSeg+=segTable[l]; mas01cr@0: if(sIndexes[k]Rlist[k], "%s", fileTable+l*O2_FILETABLESIZE); mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: adbQueryResult->Spos[k]=sIndexes[k]+segTable[l]-cumSeg; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Clean up mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(dbdurs) mas01cr@0: delete dbdurs; mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::sequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: // segPointQuery mas01cr@0: // return the segNN closest segs to the query seg mas01cr@0: // uses average of pointNN points per seg mas01cr@0: void audioDB::segPointQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: unsigned numSegs = dbH->numFiles; 
mas01cr@0: mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: if( dbH->flags & O2_FLAG_L2NORM ){ mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: assert(queryCopy&&qNorm); mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: } mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01cr@0: assert(segNN>0 && segNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01cr@0: double segDistances[segNN]; mas01cr@0: unsigned segIDs[segNN]; mas01cr@0: unsigned segQIndexes[segNN]; mas01cr@0: unsigned segSIndexes[segNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: unsigned j=numVectors; // number of query points mas01cr@0: unsigned k,l,n, seg, segOffset=0, processedSegs=0; mas01cr@0: double thisDist; mas01cr@0: mas01cr@0: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; knumFiles]; mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01cr@0: for(j=0; jnumVectors-1) mas01cr@0: error("queryPoint > numVectors in query"); mas01cr@0: else{ mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: numVectors=queryPoint+1; mas01cr@0: } mas01cr@0: mas01cr@0: // build segment offset table mas01cr@0: unsigned *segOffsetTable = new unsigned[dbH->numFiles]; mas01cr@0: unsigned cumSeg=0; mas01cr@0: unsigned segIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01cr@0: segOffsetTable[k]=cumSeg; mas01cr@0: cumSeg+=segTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey[MAXSTR]; mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: mas01cr@0: for(seg=0 ; seg < dbH->numFiles ; seg++, processedSegs++){ mas01cr@0: if(segFile){ mas01cr@0: if(!segFile->eof()){ mas01cr@0: //*segFile>>seg; mas01cr@0: segFile->getline(nextKey,MAXSTR); mas01mc@12: if(verbosity>3){ mas01mc@12: cerr << nextKey << endl; mas01mc@12: cerr.flush(); mas01mc@12: } mas01cr@0: seg=getKeyPos(nextKey); mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: segOffset=segOffsetTable[seg]; // numDoubles offset mas01cr@0: segIndexOffset=segOffset/dbH->dim; // numVectors offset mas01cr@0: if(verbosity>7) mas01cr@0: cerr << seg << "." << segOffset/(dbH->dim) << "." 
<< segTable[seg] << " | ";cerr.flush(); mas01cr@0: mas01cr@0: if(dbH->flags & O2_FLAG_L2NORM) mas01cr@0: usingQueryPoint?query=queryCopy+queryPoint*dbH->dim:query=queryCopy; mas01cr@0: else mas01cr@0: usingQueryPoint?query=(double*)(indata+sizeof(int))+queryPoint*dbH->dim:query=(double*)(indata+sizeof(int)); mas01cr@0: if(usingQueryPoint) mas01cr@0: j=1; mas01cr@0: else mas01cr@0: j=numVectors; mas01cr@0: while(j--){ mas01cr@0: k=segTable[seg]; // number of vectors in seg mas01cr@0: data=dataBuf+segOffset; // data for seg mas01cr@0: while(k--){ mas01cr@0: thisDist=0; mas01cr@0: l=dbH->dim; mas01cr@0: double* q=query; mas01cr@0: while(l--) mas01cr@0: thisDist+=*q++**data++; mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01cr@0: && fabs(meanDBdur[seg]-meanQdur)=distances[n]){ mas01cr@0: if((n==0 || thisDist<=distances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l > n ; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[n]=thisDist; mas01cr@0: qIndexes[n]=numVectors-j-1; mas01cr@0: sIndexes[n]=segTable[seg]-k-1; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: } // seg mas01cr@0: // Move query pointer to next query point mas01cr@0: query+=dbH->dim; mas01cr@0: } // query mas01cr@0: // Take the average of this seg's distance mas01cr@0: // Test the seg distances mas01cr@0: thisDist=0; mas01cr@0: n=pointNN; mas01cr@0: while(n--) mas01cr@0: thisDist+=distances[pointNN-n-1]; mas01cr@0: thisDist/=pointNN; mas01cr@0: n=segNN; mas01cr@0: while(n--){ mas01cr@0: if(thisDist>=segDistances[n]){ mas01cr@0: if((n==0 || thisDist<=segDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=pointNN-1 ; l > n ; l--){ mas01cr@0: segDistances[l]=segDistances[l-1]; mas01cr@0: segQIndexes[l]=segQIndexes[l-1]; mas01cr@0: 
segSIndexes[l]=segSIndexes[l-1]; mas01cr@0: segIDs[l]=segIDs[l-1]; mas01cr@0: } mas01cr@0: segDistances[n]=thisDist; mas01cr@0: segQIndexes[n]=qIndexes[0]; mas01cr@0: segSIndexes[n]=sIndexes[0]; mas01cr@0: segIDs[n]=seg; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: for(unsigned k=0; k1) mas01cr@0: cerr << endl << "processed segs :" << processedSegs mas01cr@0: << " elapsed time:" << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@0: mas01cr@0: if(adbQueryResult==0){ mas01cr@0: if(verbosity>1) mas01cr@0: cerr<__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@0: adbQueryResult->Qpos = new int[listLen]; mas01cr@0: adbQueryResult->Spos = new int[listLen]; mas01cr@0: for(k=0; k__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@0: adbQueryResult->Dist[k]=segDistances[k]; mas01cr@0: adbQueryResult->Qpos[k]=segQIndexes[k]; mas01cr@0: adbQueryResult->Spos[k]=segSIndexes[k]; mas01cr@0: sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01cr@0: if(segOffsetTable) mas01cr@0: delete segOffsetTable; mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: if(qNorm) mas01cr@0: delete qNorm; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(meanDBdur) mas01cr@0: delete meanDBdur; mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::deleteDB(const char* dbName, const char* inFile){ mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: // NBest matched filter distance between query and target segs mas01cr@0: // efficient implementation mas01cr@0: // outputs average of N minimum matched 
filter distances mas01cr@0: void audioDB::segSequenceQuery(const char* dbName, const char* inFile, adb__queryResult *adbQueryResult){ mas01cr@0: mas01cr@0: initTables(dbName, inFile); mas01cr@0: mas01cr@0: // For each input vector, find the closest pointNN matching output vectors and report mas01cr@0: // we use stdout in this stub version mas01cr@0: unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim); mas01cr@0: unsigned numSegs = dbH->numFiles; mas01cr@0: mas01cr@0: double* query = (double*)(indata+sizeof(int)); mas01cr@0: double* data = dataBuf; mas01cr@0: double* queryCopy = 0; mas01cr@0: mas01cr@0: double qMeanL2; mas01cr@0: double* sMeanL2; mas01cr@0: mas01cr@0: unsigned USE_THRESH=0; mas01cr@0: double SILENCE_THRESH=0; mas01cr@0: double DIFF_THRESH=0; mas01cr@0: mas01cr@0: if(!(dbH->flags & O2_FLAG_L2NORM) ) mas01cr@0: error("Database must be L2 normed for sequence query","use -l2norm"); mas01cr@0: mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "performing norms ... 
"; cerr.flush(); mas01cr@0: unsigned dbVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@0: // Make a copy of the query mas01cr@0: queryCopy = new double[numVectors*dbH->dim]; mas01cr@0: memcpy(queryCopy, query, numVectors*dbH->dim*sizeof(double)); mas01cr@0: qNorm = new double[numVectors]; mas01cr@0: sNorm = new double[dbVectors]; mas01cr@0: sMeanL2=new double[dbH->numFiles]; mas01cr@0: assert(qNorm&&sNorm&&queryCopy&&sMeanL2&&sequenceLength); mas01cr@0: unitNorm(queryCopy, dbH->dim, numVectors, qNorm); mas01cr@0: query = queryCopy; mas01cr@0: // Make norm measurements relative to sequenceLength mas01cr@0: unsigned w = sequenceLength-1; mas01cr@0: unsigned i,j; mas01cr@0: double* ps; mas01cr@0: double tmp1,tmp2; mas01cr@0: // Copy the L2 norm values to core to avoid disk random access later on mas01cr@0: memcpy(sNorm, l2normTable, dbVectors*sizeof(double)); mas01cr@0: double* snPtr = sNorm; mas01cr@0: for(i=0; inumFiles; i++){ mas01cr@0: if(segTable[i]>sequenceLength){ mas01cr@0: tmp1=*snPtr; mas01cr@0: j=1; mas01cr@0: w=sequenceLength-1; mas01cr@0: while(w--) mas01cr@0: *snPtr+=snPtr[j++]; mas01cr@0: ps = snPtr+1; mas01cr@0: w=segTable[i]-sequenceLength; // +1 - 1 mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01cr@0: *ps=*(ps-1)-tmp1+*(ps+sequenceLength); mas01cr@0: tmp1=tmp2; mas01cr@0: ps++; mas01cr@0: } mas01cr@0: } mas01cr@0: snPtr+=segTable[i]; mas01cr@0: } mas01cr@0: mas01cr@0: double* pn = sMeanL2; mas01cr@0: w=dbH->numFiles; mas01cr@0: while(w--) mas01cr@0: *pn++=0.0; mas01cr@0: ps=sNorm; mas01cr@0: unsigned processedSegs=0; mas01cr@0: for(i=0; inumFiles; i++){ mas01cr@0: if(segTable[i]>sequenceLength-1){ mas01cr@0: w = segTable[i]-sequenceLength+1; mas01cr@0: pn = sMeanL2+i; mas01cr@0: while(w--) mas01cr@0: *pn+=*ps++; mas01cr@0: *pn/=segTable[i]-sequenceLength+1; mas01cr@0: SILENCE_THRESH+=*pn; mas01cr@0: processedSegs++; mas01cr@0: } mas01mc@12: ps = sNorm + segTable[i]; mas01cr@0: } mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "processedSegs: 
" << processedSegs << endl; mas01cr@0: SILENCE_THRESH/=processedSegs; mas01cr@0: USE_THRESH=1; // Turn thresholding on mas01cr@0: DIFF_THRESH=SILENCE_THRESH/=2; // 50% of the mean shingle power mas01cr@0: SILENCE_THRESH/=10; // 10% of the mean shingle power is SILENCE mas01cr@0: mas01cr@0: w=sequenceLength-1; mas01cr@0: i=1; mas01cr@0: tmp1=*qNorm; mas01cr@0: while(w--) mas01cr@0: *qNorm+=qNorm[i++]; mas01cr@0: ps = qNorm+1; mas01cr@0: qMeanL2 = *qNorm; mas01cr@0: w=numVectors-sequenceLength; mas01cr@0: while(w--){ mas01cr@0: tmp2=*ps; mas01cr@0: *ps=*(ps-1)-tmp1+*(ps+sequenceLength); mas01cr@0: tmp1=tmp2; mas01cr@0: qMeanL2+=*ps; mas01cr@0: *ps++; mas01cr@0: } mas01cr@0: qMeanL2 /= numVectors-sequenceLength+1; mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "done." << endl; mas01cr@0: mas01cr@0: mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "matching segs..." << endl; mas01cr@0: mas01cr@0: assert(pointNN>0 && pointNN<=O2_MAXNN); mas01cr@0: assert(segNN>0 && segNN<=O2_MAXNN); mas01cr@0: mas01cr@0: // Make temporary dynamic memory for results mas01cr@0: double segDistances[segNN]; mas01cr@0: unsigned segIDs[segNN]; mas01cr@0: unsigned segQIndexes[segNN]; mas01cr@0: unsigned segSIndexes[segNN]; mas01cr@0: mas01cr@0: double distances[pointNN]; mas01cr@0: unsigned qIndexes[pointNN]; mas01cr@0: unsigned sIndexes[pointNN]; mas01cr@0: mas01cr@0: mas01cr@0: unsigned k,l,m,n,seg,segOffset=0, HOP_SIZE=sequenceHop, wL=sequenceLength; mas01cr@0: double thisDist; mas01cr@0: double oneOverWL=1.0/wL; mas01cr@0: mas01cr@0: for(k=0; kflags & O2_FLAG_TIMES)){ mas01cr@0: cerr << "warning: ignoring query timestamps for non-timestamped database" << endl; mas01cr@0: usingTimes=0; mas01cr@0: } mas01cr@0: mas01cr@0: else if(!usingTimes && (dbH->flags & O2_FLAG_TIMES)) mas01cr@0: cerr << "warning: no timestamps given for query. Ignoring database timestamps." 
<< endl; mas01cr@0: mas01cr@0: else if(usingTimes && (dbH->flags & O2_FLAG_TIMES)){ mas01cr@0: timesdata = new double[numVectors]; mas01cr@0: assert(timesdata); mas01cr@0: insertTimeStamps(numVectors, timesFile, timesdata); mas01cr@0: // Calculate durations of points mas01cr@0: for(k=0; k1) mas01cr@0: cerr << "mean query file duration: " << meanQdur << endl; mas01cr@0: meanDBdur = new double[dbH->numFiles]; mas01cr@0: assert(meanDBdur); mas01cr@0: for(k=0; knumFiles; k++){ mas01cr@0: meanDBdur[k]=0.0; mas01cr@0: for(j=0; jnumVectors || queryPoint>numVectors-wL+1) mas01cr@0: error("queryPoint > numVectors-wL+1 in query"); mas01cr@0: else{ mas01cr@0: if(verbosity>1) mas01cr@0: cerr << "query point: " << queryPoint << endl; cerr.flush(); mas01cr@0: query=query+queryPoint*dbH->dim; mas01cr@0: qNorm=qNorm+queryPoint; mas01cr@0: numVectors=wL; mas01cr@0: } mas01cr@0: mas01cr@0: double ** D = 0; // Cross-correlation between query and target mas01cr@0: double ** DD = 0; // Matched filter distance mas01cr@0: mas01cr@0: D = new double*[numVectors]; mas01cr@0: assert(D); mas01cr@0: DD = new double*[numVectors]; mas01cr@0: assert(DD); mas01cr@0: mas01cr@0: gettimeofday(&tv1, NULL); mas01cr@0: processedSegs=0; mas01cr@0: unsigned successfulSegs=0; mas01cr@0: mas01cr@0: double* qp; mas01cr@0: double* sp; mas01cr@0: double* dp; mas01cr@0: double diffL2; mas01cr@0: mas01cr@0: // build segment offset table mas01cr@0: unsigned *segOffsetTable = new unsigned[dbH->numFiles]; mas01cr@0: unsigned cumSeg=0; mas01cr@0: unsigned segIndexOffset; mas01cr@0: for(k=0; knumFiles;k++){ mas01cr@0: segOffsetTable[k]=cumSeg; mas01cr@0: cumSeg+=segTable[k]*dbH->dim; mas01cr@0: } mas01cr@0: mas01cr@0: char nextKey [MAXSTR]; mas01mc@12: for(processedSegs=0, seg=0 ; processedSegs < dbH->numFiles ; seg++, processedSegs++){ mas01cr@0: mas01cr@0: // get segID from file if using a control file mas01cr@0: if(segFile){ mas01cr@0: if(!segFile->eof()){ mas01cr@0: segFile->getline(nextKey,MAXSTR); mas01cr@0: 
seg=getKeyPos(nextKey); mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01mc@12: mas01cr@0: segOffset=segOffsetTable[seg]; // numDoubles offset mas01cr@0: segIndexOffset=segOffset/dbH->dim; // numVectors offset mas01cr@0: mas01cr@0: if(sequenceLength7) mas01cr@0: cerr << seg << "." << segIndexOffset << "." << segTable[seg] << " | ";cerr.flush(); mas01cr@0: mas01cr@0: // Cross-correlation matrix mas01cr@0: for(j=0; jdim; mas01cr@0: sp=dataBuf+segOffset+k*dbH->dim; mas01cr@0: DD[j][k]=0.0; // Initialize matched filter array mas01cr@0: dp=&D[j][k]; // point to correlation cell j,k mas01cr@0: *dp=0.0; // initialize correlation cell mas01cr@0: l=dbH->dim; // size of vectors mas01cr@0: while(l--) mas01cr@0: *dp+=*qp++**sp++; mas01cr@0: } mas01cr@0: mas01cr@0: // Matched Filter mas01cr@0: // HOP SIZE == 1 mas01cr@0: double* spd; mas01cr@0: if(HOP_SIZE==1){ // HOP_SIZE = shingleHop mas01cr@0: for(w=0; w3 && usingTimes){ mas01cr@0: cerr << "meanQdur=" << meanQdur << " meanDBdur=" << meanDBdur[seg] << endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: if(!usingTimes || mas01cr@0: (usingTimes mas01cr@0: && fabs(meanDBdur[seg]-meanQdur)3 && usingTimes){ mas01cr@0: cerr << "within duration tolerance." << endl; mas01cr@0: cerr.flush(); mas01cr@0: } mas01cr@0: mas01cr@0: // Search for minimum distance by shingles (concatenated vectors) mas01cr@0: for(j=0;jSILENCE_THRESH && sNorm[k]>SILENCE_THRESH && mas01cr@0: // Are both query and target windows above mean energy? 
mas01mc@12: (qNorm[j]>qMeanL2 && sNorm[k]>sMeanL2[seg] && diffL2 < DIFF_THRESH ))) mas01cr@0: thisDist=DD[j][k]*oneOverWL; mas01cr@0: else mas01cr@0: thisDist=0.0; mas01cr@0: mas01cr@0: // NBest match algorithm mas01cr@0: for(m=0; m=distances[m]){ mas01cr@0: // Shuffle distances up the list mas01cr@0: for(l=pointNN-1; l>m; l--){ mas01cr@0: distances[l]=distances[l-1]; mas01cr@0: qIndexes[l]=qIndexes[l-1]; mas01cr@0: sIndexes[l]=sIndexes[l-1]; mas01cr@0: } mas01cr@0: distances[m]=thisDist; mas01cr@0: if(usingQueryPoint) mas01cr@0: qIndexes[m]=queryPoint; mas01cr@0: else mas01cr@0: qIndexes[m]=j; mas01cr@0: sIndexes[m]=k; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: } mas01cr@0: // Calculate the mean of the N-Best matches mas01cr@0: thisDist=0.0; mas01cr@0: for(m=0; m3) mas01mc@12: cerr << "d[" << fileTable+seg*O2_FILETABLESIZE << "]=" << thisDist << endl; mas01mc@12: mas01cr@0: // All the seg stuff goes here mas01cr@0: n=segNN; mas01cr@0: while(n--){ mas01cr@0: if(thisDist>=segDistances[n]){ mas01cr@0: if((n==0 || thisDist<=segDistances[n-1])){ mas01cr@0: // Copy all values above up the queue mas01cr@0: for( l=segNN-1 ; l > n ; l--){ mas01cr@0: segDistances[l]=segDistances[l-1]; mas01cr@0: segQIndexes[l]=segQIndexes[l-1]; mas01cr@0: segSIndexes[l]=segSIndexes[l-1]; mas01cr@0: segIDs[l]=segIDs[l-1]; mas01cr@0: } mas01cr@0: segDistances[n]=thisDist; mas01cr@0: segQIndexes[n]=qIndexes[0]; mas01cr@0: segSIndexes[n]=sIndexes[0]; mas01cr@0: successfulSegs++; mas01cr@0: segIDs[n]=seg; mas01cr@0: break; mas01cr@0: } mas01cr@0: } mas01cr@0: else mas01cr@0: break; mas01cr@0: } mas01cr@0: } // Duration match mas01cr@0: mas01cr@0: // per-seg reset array values mas01cr@0: for(unsigned k=0; k1) mas01cr@0: cerr << endl << "processed segs :" << processedSegs << " matched segments: " << successfulSegs << " elapsed time:" mas01cr@0: << ( tv2.tv_sec*1000 + tv2.tv_usec/1000 ) - ( tv1.tv_sec*1000+tv1.tv_usec/1000 ) << " msec" << endl; mas01cr@0: mas01cr@0: 
if(adbQueryResult==0){ mas01cr@0: if(verbosity>1) mas01cr@0: cerr<__sizeRlist=listLen; mas01cr@0: adbQueryResult->__sizeDist=listLen; mas01cr@0: adbQueryResult->__sizeQpos=listLen; mas01cr@0: adbQueryResult->__sizeSpos=listLen; mas01cr@0: adbQueryResult->Rlist= new char*[listLen]; mas01cr@0: adbQueryResult->Dist = new double[listLen]; mas01cr@0: adbQueryResult->Qpos = new int[listLen]; mas01cr@0: adbQueryResult->Spos = new int[listLen]; mas01cr@0: for(k=0; k__sizeRlist; k++){ mas01cr@0: adbQueryResult->Rlist[k]=new char[O2_MAXFILESTR]; mas01cr@0: adbQueryResult->Dist[k]=segDistances[k]; mas01cr@0: adbQueryResult->Qpos[k]=segQIndexes[k]; mas01cr@0: adbQueryResult->Spos[k]=segSIndexes[k]; mas01cr@0: sprintf(adbQueryResult->Rlist[k], "%s", fileTable+segIDs[k]*O2_FILETABLESIZE); mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Clean up mas01cr@0: if(segOffsetTable) mas01cr@0: delete segOffsetTable; mas01cr@0: if(queryCopy) mas01cr@0: delete queryCopy; mas01cr@0: //if(qNorm) mas01cr@0: //delete qNorm; mas01cr@0: if(D) mas01cr@0: delete[] D; mas01cr@0: if(DD) mas01cr@0: delete[] DD; mas01cr@0: if(timesdata) mas01cr@0: delete timesdata; mas01cr@0: if(meanDBdur) mas01cr@0: delete meanDBdur; mas01cr@0: mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::normalize(double* X, int dim, int n){ mas01cr@0: unsigned c = n*dim; mas01cr@0: double minval,maxval,v,*p; mas01cr@0: mas01cr@0: p=X; mas01cr@0: while(c--){ mas01cr@0: v=*p++; mas01cr@0: if(vmaxval) mas01cr@0: maxval=v; mas01cr@0: } mas01cr@0: mas01cr@0: normalize(X, dim, n, minval, maxval); mas01cr@0: mas01cr@0: } mas01cr@0: mas01cr@0: void audioDB::normalize(double* X, int dim, int n, double minval, double maxval){ mas01cr@0: unsigned c = n*dim; mas01cr@0: double *p; mas01cr@0: mas01cr@0: mas01cr@0: if(maxval==minval) mas01cr@0: return; mas01cr@0: mas01cr@0: maxval=1.0/(maxval-minval); mas01cr@0: c=n*dim; mas01cr@0: p=X; mas01cr@0: mas01cr@0: while(c--){ mas01cr@0: *p=(*p-minval)*maxval; mas01cr@0: 
p++; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@0: // Unit norm block of features mas01cr@0: void audioDB::unitNorm(double* X, unsigned dim, unsigned n, double* qNorm){ mas01cr@0: unsigned d; mas01cr@0: double L2, oneOverL2, *p; mas01cr@0: if(verbosity>2) mas01cr@0: cerr << "norming " << n << " vectors...";cerr.flush(); mas01cr@0: while(n--){ mas01cr@0: p=X; mas01cr@0: L2=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: L2+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01cr@0: L2=sqrt(L2); mas01cr@0: if(qNorm) mas01cr@0: *qNorm++=L2; mas01cr@0: oneOverL2 = 1.0/L2; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *X*=oneOverL2; mas01cr@0: X++; mas01cr@0: } mas01cr@0: } mas01cr@0: if(verbosity>2) mas01cr@0: cerr << "done..." << endl; mas01cr@0: } mas01cr@0: mas01cr@0: // Unit norm block of features mas01cr@0: void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ mas01cr@0: unsigned d; mas01cr@0: double L2, oneOverL2, *p; mas01cr@0: unsigned nn = n; mas01cr@0: mas01cr@0: assert(l2normTable); mas01cr@0: mas01cr@0: if( !append && (dbH->flags & O2_FLAG_L2NORM) ) mas01cr@0: error("Database is already L2 normed", "automatic norm on insert is enabled"); mas01cr@0: mas01cr@0: if(verbosity>2) mas01cr@0: cerr << "norming " << n << " vectors...";cerr.flush(); mas01cr@0: mas01cr@0: double* l2buf = new double[n]; mas01cr@0: double* l2ptr = l2buf; mas01cr@0: assert(l2buf); mas01cr@0: assert(X); mas01cr@0: mas01cr@0: while(nn--){ mas01cr@0: p=X; mas01cr@0: *l2ptr=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *l2ptr+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01cr@0: *l2ptr=sqrt(*l2ptr); mas01cr@0: oneOverL2 = 1.0/(*l2ptr++); mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *X*=oneOverL2; mas01cr@0: X++; mas01cr@0: } mas01cr@0: } mas01cr@0: unsigned offset; mas01cr@0: if(append) mas01cr@0: offset=dbH->length/(dbH->dim*sizeof(double)); // number of vectors mas01cr@0: else mas01cr@0: offset=0; mas01cr@0: memcpy(l2normTable+offset, 
l2buf, n*sizeof(double)); mas01cr@0: if(l2buf) mas01cr@0: delete l2buf; mas01cr@0: if(verbosity>2) mas01cr@0: cerr << "done..." << endl; mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // Start an audioDB server on the host mas01cr@0: void audioDB::startServer(){ mas01cr@0: struct soap soap; mas01cr@0: int m, s; // master and slave sockets mas01cr@0: soap_init(&soap); mas01cr@0: m = soap_bind(&soap, NULL, port, 100); mas01cr@0: if (m < 0) mas01cr@0: soap_print_fault(&soap, stderr); mas01cr@0: else mas01cr@0: { mas01cr@0: fprintf(stderr, "Socket connection successful: master socket = %d\n", m); mas01cr@0: for (int i = 1; ; i++) mas01cr@0: { mas01cr@0: s = soap_accept(&soap); mas01cr@0: if (s < 0) mas01cr@0: { mas01cr@0: soap_print_fault(&soap, stderr); mas01cr@0: break; mas01cr@0: } mas01cr@0: fprintf(stderr, "%d: accepted connection from IP=%d.%d.%d.%d socket=%d\n", i, mas01cr@0: (soap.ip >> 24)&0xFF, (soap.ip >> 16)&0xFF, (soap.ip >> 8)&0xFF, soap.ip&0xFF, s); mas01cr@0: if (soap_serve(&soap) != SOAP_OK) // process RPC request mas01cr@0: soap_print_fault(&soap, stderr); // print error mas01cr@0: fprintf(stderr, "request served\n"); mas01cr@0: soap_destroy(&soap); // clean up class instances mas01cr@0: soap_end(&soap); // clean up everything and close socket mas01cr@0: } mas01cr@0: } mas01cr@0: soap_done(&soap); // close master socket and detach environment mas01cr@0: } mas01cr@0: mas01cr@0: mas01cr@0: // web services mas01cr@0: mas01cr@0: // SERVER SIDE mas01cr@0: int adb__status(struct soap* soap, xsd__string dbName, xsd__int &adbCreateResult){ mas01cr@0: char* const argv[]={"audioDB",COM_STATUS,dbName}; mas01cr@0: const unsigned argc = 3; mas01cr@0: audioDB(argc,argv); mas01cr@0: adbCreateResult=100; mas01cr@0: return SOAP_OK; mas01cr@0: } mas01cr@0: mas01cr@0: // Literal translation of command line to web service mas01cr@0: mas01cr@0: int adb__query(struct soap* soap, xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__int 
qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int seqLen, adb__queryResult &adbQueryResult){ mas01cr@0: char queryType[256]; mas01cr@0: for(int k=0; k<256; k++) mas01cr@0: queryType[k]='\0'; mas01cr@0: if(qType == O2_FLAG_POINT_QUERY) mas01cr@0: strncpy(queryType, "point", strlen("point")); mas01cr@0: else if (qType == O2_FLAG_SEQUENCE_QUERY) mas01cr@0: strncpy(queryType, "sequence", strlen("sequence")); mas01cr@0: else if(qType == O2_FLAG_SEG_QUERY) mas01cr@0: strncpy(queryType,"segment", strlen("segment")); mas01cr@0: else mas01cr@0: strncpy(queryType, "", strlen("")); mas01cr@0: mas01cr@0: if(pointNN==0) mas01cr@0: pointNN=10; mas01cr@0: if(segNN==0) mas01cr@0: segNN=10; mas01cr@0: if(seqLen==0) mas01cr@0: seqLen=16; mas01cr@0: mas01cr@0: char qPosStr[256]; mas01cr@0: sprintf(qPosStr, "%d", qPos); mas01cr@0: char pointNNStr[256]; mas01cr@0: sprintf(pointNNStr,"%d",pointNN); mas01cr@0: char segNNStr[256]; mas01cr@0: sprintf(segNNStr,"%d",segNN); mas01cr@0: char seqLenStr[256]; mas01cr@0: sprintf(seqLenStr,"%d",seqLen); mas01cr@0: mas01cr@0: const char* argv[] ={ mas01cr@0: "./audioDB", mas01cr@0: COM_QUERY, mas01cr@0: queryType, // Need to pass a parameter mas01cr@0: COM_DATABASE, mas01cr@0: dbName, mas01cr@0: COM_FEATURES, mas01cr@0: qKey, mas01cr@0: COM_KEYLIST, mas01cr@0: keyList==0?"":keyList, mas01cr@0: COM_TIMES, mas01cr@0: timesFileName==0?"":timesFileName, mas01cr@0: COM_QPOINT, mas01cr@0: qPosStr, mas01cr@0: COM_POINTNN, mas01cr@0: pointNNStr, mas01cr@0: COM_SEGNN, mas01cr@0: segNNStr, // Need to pass a parameter mas01cr@0: COM_SEQLEN, mas01cr@0: seqLenStr mas01cr@0: }; mas01cr@0: mas01cr@0: const unsigned argc = 19; mas01cr@0: audioDB(argc, (char* const*)argv, &adbQueryResult); mas01cr@0: return SOAP_OK; mas01cr@0: } mas01cr@0: mas01cr@0: int main(const unsigned argc, char* const argv[]){ mas01cr@0: audioDB(argc, argv); mas01cr@0: } mas01cr@0: mas01cr@0: