mas01cr@0: #include "audioDB.h" mas01cr@0: mas01cr@78: audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS mas01cr@78: { mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@167: error("No command found"); mas01cr@0: } mas01cr@78: mas01cr@0: if(O2_ACTION(COM_SERVER)) mas01cr@0: startServer(); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01cr@0: if(isClient) mas01cr@0: ws_query(dbName, inFile, (char*)hostport); mas01cr@0: else mas01cr@78: query(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@195: else if(O2_ACTION(COM_POWER)) mas01cr@195: power_flag(dbName); mas01cr@195: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@136: audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS mas01cr@78: { mas01cr@107: try { mas01cr@167: isServer = 1; // FIXME: Hack mas01cr@107: processArgs(argc, argv); mas01cr@107: assert(O2_ACTION(COM_QUERY)); mas01cr@136: query(dbName, inFile, adbQueryResponse); mas01cr@107: } catch(char *err) { mas01cr@107: cleanup(); mas01cr@107: throw(err); mas01cr@107: } mas01cr@78: } mas01cr@78: mas01cr@136: audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS mas01cr@78: { mas01cr@107: try { mas01cr@167: isServer = 1; // FIXME: Hack mas01cr@107: processArgs(argc, argv); mas01cr@107: assert(O2_ACTION(COM_STATUS)); mas01cr@136: status(dbName, adbStatusResponse); mas01cr@107: } catch(char *err) { mas01cr@107: cleanup(); mas01cr@107: throw(err); mas01cr@107: } mas01cr@78: } mas01cr@78: mas01cr@107: void audioDB::cleanup() { mas01cr@123: cmdline_parser_free(&args_info); mas01cr@0: if(indata) mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: if(db) mas01cr@197: munmap(db,getpagesize()); mas01cr@197: if(fileTable) mas01cr@197: munmap(fileTable, fileTableLength); mas01cr@197: if(trackTable) mas01cr@197: munmap(trackTable, trackTableLength); mas01cr@197: if(dataBuf) mas01cr@197: munmap(dataBuf, dataBufLength); mas01cr@197: if(timesTable) mas01cr@197: munmap(timesTable, timesTableLength); mas01cr@197: if(l2normTable) mas01cr@197: munmap(l2normTable, l2normTableLength); mas01cr@197: mas01cr@0: if(dbfid>0) mas01cr@0: close(dbfid); mas01cr@0: if(infid>0) mas01cr@0: close(infid); mas01cr@0: if(dbH) mas01cr@0: delete dbH; mas01cr@0: } mas01cr@0: mas01cr@107: audioDB::~audioDB(){ mas01cr@107: cleanup(); mas01cr@107: } mas01cr@107: mas01cr@0: int audioDB::processArgs(const unsigned argc, char* const argv[]){ mas01cr@0: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if (cmdline_parser (argc, argv, &args_info) != 0) mas01cr@167: error("Error parsing command line"); mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@243: verbosity = args_info.verbosity_arg; mas01cr@243: if(verbosity < 0 || verbosity > 10){ mas01cr@243: std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl; mas01cr@243: verbosity = 1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@136: if(args_info.size_given) { mas01cr@277: if(args_info.datasize_given) { mas01cr@277: error("both --size and --datasize given", ""); mas01cr@277: } mas01cr@277: if(args_info.ntracks_given) { mas01cr@277: error("both --size and --ntracks given", ""); mas01cr@277: } mas01cr@277: if(args_info.datadim_given) { mas01cr@277: error("both --size and --datadim given", ""); mas01cr@277: } mas01cr@197: if (args_info.size_arg < 50 || args_info.size_arg > 32000) { mas01cr@136: error("Size out of range", ""); mas01cr@136: } mas01cr@277: double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE); mas01cr@277: /* FIXME: what's the safe way of doing this? */ mas01cr@277: datasize = (unsigned int) ceil(datasize * ratio); mas01cr@277: ntracks = (unsigned int) ceil(ntracks * ratio); mas01cr@277: } else { mas01cr@277: if(args_info.datasize_given) { mas01cr@277: datasize = args_info.datasize_arg; mas01cr@277: } mas01cr@277: if(args_info.ntracks_given) { mas01cr@277: ntracks = args_info.ntracks_arg; mas01cr@277: } mas01cr@277: if(args_info.datadim_given) { mas01cr@277: datadim = args_info.datadim_arg; mas01cr@277: } mas01cr@136: } mas01cr@136: mas01cr@243: if(args_info.radius_given) { mas01cr@243: radius = args_info.radius_arg; mas01cr@243: if(radius <= 0 || radius > 1000000000) { mas01cr@78: error("radius out of range"); mas01cr@243: } else { mas01cr@243: VERB_LOG(3, "Setting radius to %f\n", radius); mas01cr@23: } mas01cr@23: } mas01cr@23: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@167: isServer = 1; mas01cr@107: #if defined(O2_DEBUG) mas01cr@107: struct sigaction sa; mas01cr@107: sa.sa_sigaction = sigterm_action; mas01cr@107: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@107: sigaction(SIGTERM, &sa, NULL); mas01cr@107: sa.sa_sigaction = sighup_action; mas01cr@107: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@107: sigaction(SIGHUP, &sa, NULL); mas01cr@107: #endif mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@107: if(args_info.client_given){ mas01cr@107: command=COM_CLIENT; mas01cr@107: hostport=args_info.client_arg; mas01cr@107: isClient=1; mas01cr@107: } mas01cr@0: mas01cr@107: if(args_info.NEW_given){ mas01cr@107: command=COM_CREATE; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: return 0; mas01cr@107: } mas01cr@0: mas01cr@107: if(args_info.STATUS_given){ mas01cr@107: command=COM_STATUS; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: return 0; mas01cr@107: } mas01cr@0: mas01cr@107: if(args_info.DUMP_given){ mas01cr@107: command=COM_DUMP; mas01cr@107: dbName=args_info.database_arg; mas01cr@136: output = args_info.output_arg; mas01cr@107: return 0; mas01cr@107: } mas01cr@0: mas01cr@107: if(args_info.L2NORM_given){ mas01cr@107: command=COM_L2NORM; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: return 0; mas01cr@107: } mas01cr@0: mas01cr@195: if(args_info.POWER_given){ mas01cr@195: command=COM_POWER; mas01cr@195: dbName=args_info.database_arg; mas01cr@195: return 0; mas01cr@195: } mas01cr@195: mas01cr@107: if(args_info.INSERT_given){ mas01cr@107: command=COM_INSERT; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: inFile=args_info.features_arg; mas01cr@107: if(args_info.key_given) mas01cr@107: key=args_info.key_arg; mas01cr@107: if(args_info.times_given){ mas01cr@107: timesFileName=args_info.times_arg; mas01cr@107: if(strlen(timesFileName)>0){ mas01cr@243: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@107: error("Could not open times file for reading", timesFileName); mas01cr@107: usingTimes=1; mas01cr@107: } mas01cr@107: } mas01cr@195: if (args_info.power_given) { mas01cr@195: powerFileName = args_info.power_arg; mas01cr@195: if (strlen(powerFileName) > 0) { mas01cr@195: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@195: error("Could not open power file for reading", powerFileName, "open"); mas01cr@195: } mas01cr@195: usingPower = 1; mas01cr@195: } mas01cr@195: } mas01cr@107: return 0; mas01cr@107: } mas01cr@107: mas01cr@107: if(args_info.BATCHINSERT_given){ mas01cr@107: command=COM_BATCHINSERT; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: inFile=args_info.featureList_arg; mas01cr@107: if(args_info.keyList_given) mas01cr@107: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@0: mas01cr@107: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ mas01cr@23: trackFileName=args_info.keyList_arg; mas01cr@243: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01cr@23: error("Could not open keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@107: */ mas01cr@107: mas01cr@107: if(args_info.timesList_given){ mas01cr@107: timesFileName=args_info.timesList_arg; mas01cr@107: if(strlen(timesFileName)>0){ mas01cr@243: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@107: error("Could not open timesList file for reading", timesFileName); mas01cr@107: usingTimes=1; mas01cr@107: } mas01cr@107: } mas01cr@195: if(args_info.powerList_given){ mas01cr@195: powerFileName=args_info.powerList_arg; mas01cr@195: if(strlen(powerFileName)>0){ mas01cr@243: if(!(powerFile = new std::ifstream(powerFileName,std::ios::in))) mas01cr@195: error("Could not open powerList file for reading", powerFileName); mas01cr@195: usingPower=1; mas01cr@195: } mas01cr@195: } mas01cr@107: return 0; mas01cr@107: } mas01cr@107: mas01cr@107: // Query command and arguments mas01cr@107: if(args_info.QUERY_given){ mas01cr@107: command=COM_QUERY; mas01cr@107: dbName=args_info.database_arg; mas01cr@107: inFile=args_info.features_arg; mas01cr@107: mas01cr@107: if(args_info.keyList_given){ mas01cr@107: trackFileName=args_info.keyList_arg; mas01cr@243: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01cr@107: error("Could not open keyList file for reading",trackFileName); mas01cr@107: } mas01cr@107: mas01cr@107: if(args_info.times_given){ mas01cr@107: timesFileName=args_info.times_arg; mas01cr@107: if(strlen(timesFileName)>0){ mas01cr@243: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@107: error("Could not open times file for reading", timesFileName); mas01cr@107: usingTimes=1; mas01cr@107: } mas01cr@107: } mas01cr@195: mas01cr@195: if(args_info.power_given){ mas01cr@195: powerFileName=args_info.power_arg; mas01cr@195: if(strlen(powerFileName)>0){ mas01cr@195: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@195: error("Could not open power file for reading", powerFileName, "open"); mas01cr@195: } mas01cr@195: usingPower = 1; mas01cr@195: } mas01cr@195: } mas01cr@107: mas01cr@107: // query type mas01cr@107: if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) mas01cr@107: queryType=O2_TRACK_QUERY; mas01cr@107: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@107: queryType=O2_POINT_QUERY; mas01cr@107: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@107: queryType=O2_SEQUENCE_QUERY; mas01cr@277: else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0) mas01cr@277: queryType=O2_N_SEQUENCE_QUERY; mas01cr@277: else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0) mas01cr@277: queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY; mas01cr@107: else mas01cr@107: error("unsupported query type",args_info.QUERY_arg); mas01cr@107: mas01cr@107: if(!args_info.exhaustive_flag){ mas01cr@107: queryPoint = args_info.qpoint_arg; mas01cr@107: usingQueryPoint=1; mas01cr@107: if(queryPoint<0 || queryPoint >10000) mas01cr@107: error("queryPoint out of range: 0 <= queryPoint <= 10000"); mas01cr@107: } mas01cr@107: mas01cr@107: pointNN = args_info.pointnn_arg; mas01cr@277: if(pointNN < 1 || pointNN > O2_MAXNN) { mas01cr@277: error("pointNN out of range: 1 <= pointNN <= 1000000"); mas01cr@107: } mas01cr@107: trackNN = args_info.resultlength_arg; mas01cr@277: if(trackNN < 1 || trackNN > O2_MAXNN) { mas01cr@277: error("resultlength out of range: 1 <= resultlength <= 1000000"); mas01cr@107: } mas01cr@107: sequenceLength = args_info.sequencelength_arg; mas01cr@107: if(sequenceLength < 1 || sequenceLength > 1000) { mas01cr@107: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@107: } mas01cr@107: sequenceHop = args_info.sequencehop_arg; mas01cr@107: if(sequenceHop < 1 || sequenceHop > 1000) { mas01cr@107: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01cr@107: } mas01cr@195: mas01cr@195: if (args_info.absolute_threshold_given) { mas01cr@195: if (args_info.absolute_threshold_arg >= 0) { mas01cr@195: error("absolute threshold out of range: should be negative"); mas01cr@195: } mas01cr@195: use_absolute_threshold = true; mas01cr@195: absolute_threshold = args_info.absolute_threshold_arg; mas01cr@195: } mas01cr@195: if (args_info.relative_threshold_given) { mas01cr@195: use_relative_threshold = true; mas01cr@195: relative_threshold = args_info.relative_threshold_arg; mas01cr@195: } mas01cr@107: return 0; mas01cr@107: } mas01cr@107: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@136: void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ mas01cr@0: if(!dbH) mas01cr@197: initTables(dbName, 0); mas01cr@0: mas01cr@0: unsigned dudCount=0; mas01cr@0: unsigned nullCount=0; mas01cr@0: for(unsigned k=0; knumFiles; k++){ mas01cr@23: if(trackTable[k]numFiles << std::endl; mas01cr@243: std::cout << "data dim:" << dbH->dim <dim>0){ mas01cr@243: std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; mas01cr@78: } mas01cr@243: std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; mas01cr@243: std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << mas01cr@243: (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; mas01cr@243: std::cout << "flags:" << dbH->flags << std::endl; mas01cr@78: mas01cr@243: std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; mas01cr@78: } else { mas01cr@136: adbStatusResponse->result.numFiles = dbH->numFiles; mas01cr@136: adbStatusResponse->result.dim = dbH->dim; mas01cr@136: adbStatusResponse->result.length = dbH->length; mas01cr@136: adbStatusResponse->result.dudCount = dudCount; mas01cr@136: adbStatusResponse->result.nullCount = nullCount; mas01cr@136: adbStatusResponse->result.flags = dbH->flags; mas01cr@78: } mas01cr@0: } mas01cr@0: mas01cr@197: void audioDB::l2norm(const char* dbName) { mas01cr@197: forWrite = true; mas01cr@197: initTables(dbName, 0); mas01cr@0: if(dbH->length>0){ mas01cr@197: /* FIXME: should probably be uint64_t */ mas01cr@0: unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@197: CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); mas01cr@0: unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append mas01cr@0: } mas01cr@0: // Update database flags mas01cr@0: dbH->flags = dbH->flags|O2_FLAG_L2NORM; mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: } mas01cr@195: mas01cr@195: void audioDB::power_flag(const char *dbName) { mas01cr@197: forWrite = true; mas01cr@197: initTables(dbName, 0); mas01cr@195: if (dbH->length > 0) { mas01cr@195: error("cannot turn on power storage for non-empty database", dbName); mas01cr@195: } mas01cr@195: dbH->flags |= O2_FLAG_POWER; mas01cr@195: memcpy(db, dbH, O2_HEADERSIZE); mas01cr@195: } mas01cr@195: mas01cr@243: // Unit norm block of features mas01cr@0: mas01cr@243: /* FIXME: in fact this does not unit norm a block of features, it just mas01cr@243: records the L2 norms somewhere. unitNorm() does in fact unit norm mas01cr@243: a block of features. */ mas01cr@0: void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ mas01cr@0: unsigned d; mas01cr@78: double *p; mas01cr@0: unsigned nn = n; mas01cr@0: mas01cr@0: assert(l2normTable); mas01cr@0: mas01cr@0: if( !append && (dbH->flags & O2_FLAG_L2NORM) ) mas01cr@0: error("Database is already L2 normed", "automatic norm on insert is enabled"); mas01cr@0: mas01cr@243: VERB_LOG(2, "norming %u vectors...", n); mas01cr@0: mas01cr@0: double* l2buf = new double[n]; mas01cr@0: double* l2ptr = l2buf; mas01cr@0: assert(l2buf); mas01cr@0: assert(X); mas01cr@0: mas01cr@0: while(nn--){ mas01cr@0: p=X; mas01cr@0: *l2ptr=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *l2ptr+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01cr@23: l2ptr++; mas01cr@23: X+=dim; mas01cr@0: } mas01cr@0: unsigned offset; mas01cr@107: if(append) { mas01cr@107: // FIXME: a hack, a very palpable hack: the vectors have already mas01cr@107: // been inserted, and dbH->length has already been updated. We mas01cr@107: // need to subtract off again the number of vectors that we've mas01cr@107: // inserted this time... mas01cr@107: offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors mas01cr@107: } else { mas01cr@0: offset=0; mas01cr@107: } mas01cr@0: memcpy(l2normTable+offset, l2buf, n*sizeof(double)); mas01cr@0: if(l2buf) mas01cr@23: delete[] l2buf; mas01cr@243: VERB_LOG(2, " done."); mas01cr@195: } mas01cr@195: mas01cr@0: int main(const unsigned argc, char* const argv[]){ mas01cr@0: audioDB(argc, argv); mas01cr@0: }