mas01cr@0: #include "audioDB.h" mas01cr@0: mas01cr@76: audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@151: error("No command found"); mas01cr@0: } mas01cr@77: mas01cr@0: if(O2_ACTION(COM_SERVER)) mas01cr@0: startServer(); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01cr@0: if(isClient) mas01cr@0: ws_query(dbName, inFile, (char*)hostport); mas01cr@0: else mas01cr@76: query(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@266: mas01cr@266: else if(O2_ACTION(COM_SAMPLE)) mas01cr@266: sample(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@193: else if(O2_ACTION(COM_POWER)) mas01cr@193: power_flag(dbName); mas01cr@193: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@133: audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01cr@151: isServer = 1; // FIXME: Hack mas01cr@97: processArgs(argc, argv); mas01cr@97: assert(O2_ACTION(COM_QUERY)); mas01cr@133: query(dbName, inFile, adbQueryResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@133: audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01cr@151: isServer = 1; // FIXME: Hack mas01cr@97: processArgs(argc, argv); mas01cr@97: assert(O2_ACTION(COM_STATUS)); mas01cr@133: status(dbName, adbStatusResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@97: void audioDB::cleanup() { mas01cr@122: cmdline_parser_free(&args_info); mas01cr@0: if(indata) mas01cr@0: munmap(indata,statbuf.st_size); mas01cr@0: if(db) mas01cr@196: munmap(db,getpagesize()); mas01cr@196: if(fileTable) mas01cr@196: munmap(fileTable, fileTableLength); mas01cr@196: if(trackTable) mas01cr@196: munmap(trackTable, trackTableLength); mas01cr@196: if(dataBuf) mas01cr@196: munmap(dataBuf, dataBufLength); mas01cr@196: if(timesTable) mas01cr@196: munmap(timesTable, timesTableLength); mas01cr@196: if(l2normTable) mas01cr@196: munmap(l2normTable, l2normTableLength); mas01cr@196: mas01cr@279: if(rng) mas01cr@279: gsl_rng_free(rng); mas01cr@279: mas01cr@0: if(dbfid>0) mas01cr@0: close(dbfid); mas01cr@0: if(infid>0) mas01cr@0: close(infid); mas01cr@0: if(dbH) mas01cr@0: delete dbH; mas01cr@0: } mas01cr@0: mas01cr@97: audioDB::~audioDB(){ mas01cr@97: cleanup(); mas01cr@97: } mas01cr@97: mas01cr@0: int audioDB::processArgs(const unsigned argc, char* const argv[]){ mas01cr@0: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if (cmdline_parser (argc, argv, &args_info) != 0) mas01cr@151: error("Error parsing command line"); mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@239: verbosity = args_info.verbosity_arg; mas01cr@239: if(verbosity < 0 || verbosity > 10){ mas01cr@239: std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl; mas01cr@239: verbosity = 1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@129: if(args_info.size_given) { mas01cr@256: if(args_info.datasize_given) { mas01cr@256: error("both --size and --datasize given", ""); mas01cr@256: } mas01cr@256: if(args_info.ntracks_given) { mas01cr@256: error("both --size and --ntracks given", ""); mas01cr@256: } mas01cr@256: if(args_info.datadim_given) { mas01cr@256: error("both --size and --datadim given", ""); mas01cr@256: } mas01cr@196: if (args_info.size_arg < 50 || args_info.size_arg > 32000) { mas01cr@129: error("Size out of range", ""); mas01cr@129: } mas01cr@256: double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE); mas01cr@256: /* FIXME: what's the safe way of doing this? */ mas01cr@256: datasize = (unsigned int) ceil(datasize * ratio); mas01cr@256: ntracks = (unsigned int) ceil(ntracks * ratio); mas01cr@256: } else { mas01cr@256: if(args_info.datasize_given) { mas01cr@256: datasize = args_info.datasize_arg; mas01cr@256: } mas01cr@256: if(args_info.ntracks_given) { mas01cr@256: ntracks = args_info.ntracks_arg; mas01cr@256: } mas01cr@256: if(args_info.datadim_given) { mas01cr@256: datadim = args_info.datadim_arg; mas01cr@256: } mas01cr@129: } mas01cr@129: mas01cr@239: if(args_info.radius_given) { mas01cr@239: radius = args_info.radius_arg; mas01cr@239: if(radius <= 0 || radius > 1000000000) { mas01cr@77: error("radius out of range"); mas01cr@239: } else { mas01cr@239: VERB_LOG(3, "Setting radius to %f\n", radius); mas01mc@17: } mas01mc@17: } mas01mc@17: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@151: isServer = 1; mas01cr@105: #if defined(O2_DEBUG) mas01cr@104: struct sigaction sa; mas01cr@104: sa.sa_sigaction = sigterm_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGTERM, &sa, NULL); mas01cr@104: sa.sa_sigaction = sighup_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGHUP, &sa, NULL); mas01cr@105: #endif mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@105: if(args_info.client_given){ mas01cr@105: command=COM_CLIENT; mas01cr@105: hostport=args_info.client_arg; mas01cr@105: isClient=1; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.NEW_given){ mas01cr@105: command=COM_CREATE; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.STATUS_given){ mas01cr@105: command=COM_STATUS; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@266: if(args_info.SAMPLE_given) { mas01cr@266: command = COM_SAMPLE; mas01cr@266: dbName = args_info.database_arg; mas01cr@267: sequenceLength = args_info.sequencelength_arg; mas01cr@267: if(sequenceLength < 1 || sequenceLength > 1000) { mas01cr@267: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@267: } mas01cr@274: nsamples = args_info.nsamples_arg; mas01cr@266: return 0; mas01cr@266: } mas01cr@266: mas01cr@105: if(args_info.DUMP_given){ mas01cr@105: command=COM_DUMP; mas01cr@105: dbName=args_info.database_arg; mas01cr@131: output = args_info.output_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.L2NORM_given){ mas01cr@105: command=COM_L2NORM; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@193: if(args_info.POWER_given){ mas01cr@193: command=COM_POWER; mas01cr@193: dbName=args_info.database_arg; mas01cr@193: return 0; mas01cr@193: } mas01cr@193: mas01cr@105: if(args_info.INSERT_given){ mas01cr@105: command=COM_INSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.features_arg; mas01cr@105: if(args_info.key_given) mas01cr@105: key=args_info.key_arg; mas01cr@105: if(args_info.times_given){ mas01cr@105: timesFileName=args_info.times_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@239: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@193: if (args_info.power_given) { mas01cr@193: powerFileName = args_info.power_arg; mas01cr@193: if (strlen(powerFileName) > 0) { mas01cr@193: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@193: error("Could not open power file for reading", powerFileName, "open"); mas01cr@193: } mas01cr@193: usingPower = 1; mas01cr@193: } mas01cr@193: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: mas01cr@105: if(args_info.BATCHINSERT_given){ mas01cr@105: command=COM_BATCHINSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.featureList_arg; mas01cr@105: if(args_info.keyList_given) mas01cr@105: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@0: mas01cr@105: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ mas01mc@18: trackFileName=args_info.keyList_arg; mas01cr@239: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01mc@18: error("Could not open keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@105: */ mas01cr@105: mas01cr@105: if(args_info.timesList_given){ mas01cr@105: timesFileName=args_info.timesList_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@239: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@105: error("Could not open timesList file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@193: if(args_info.powerList_given){ mas01cr@193: powerFileName=args_info.powerList_arg; mas01cr@193: if(strlen(powerFileName)>0){ mas01cr@239: if(!(powerFile = new std::ifstream(powerFileName,std::ios::in))) mas01cr@193: error("Could not open powerList file for reading", powerFileName); mas01cr@193: usingPower=1; mas01cr@193: } mas01cr@193: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: mas01cr@105: // Query command and arguments mas01cr@105: if(args_info.QUERY_given){ mas01cr@105: command=COM_QUERY; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.features_arg; mas01cr@105: mas01cr@105: if(args_info.keyList_given){ mas01cr@105: trackFileName=args_info.keyList_arg; mas01cr@239: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01cr@105: error("Could not open keyList file for reading",trackFileName); mas01cr@105: } mas01cr@105: mas01cr@105: if(args_info.times_given){ mas01cr@105: timesFileName=args_info.times_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@239: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@193: mas01cr@193: if(args_info.power_given){ mas01cr@193: powerFileName=args_info.power_arg; mas01cr@193: if(strlen(powerFileName)>0){ mas01cr@193: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@193: error("Could not open power file for reading", powerFileName, "open"); mas01cr@193: } mas01cr@193: usingPower = 1; mas01cr@193: } mas01cr@193: } mas01cr@105: mas01cr@105: // query type mas01cr@105: if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) mas01cr@105: queryType=O2_TRACK_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@105: queryType=O2_POINT_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@105: queryType=O2_SEQUENCE_QUERY; mas01mc@248: else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0) mas01mc@248: queryType=O2_N_SEQUENCE_QUERY; mas01mc@263: else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0) mas01mc@263: queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY; mas01cr@105: else mas01cr@105: error("unsupported query type",args_info.QUERY_arg); mas01cr@105: mas01cr@105: if(!args_info.exhaustive_flag){ mas01cr@105: queryPoint = args_info.qpoint_arg; mas01cr@105: usingQueryPoint=1; mas01cr@105: if(queryPoint<0 || queryPoint >10000) mas01cr@105: error("queryPoint out of range: 0 <= queryPoint <= 10000"); mas01cr@105: } mas01cr@105: mas01cr@105: pointNN = args_info.pointnn_arg; mas01mc@263: if(pointNN < 1 || pointNN > O2_MAXNN) { mas01mc@263: error("pointNN out of range: 1 <= pointNN <= 1000000"); mas01cr@105: } mas01cr@105: trackNN = args_info.resultlength_arg; mas01mc@263: if(trackNN < 1 || trackNN > O2_MAXNN) { mas01mc@263: error("resultlength out of range: 1 <= resultlength <= 1000000"); mas01cr@105: } mas01cr@105: sequenceLength = args_info.sequencelength_arg; mas01cr@105: if(sequenceLength < 1 || sequenceLength > 1000) { mas01cr@105: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@105: } mas01cr@105: sequenceHop = args_info.sequencehop_arg; mas01cr@105: if(sequenceHop < 1 || sequenceHop > 1000) { mas01cr@105: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01cr@105: } mas01cr@193: mas01cr@193: if (args_info.absolute_threshold_given) { mas01cr@193: if (args_info.absolute_threshold_arg >= 0) { mas01cr@193: error("absolute threshold out of range: should be negative"); mas01cr@193: } mas01cr@193: use_absolute_threshold = true; mas01cr@193: absolute_threshold = args_info.absolute_threshold_arg; mas01cr@193: } mas01cr@193: if (args_info.relative_threshold_given) { mas01cr@193: use_relative_threshold = true; mas01cr@193: relative_threshold = args_info.relative_threshold_arg; mas01cr@193: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@133: void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ mas01cr@0: if(!dbH) mas01cr@196: initTables(dbName, 0); mas01cr@0: mas01cr@0: unsigned dudCount=0; mas01cr@0: unsigned nullCount=0; mas01cr@0: for(unsigned k=0; knumFiles; k++){ mas01mc@18: if(trackTable[k]numFiles << std::endl; mas01cr@239: std::cout << "data dim:" << dbH->dim <dim>0){ mas01cr@239: std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl; mas01cr@76: } mas01cr@239: std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; mas01cr@239: std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" << mas01cr@239: (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl; mas01cr@239: std::cout << "flags:" << dbH->flags << std::endl; mas01cr@76: mas01cr@239: std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl; mas01cr@76: } else { mas01cr@133: adbStatusResponse->result.numFiles = dbH->numFiles; mas01cr@133: adbStatusResponse->result.dim = dbH->dim; mas01cr@133: adbStatusResponse->result.length = dbH->length; mas01cr@133: adbStatusResponse->result.dudCount = dudCount; mas01cr@133: adbStatusResponse->result.nullCount = nullCount; mas01cr@133: adbStatusResponse->result.flags = dbH->flags; mas01cr@76: } mas01cr@0: } mas01cr@0: mas01cr@196: void audioDB::l2norm(const char* dbName) { mas01cr@196: forWrite = true; mas01cr@196: initTables(dbName, 0); mas01cr@0: if(dbH->length>0){ mas01cr@196: /* FIXME: should probably be uint64_t */ mas01cr@0: unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); mas01cr@196: CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); mas01cr@0: unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append mas01cr@0: } mas01cr@0: // Update database flags mas01cr@0: dbH->flags = dbH->flags|O2_FLAG_L2NORM; mas01cr@0: memcpy (db, dbH, O2_HEADERSIZE); mas01cr@0: } mas01cr@193: mas01cr@193: void audioDB::power_flag(const char *dbName) { mas01cr@196: forWrite = true; mas01cr@196: initTables(dbName, 0); mas01cr@193: if (dbH->length > 0) { mas01cr@193: error("cannot turn on power storage for non-empty database", dbName); mas01cr@193: } mas01cr@193: dbH->flags |= O2_FLAG_POWER; mas01cr@193: memcpy(db, dbH, O2_HEADERSIZE); mas01cr@193: } mas01cr@193: mas01cr@239: // Unit norm block of features mas01cr@0: mas01cr@239: /* FIXME: in fact this does not unit norm a block of features, it just mas01cr@239: records the L2 norms somewhere. unitNorm() does in fact unit norm mas01cr@239: a block of features. */ mas01cr@0: void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ mas01cr@0: unsigned d; mas01cr@59: double *p; mas01cr@0: unsigned nn = n; mas01cr@0: mas01cr@0: assert(l2normTable); mas01cr@0: mas01cr@0: if( !append && (dbH->flags & O2_FLAG_L2NORM) ) mas01cr@0: error("Database is already L2 normed", "automatic norm on insert is enabled"); mas01cr@0: mas01cr@239: VERB_LOG(2, "norming %u vectors...", n); mas01cr@0: mas01cr@0: double* l2buf = new double[n]; mas01cr@0: double* l2ptr = l2buf; mas01cr@0: assert(l2buf); mas01cr@0: assert(X); mas01cr@0: mas01cr@0: while(nn--){ mas01cr@0: p=X; mas01cr@0: *l2ptr=0.0; mas01cr@0: d=dim; mas01cr@0: while(d--){ mas01cr@0: *l2ptr+=*p**p; mas01cr@0: p++; mas01cr@0: } mas01mc@17: l2ptr++; mas01mc@17: X+=dim; mas01cr@0: } mas01cr@0: unsigned offset; mas01cr@84: if(append) { mas01cr@84: // FIXME: a hack, a very palpable hack: the vectors have already mas01cr@84: // been inserted, and dbH->length has already been updated. We mas01cr@84: // need to subtract off again the number of vectors that we've mas01cr@84: // inserted this time... mas01cr@84: offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors mas01cr@84: } else { mas01cr@0: offset=0; mas01cr@84: } mas01cr@0: memcpy(l2normTable+offset, l2buf, n*sizeof(double)); mas01cr@0: if(l2buf) mas01mc@17: delete[] l2buf; mas01cr@239: VERB_LOG(2, " done."); mas01cr@193: } mas01cr@193: mas01cr@0: int main(const unsigned argc, char* const argv[]){ mas01cr@0: audioDB(argc, argv); mas01cr@0: }