mas01cr@0: #include "audioDB.h" mas01cr@498: #include "reporter.h" mas01cr@0: mas01cr@697: #include mas01cr@697: mas01mc@324: char* SERVER_ADB_ROOT; mas01mc@324: char* SERVER_ADB_FEATURE_ROOT; mas01mc@308: mas01cr@370: audioDB::audioDB(const unsigned argc, const char *argv[]): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@0: if(processArgs(argc, argv)<0){ mas01cr@0: printf("No command found.\n"); mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@151: error("No command found"); mas01cr@0: } mas01cr@77: mas01mc@324: // Perform database prefix substitution mas01mc@328: if(dbName && adb_root) mas01mc@324: prefix_name((char** const)&dbName, adb_root); mas01mc@324: mas01mc@474: if(O2_ACTION(COM_SERVER)){ mas01cr@0: startServer(); mas01mc@474: } mas01cr@0: else if(O2_ACTION(COM_CREATE)) mas01cr@0: create(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_INSERT)) mas01cr@0: insert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_BATCHINSERT)) mas01cr@0: batchinsert(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_QUERY)) mas01mc@307: if(isClient){ mas01mc@329: if(query_from_key){ mas01mc@332: VERB_LOG(1, "Calling web services query %s on database %s, query=%s\n", radius>0?"(Radius)":"(NN)", dbName, (key&&strlen(key))?key:inFile); mas01mc@328: ws_query_by_key(dbName, key, inFile, (char*)hostport); mas01mc@329: } mas01mc@329: else{ mas01mc@332: VERB_LOG(1, "Calling web services query on database %s, query=%s\n", dbName, (key&&strlen(key))?key:inFile); mas01mc@307: ws_query(dbName, inFile, (char*)hostport); mas01mc@329: } mas01mc@307: } mas01cr@0: else mas01cr@76: query(dbName, inFile); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_STATUS)) mas01cr@0: if(isClient) mas01cr@0: ws_status(dbName,(char*)hostport); mas01cr@0: else mas01cr@0: status(dbName); mas01cr@280: mas01cr@280: else if(O2_ACTION(COM_SAMPLE)) mas01cr@280: sample(dbName); mas01cr@0: mas01cr@0: else if(O2_ACTION(COM_L2NORM)) mas01cr@0: l2norm(dbName); mas01cr@0: mas01cr@193: else if(O2_ACTION(COM_POWER)) mas01cr@193: power_flag(dbName); mas01cr@193: mas01cr@0: else if(O2_ACTION(COM_DUMP)) mas01cr@0: dump(dbName); mas01mc@292: mas01mc@334: else if(O2_ACTION(COM_LISZT)) mas01mc@334: if(isClient) mas01mc@334: ws_liszt(dbName, (char*) hostport); mas01mc@334: else mas01mc@334: liszt(dbName, lisztOffset, lisztLength); mas01mc@334: mas01mc@292: else if(O2_ACTION(COM_INDEX)) mas01mc@292: index_index_db(dbName); mas01cr@0: mas01cr@0: else mas01cr@0: error("Unrecognized command",command); mas01cr@0: } mas01cr@0: mas01cr@508: audioDB::audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01mc@338: isServer = 1; // Set to make errors report over SOAP mas01cr@97: processArgs(argc, argv); mas01mc@324: // Perform database prefix substitution mas01mc@328: if(dbName && adb_root) mas01mc@324: prefix_name((char** const)&dbName, adb_root); mas01cr@97: assert(O2_ACTION(COM_QUERY)); mas01cr@508: query(dbName, inFile, soap, adbQueryResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@370: audioDB::audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS mas01cr@76: { mas01cr@97: try { mas01mc@338: isServer = 1; // Set to make errors report over SOAP mas01cr@97: processArgs(argc, argv); mas01mc@324: // Perform database prefix substitution mas01mc@328: if(dbName && adb_root) mas01mc@324: prefix_name((char** const)&dbName, adb_root); mas01cr@97: assert(O2_ACTION(COM_STATUS)); mas01cr@133: status(dbName, adbStatusResponse); mas01cr@97: } catch(char *err) { mas01cr@97: cleanup(); mas01cr@97: throw(err); mas01cr@97: } mas01cr@76: } mas01cr@76: mas01cr@548: audioDB::audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse): O2_AUDIODB_INITIALIZERS mas01mc@334: { mas01mc@334: try { mas01mc@338: isServer = 1; // Set to make errors report over SOAP mas01mc@338: processArgs(argc, argv); mas01mc@334: // Perform database prefix substitution mas01mc@334: if(dbName && adb_root) mas01mc@334: prefix_name((char** const)&dbName, adb_root); mas01mc@334: assert(O2_ACTION(COM_LISZT)); mas01cr@548: liszt(dbName, lisztOffset, lisztLength, soap, adbLisztResponse); mas01mc@334: } catch(char *err) { mas01mc@334: cleanup(); mas01mc@334: throw(err); mas01mc@334: } mas01mc@334: } mas01mc@334: mas01cr@97: void audioDB::cleanup() { mas01cr@122: cmdline_parser_free(&args_info); mas01cr@196: if(fileTable) mas01cr@196: munmap(fileTable, fileTableLength); mas01cr@196: if(trackTable) mas01cr@196: munmap(trackTable, trackTableLength); mas01cr@196: if(timesTable) mas01cr@196: munmap(timesTable, timesTableLength); mas01mc@314: if(powerTable) mas01mc@314: munmap(powerTable, powerTableLength); mas01cr@196: if(l2normTable) mas01cr@196: munmap(l2normTable, l2normTableLength); mas01mc@324: if(featureFileNameTable) mas01mc@324: munmap(featureFileNameTable, fileTableLength); mas01mc@324: if(timesFileNameTable) mas01mc@324: munmap(timesFileNameTable, fileTableLength); mas01mc@324: if(powerFileNameTable) mas01mc@324: munmap(powerFileNameTable, fileTableLength); mas01mc@292: if(reporter) mas01mc@292: delete reporter; mas01cr@601: if(infid>0) { mas01cr@0: close(infid); mas01cr@601: infid = 0; mas01cr@601: } mas01cr@601: if(powerfd) { mas01cr@601: close(powerfd); mas01cr@601: powerfd = 0; mas01cr@601: } mas01cr@601: if(timesFile) { mas01cr@601: delete timesFile; mas01cr@601: timesFile = 0; mas01cr@601: } mas01cr@498: if(adb) { mas01cr@498: audiodb_close(adb); mas01cr@498: adb = NULL; mas01cr@498: } mas01cr@498: if(lsh) mas01mc@308: delete lsh; mas01cr@0: } mas01cr@0: mas01cr@97: audioDB::~audioDB(){ mas01cr@97: cleanup(); mas01cr@97: } mas01cr@97: mas01cr@370: int audioDB::processArgs(const unsigned argc, const char *argv[]){ mas01cr@0: mas01mj@564: /* KLUDGE: gengetopt generates a function which is not completely mas01mj@564: const-clean in its declaration. We cast argv here to keep the mas01mj@564: compiler happy. -- CSR, 2008-10-08 */ mas01cr@655: if (cmdline_parser (argc, (char **) argv, &args_info) != 0) mas01mj@564: error("Error parsing command line"); mas01mj@564: mas01cr@0: if(argc<2){ mas01cr@0: cmdline_parser_print_version (); mas01cr@0: if (strlen(gengetopt_args_info_purpose) > 0) mas01cr@0: printf("%s\n", gengetopt_args_info_purpose); mas01cr@0: printf("%s\n", gengetopt_args_info_usage); mas01cr@0: printf("%s\n", gengetopt_args_info_help[1]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[2]); mas01cr@0: printf("%s\n", gengetopt_args_info_help[0]); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.help_given){ mas01cr@0: cmdline_parser_print_help(); mas01cr@0: exit(0); mas01cr@0: } mas01cr@0: mas01cr@0: if(args_info.verbosity_given){ mas01cr@239: verbosity = args_info.verbosity_arg; mas01cr@239: if(verbosity < 0 || verbosity > 10){ mas01cr@239: std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl; mas01cr@239: verbosity = 1; mas01cr@0: } mas01cr@0: } mas01cr@0: mas01cr@129: if(args_info.size_given) { mas01cr@256: if(args_info.datasize_given) { mas01cr@256: error("both --size and --datasize given", ""); mas01cr@256: } mas01cr@256: if(args_info.ntracks_given) { mas01cr@256: error("both --size and --ntracks given", ""); mas01cr@256: } mas01cr@256: if(args_info.datadim_given) { mas01cr@256: error("both --size and --datadim given", ""); mas01cr@256: } mas01cr@196: if (args_info.size_arg < 50 || args_info.size_arg > 32000) { mas01cr@129: error("Size out of range", ""); mas01cr@129: } mas01cr@256: double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE); mas01cr@256: /* FIXME: what's the safe way of doing this? */ mas01cr@256: datasize = (unsigned int) ceil(datasize * ratio); mas01cr@256: ntracks = (unsigned int) ceil(ntracks * ratio); mas01cr@256: } else { mas01cr@256: if(args_info.datasize_given) { mas01cr@256: datasize = args_info.datasize_arg; mas01cr@256: } mas01cr@256: if(args_info.ntracks_given) { mas01cr@256: ntracks = args_info.ntracks_arg; mas01cr@256: } mas01cr@256: if(args_info.datadim_given) { mas01cr@256: datadim = args_info.datadim_arg; mas01cr@256: } mas01cr@129: } mas01cr@129: mas01cr@239: if(args_info.radius_given) { mas01cr@239: radius = args_info.radius_arg; mas01mc@307: if(radius < 0 || radius > 1000000000) { mas01cr@77: error("radius out of range"); mas01cr@239: } else { mas01cr@239: VERB_LOG(3, "Setting radius to %f\n", radius); mas01mc@17: } mas01mc@17: } mas01mc@17: mas01mc@292: sequenceLength = args_info.sequencelength_arg; mas01mc@292: if(sequenceLength < 1 || sequenceLength > 1000) { mas01mc@292: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01mc@292: } mas01mc@292: sequenceHop = args_info.sequencehop_arg; mas01mc@292: if(sequenceHop < 1 || sequenceHop > 1000) { mas01mc@292: error("seqhop out of range: 1 <= seqhop <= 1000"); mas01mc@292: } mas01mc@292: mas01mc@292: if (args_info.absolute_threshold_given) { mas01mc@292: if (args_info.absolute_threshold_arg >= 0) { mas01mc@292: error("absolute threshold out of range: should be negative"); mas01mc@292: } mas01mc@292: use_absolute_threshold = true; mas01mc@292: absolute_threshold = args_info.absolute_threshold_arg; mas01mc@292: } mas01mc@292: if (args_info.relative_threshold_given) { mas01mc@292: use_relative_threshold = true; mas01mc@292: relative_threshold = args_info.relative_threshold_arg; mas01mc@292: } mas01mc@292: mas01mc@324: if (args_info.adb_root_given){ mas01mc@324: adb_root = args_info.adb_root_arg; mas01mc@324: } mas01mc@324: mas01mc@324: if (args_info.adb_feature_root_given){ mas01mc@324: adb_feature_root = args_info.adb_feature_root_arg; mas01mc@324: } mas01mc@324: mas01mc@324: // perform dbName path prefix SERVER-side subsitution mas01mc@324: if(SERVER_ADB_ROOT && !adb_root) mas01mc@324: adb_root = SERVER_ADB_ROOT; mas01mc@324: if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root) mas01mc@324: adb_feature_root = SERVER_ADB_FEATURE_ROOT; mas01mc@339: mas01cr@0: if(args_info.SERVER_given){ mas01cr@0: command=COM_SERVER; mas01cr@0: port=args_info.SERVER_arg; mas01cr@0: if(port<100 || port > 100000) mas01cr@0: error("port out of range"); mas01cr@105: #if defined(O2_DEBUG) mas01cr@104: struct sigaction sa; mas01cr@104: sa.sa_sigaction = sigterm_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGTERM, &sa, NULL); mas01cr@104: sa.sa_sigaction = sighup_action; mas01cr@104: sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER; mas01cr@104: sigaction(SIGHUP, &sa, NULL); mas01cr@105: #endif mas01mc@308: if(args_info.load_index_given){ mas01mc@308: if(!args_info.database_given) mas01mc@308: error("load_index requires a --database argument"); mas01mc@308: else mas01mc@308: dbName=args_info.database_arg; mas01mc@308: if(!args_info.radius_given) mas01mc@308: error("load_index requires a --radius argument"); mas01mc@308: if(!args_info.sequencelength_given) mas01mc@308: error("load_index requires a --sequenceLength argument"); mas01mc@308: WS_load_index = true; mas01mc@308: } mas01cr@0: return 0; mas01cr@0: } mas01cr@0: mas01cr@0: // No return on client command, find database command mas01cr@105: if(args_info.client_given){ mas01cr@105: command=COM_CLIENT; mas01cr@105: hostport=args_info.client_arg; mas01cr@105: isClient=1; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.NEW_given){ mas01cr@105: command=COM_CREATE; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.STATUS_given){ mas01cr@105: command=COM_STATUS; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@280: if(args_info.SAMPLE_given) { mas01cr@280: command = COM_SAMPLE; mas01cr@280: dbName = args_info.database_arg; mas01cr@280: sequenceLength = args_info.sequencelength_arg; mas01cr@280: if(sequenceLength < 1 || sequenceLength > 1000) { mas01cr@280: error("seqlen out of range: 1 <= seqlen <= 1000"); mas01cr@280: } mas01cr@659: if(args_info.nsamples_given) { mas01cr@659: nsamples = args_info.nsamples_arg; mas01cr@659: } else if(args_info.resultlength_given) { mas01cr@659: nsamples = args_info.resultlength_arg; mas01cr@659: } else { mas01cr@659: nsamples = args_info.nsamples_arg; mas01cr@659: } mas01cr@659: if(args_info.key_given) { mas01cr@659: query_from_key = true; mas01cr@659: key = args_info.key_arg; mas01cr@697: } else if (args_info.features_given) { mas01cr@697: inFile = args_info.features_arg; mas01cr@659: } mas01cr@696: if(!args_info.exhaustive_flag){ mas01cr@696: queryPoint = args_info.qpoint_arg; mas01cr@696: usingQueryPoint=1; mas01cr@696: if(queryPoint<0 || queryPoint >O2_MAX_VECTORS) mas01cr@696: error("queryPoint out of range: 0 <= queryPoint <= O2_MAX_VECTORS"); mas01cr@696: } mas01cr@696: mas01cr@659: mas01cr@280: return 0; mas01cr@280: } mas01cr@280: mas01cr@105: if(args_info.DUMP_given){ mas01cr@105: command=COM_DUMP; mas01cr@105: dbName=args_info.database_arg; mas01cr@131: output = args_info.output_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@105: if(args_info.L2NORM_given){ mas01cr@105: command=COM_L2NORM; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: return 0; mas01cr@105: } mas01cr@0: mas01cr@193: if(args_info.POWER_given){ mas01cr@193: command=COM_POWER; mas01cr@193: dbName=args_info.database_arg; mas01cr@193: return 0; mas01cr@193: } mas01cr@193: mas01cr@370: if(args_info.INSERT_given) { mas01cr@105: command=COM_INSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.features_arg; mas01cr@370: if(args_info.key_given) { mas01cr@370: if(!args_info.features_given) { mas01mc@292: error("INSERT: '-k key' argument depends on '-f features'"); mas01cr@370: } else { mas01mc@292: key=args_info.key_arg; mas01cr@370: } mas01cr@370: } mas01cr@370: if(args_info.times_given) { mas01cr@105: timesFileName=args_info.times_arg; mas01cr@370: if(strlen(timesFileName)>0) { mas01cr@370: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) { mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@370: } mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@193: if (args_info.power_given) { mas01cr@193: powerFileName = args_info.power_arg; mas01cr@193: if (strlen(powerFileName) > 0) { mas01cr@193: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@193: error("Could not open power file for reading", powerFileName, "open"); mas01cr@193: } mas01cr@193: usingPower = 1; mas01cr@193: } mas01cr@193: } mas01cr@105: return 0; mas01cr@105: } mas01cr@105: mas01cr@370: if(args_info.BATCHINSERT_given) { mas01cr@105: command=COM_BATCHINSERT; mas01cr@105: dbName=args_info.database_arg; mas01cr@105: inFile=args_info.featureList_arg; mas01cr@370: if(args_info.keyList_given) { mas01cr@370: if(!args_info.featureList_given) { mas01tc@300: error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'"); mas01cr@370: } else { mas01cr@304: key=args_info.keyList_arg; // INCONSISTENT NO CHECK mas01cr@370: } mas01cr@370: } mas01cr@105: /* TO DO: REPLACE WITH mas01cr@0: if(args_info.keyList_given){ mas01mc@18: trackFileName=args_info.keyList_arg; mas01cr@239: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01mc@18: error("Could not open keyList file for reading",trackFileName); mas01cr@0: } mas01cr@0: AND UPDATE BATCHINSERT() mas01cr@105: */ mas01cr@105: mas01cr@370: if(args_info.timesList_given) { mas01cr@105: timesFileName=args_info.timesList_arg; mas01cr@370: if(strlen(timesFileName)>0) { mas01cr@239: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@105: error("Could not open timesList file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@370: if(args_info.powerList_given) { mas01cr@193: powerFileName=args_info.powerList_arg; mas01cr@370: if(strlen(powerFileName)>0) { mas01cr@239: if(!(powerFile = new std::ifstream(powerFileName,std::ios::in))) mas01cr@193: error("Could not open powerList file for reading", powerFileName); mas01cr@193: usingPower=1; mas01cr@193: } mas01cr@193: } mas01cr@105: return 0; mas01cr@105: } mas01mc@292: mas01mc@292: // Set no_unit_norm flag mas01mc@768: distance_kullback = args_info.distance_kullback_flag; mas01mc@292: no_unit_norming = args_info.no_unit_norming_flag; mas01mc@292: lsh_use_u_functions = args_info.lsh_use_u_functions_flag; mas01mc@292: mas01mc@292: // LSH Index Command mas01mc@292: if(args_info.INDEX_given){ mas01mc@292: if(radius <= 0 ) mas01mc@292: error("INDEXing requires a Radius argument"); mas01mc@292: if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN)) mas01mc@292: error("INDEXing requires 1 <= sequenceLength <= 1000"); mas01mc@292: command=COM_INDEX; mas01mc@337: if(!args_info.database_given) mas01mc@337: error("INDEXing requires a database"); mas01mc@292: dbName=args_info.database_arg; mas01mc@292: mas01mc@292: // Whether to store LSH hash tables for query in core (FORMAT2) mas01mc@297: lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested mas01mc@292: mas01mc@292: lsh_param_w = args_info.lsh_w_arg; mas01mc@292: if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH)) mas01mc@292: error("Indexing parameter w out of range (0.0 < w <= 100.0)"); mas01mc@292: mas01mc@292: lsh_param_k = args_info.lsh_k_arg; mas01mc@292: if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS)) mas01mc@292: error("Indexing parameter k out of range (1 <= k <= 100)"); mas01mc@292: mas01mc@292: lsh_param_m = args_info.lsh_m_arg; mas01mc@292: if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0)) mas01mc@292: error("Indexing parameter m out of range (1 <= m <= 20)"); mas01mc@292: mas01mc@292: lsh_param_N = args_info.lsh_N_arg; mas01mc@292: if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS)) mas01mc@292: error("Indexing parameter N out of range (1 <= N <= 1000000)"); mas01mc@292: mas01mc@292: lsh_param_b = args_info.lsh_b_arg; mas01mc@292: if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH)) mas01mc@292: error("Indexing parameter b out of range (1 <= b <= 10000)"); mas01mc@292: mas01mc@296: lsh_param_ncols = args_info.lsh_ncols_arg; mas01mc@296: if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing mas01mc@296: lsh_param_ncols = O2_SERIAL_MAX_COLS; mas01mc@292: if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS)) mas01mc@292: error("Indexing parameter ncols out of range (1 <= ncols <= 1000"); mas01mc@292: mas01mc@292: return 0; mas01mc@292: } mas01mc@292: mas01cr@105: // Query command and arguments mas01cr@105: if(args_info.QUERY_given){ mas01cr@105: command=COM_QUERY; mas01cr@105: dbName=args_info.database_arg; mas01mc@292: // XOR features and key search mas01cr@370: if((!args_info.features_given && !args_info.key_given) || (args_info.features_given && args_info.key_given)) mas01mc@292: error("QUERY requires exactly one of either -f features or -k key"); mas01mc@292: if(args_info.features_given) mas01mc@292: inFile=args_info.features_arg; // query from file mas01mc@292: else{ mas01mc@292: query_from_key = true; mas01mc@292: key=args_info.key_arg; // query from key mas01mc@292: } mas01mc@292: mas01cr@105: if(args_info.keyList_given){ mas01cr@105: trackFileName=args_info.keyList_arg; mas01cr@239: if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in))) mas01cr@105: error("Could not open keyList file for reading",trackFileName); mas01cr@105: } mas01cr@105: mas01cr@105: if(args_info.times_given){ mas01cr@105: timesFileName=args_info.times_arg; mas01cr@105: if(strlen(timesFileName)>0){ mas01cr@239: if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) mas01cr@105: error("Could not open times file for reading", timesFileName); mas01cr@105: usingTimes=1; mas01cr@105: } mas01cr@105: } mas01cr@193: mas01cr@193: if(args_info.power_given){ mas01cr@193: powerFileName=args_info.power_arg; mas01cr@193: if(strlen(powerFileName)>0){ mas01cr@193: if (!(powerfd = open(powerFileName, O_RDONLY))) { mas01cr@193: error("Could not open power file for reading", powerFileName, "open"); mas01cr@193: } mas01cr@193: usingPower = 1; mas01cr@193: } mas01cr@193: } mas01cr@105: mas01cr@105: // query type mas01cr@105: if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0) mas01cr@105: queryType=O2_TRACK_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0) mas01cr@105: queryType=O2_POINT_QUERY; mas01cr@105: else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0) mas01cr@105: queryType=O2_SEQUENCE_QUERY; mas01mc@248: else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0) mas01mc@248: queryType=O2_N_SEQUENCE_QUERY; mas01mc@263: else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0) mas01mc@263: queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY; mas01cr@105: else mas01cr@105: error("unsupported query type",args_info.QUERY_arg); mas01cr@105: mas01cr@105: if(!args_info.exhaustive_flag){ mas01cr@105: queryPoint = args_info.qpoint_arg; mas01cr@105: usingQueryPoint=1; mas01mc@467: if(queryPoint<0 || queryPoint >O2_MAX_VECTORS) mas01mc@467: error("queryPoint out of range: 0 <= queryPoint <= O2_MAX_VECTORS"); mas01cr@105: } mas01mc@292: mas01mc@296: // Whether to pre-load LSH hash tables for query (default on, if flag set then off) mas01mc@297: lsh_in_core = !args_info.lsh_on_disk_flag; mas01mc@292: mas01mc@292: // Whether to perform exact evaluation of points returned by LSH mas01mc@292: lsh_exact = args_info.lsh_exact_flag; mas01mc@292: mas01cr@105: pointNN = args_info.pointnn_arg; mas01mc@263: if(pointNN < 1 || pointNN > O2_MAXNN) { mas01mc@263: error("pointNN out of range: 1 <= pointNN <= 1000000"); mas01cr@105: } mas01cr@105: trackNN = args_info.resultlength_arg; mas01mc@263: if(trackNN < 1 || trackNN > O2_MAXNN) { mas01mc@263: error("resultlength out of range: 1 <= resultlength <= 1000000"); mas01cr@105: } mas01cr@105: return 0; mas01cr@105: } mas01mc@334: mas01mc@334: if(args_info.LISZT_given){ mas01mc@334: command = COM_LISZT; mas01mc@334: dbName=args_info.database_arg; mas01mc@334: lisztOffset = args_info.lisztOffset_arg; mas01mc@334: lisztLength = args_info.lisztLength_arg; mas01mc@334: if(args_info.lisztOffset_arg<0) // check upper bound later when database is opened mas01mc@334: error("lisztOffset cannot be negative"); mas01mc@334: if(args_info.lisztLength_arg<0) mas01mc@334: error("lisztLength cannot be negative"); mas01mc@334: if(lisztLength >1000000) mas01mc@334: error("lisztLength too large (>1000000)"); mas01mc@334: return 0; mas01mc@334: } mas01mc@334: mas01cr@105: return -1; // no command found mas01cr@0: } mas01cr@0: mas01cr@133: void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){ mas01cr@498: adb_status_t status; mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDONLY))) { mas01cr@498: error("Failed to open database file", dbName); mas01cr@0: } mas01cr@0: } mas01cr@498: if(audiodb_status(adb, &status)) { mas01cr@498: error("Failed to retrieve database status", dbName); mas01cr@498: } mas01cr@76: mas01cr@133: if(adbStatusResponse == 0) { mas01cr@498: std::cout << "num files:" << status.numFiles << std::endl; mas01cr@498: std::cout << "data dim:" << status.dim < 0) { mas01cr@498: size_t bytes_per_vector = sizeof(double) * status.dim; mas01cr@498: off_t nvectors = status.length / bytes_per_vector; mas01cr@498: off_t data_region_vectors = status.data_region_size / bytes_per_vector; mas01cr@498: std::cout << "total vectors:" << nvectors << std::endl; mas01cr@498: std::cout << "vectors available:"; mas01cr@498: if(status.flags & O2_FLAG_LARGE_ADB) { mas01cr@498: std::cout << O2_MAX_VECTORS - nvectors << std::endl; mas01cr@498: } else { mas01cr@498: std::cout << data_region_vectors - nvectors << std::endl; mas01cr@498: } mas01cr@76: } mas01cr@498: if(!(status.flags & O2_FLAG_LARGE_ADB)) { mas01cr@498: double used_frac = ((double) status.length) / status.data_region_size; mas01cr@498: std::cout << "total bytes:" << status.length << mas01cr@498: " (" << (100.0*used_frac) << "%)" << std::endl; mas01cr@498: std::cout << "bytes available:" << status.data_region_size - status.length << mas01cr@498: " (" << (100.0*(1-used_frac)) << "%)" << std::endl; mas01mc@324: } mas01cr@498: std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(status.flags&O2_FLAG_L2NORM) mas01cr@498: << "] minmax[" << DISPLAY_FLAG(status.flags&O2_FLAG_MINMAX) mas01cr@498: << "] power[" << DISPLAY_FLAG(status.flags&O2_FLAG_POWER) mas01cr@498: << "] times[" << DISPLAY_FLAG(status.flags&O2_FLAG_TIMES) mas01cr@498: << "] largeADB[" << DISPLAY_FLAG(status.flags&O2_FLAG_LARGE_ADB) mas01mc@324: << "]" << endl; mas01mc@324: mas01cr@498: std::cout << "null count: " << status.nullCount << " small sequence count " << status.dudCount-status.nullCount << std::endl; mas01cr@76: } else { mas01cr@498: adbStatusResponse->result.numFiles = status.numFiles; mas01cr@498: adbStatusResponse->result.dim = status.dim; mas01cr@498: adbStatusResponse->result.length = status.length; mas01cr@498: adbStatusResponse->result.dudCount = status.dudCount; mas01cr@498: adbStatusResponse->result.nullCount = status.nullCount; mas01cr@498: adbStatusResponse->result.flags = status.flags; mas01cr@76: } mas01cr@0: } mas01cr@0: mas01cr@498: void audioDB::l2norm(const char* dbName) { mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDWR))) { mas01cr@498: error("Failed to open database file", dbName); mas01ik@355: } mas01ik@355: } mas01cr@498: if(audiodb_l2norm(adb)) { mas01cr@498: error("failed to turn on l2norm flag for database", dbName); mas01cr@0: } mas01cr@0: } mas01cr@193: mas01cr@193: void audioDB::power_flag(const char *dbName) { mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDWR))) { mas01cr@498: error("Failed to open database file", dbName); mas01cr@498: } mas01cr@193: } mas01cr@498: if(audiodb_power(adb)) { mas01cr@498: error("can't turn on power flag for database", dbName); mas01cr@498: } mas01cr@193: } mas01cr@193: mas01cr@498: void audioDB::create(const char *dbName) { mas01cr@498: if(adb) { mas01cr@498: error("Already have an adb in this object", ""); mas01cr@498: } mas01cr@498: if(!(adb = audiodb_create(dbName, datasize, ntracks, datadim))) { mas01cr@498: error("Failed to create database file", dbName); mas01cr@498: } mas01cr@498: } mas01cr@0: mas01cr@498: void audioDB::dump(const char *dbName) { mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDONLY))) { mas01cr@498: error("Failed to open database file", dbName); mas01cr@498: } mas01cr@498: } mas01cr@498: if(audiodb_dump(adb, output)) { mas01cr@498: error("Failed to dump database to ", output); mas01cr@498: } mas01cr@498: status(dbName); mas01cr@498: } mas01cr@0: mas01cr@498: void audioDB::insert(const char* dbName, const char* inFile) { mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDWR))) { mas01cr@498: error("failed to open database", dbName); mas01cr@498: } mas01cr@498: } mas01cr@0: mas01cr@498: /* at this point, we have powerfd (an fd), timesFile (a mas01cr@498: * std::ifstream *) and inFile (a char *). Wacky, huh? Ignore mas01cr@498: * the wackiness and just use the names. */ mas01cr@498: adb_insert_t insert; mas01cr@498: insert.features = inFile; mas01cr@498: insert.times = timesFileName; mas01cr@498: insert.power = powerFileName; mas01cr@498: insert.key = key; mas01cr@0: mas01cr@498: if(audiodb_insert(adb, &insert)) { mas01cr@498: error("insertion failure", inFile); mas01cr@498: } mas01cr@498: status(dbName); mas01cr@498: } mas01cr@0: mas01cr@498: void audioDB::batchinsert(const char* dbName, const char* inFile) { mas01cr@498: if(!adb) { mas01cr@498: if(!(adb = audiodb_open(dbName, O_RDWR))) { mas01cr@498: error("failed to open database", dbName); mas01cr@498: } mas01cr@498: } mas01cr@0: mas01cr@498: if(!key) mas01cr@498: key=inFile; mas01cr@498: std::ifstream *filesIn = 0; mas01cr@498: std::ifstream *keysIn = 0; mas01cr@498: mas01cr@498: if(!(filesIn = new std::ifstream(inFile))) mas01cr@498: error("Could not open batch in file", inFile); mas01cr@498: if(key && key!=inFile) mas01cr@498: if(!(keysIn = new std::ifstream(key))) mas01cr@498: error("Could not open batch key file",key); mas01cr@498: mas01cr@498: unsigned totalVectors=0; mas01cr@498: char *thisFile = new char[MAXSTR]; mas01cr@498: char *thisKey = 0; mas01cr@498: if (key && (key != inFile)) { mas01cr@498: thisKey = new char[MAXSTR]; mas01cr@498: } mas01cr@498: char *thisTimesFileName = new char[MAXSTR]; mas01cr@498: char *thisPowerFileName = new char[MAXSTR]; mas01cr@498: mas01cr@498: do { mas01cr@498: filesIn->getline(thisFile,MAXSTR); mas01cr@498: if(key && key!=inFile) { mas01cr@498: keysIn->getline(thisKey,MAXSTR); mas01cr@498: } else { mas01cr@498: thisKey = thisFile; mas01cr@0: } mas01cr@498: if(usingTimes) { mas01cr@498: timesFile->getline(thisTimesFileName,MAXSTR); mas01cr@498: } mas01cr@498: if(usingPower) { mas01cr@498: powerFile->getline(thisPowerFileName, MAXSTR); mas01cr@498: } mas01cr@498: mas01cr@498: if(filesIn->eof()) { mas01cr@498: break; mas01cr@498: } mas01cr@498: if(usingTimes){ mas01cr@498: if(timesFile->eof()) { mas01cr@498: error("not enough timestamp files in timesList", timesFileName); mas01cr@498: } mas01cr@498: } mas01cr@498: if (usingPower) { mas01cr@498: if(powerFile->eof()) { mas01cr@498: error("not enough power files in powerList", powerFileName); mas01cr@498: } mas01cr@498: } mas01cr@498: adb_insert_t insert; mas01cr@498: insert.features = thisFile; mas01cr@498: insert.times = usingTimes ? thisTimesFileName : NULL; mas01cr@498: insert.power = usingPower ? thisPowerFileName : NULL; mas01cr@498: insert.key = thisKey; mas01cr@498: if(audiodb_insert(adb, &insert)) { mas01cr@498: error("insertion failure", thisFile); mas01cr@498: } mas01cr@498: } while(!filesIn->eof()); mas01cr@498: mas01mc@537: VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * adb->header->dim * sizeof(double))); mas01cr@498: mas01cr@498: delete [] thisPowerFileName; mas01cr@498: if(key && (key != inFile)) { mas01cr@498: delete [] thisKey; mas01cr@0: } mas01cr@498: delete [] thisFile; mas01cr@498: delete [] thisTimesFileName; mas01cr@498: mas01cr@498: delete filesIn; mas01cr@498: delete keysIn; mas01cr@498: mas01cr@498: // Report status mas01cr@498: status(dbName); mas01cr@498: } mas01cr@498: mas01cr@697: void audioDB::datumFromFiles(adb_datum_t *datum) { mas01cr@697: int fd; mas01cr@697: struct stat st; mas01cr@697: mas01cr@697: /* FIXME: around here error conditions will cause all sorts of mas01cr@697: hideous leaks. */ mas01cr@697: fd = open(inFile, O_RDONLY); mas01cr@697: if(fd < 0) { mas01cr@697: error("failed to open feature file", inFile); mas01cr@697: } mas01cr@697: fstat(fd, &st); mas01cr@697: read(fd, &(datum->dim), sizeof(uint32_t)); mas01cr@697: datum->nvectors = (st.st_size - sizeof(uint32_t)) / (datum->dim * sizeof(double)); mas01cr@697: datum->data = (double *) malloc(st.st_size - sizeof(uint32_t)); mas01cr@697: read(fd, datum->data, st.st_size - sizeof(uint32_t)); mas01cr@697: close(fd); mas01cr@697: if(usingPower) { mas01cr@697: uint32_t one; mas01cr@697: fd = open(powerFileName, O_RDONLY); mas01cr@697: if(fd < 0) { mas01cr@697: error("failed to open power file", powerFileName); mas01cr@697: } mas01cr@697: read(fd, &one, sizeof(uint32_t)); mas01cr@697: if(one != 1) { mas01cr@697: error("malformed power file dimensionality", powerFileName); mas01cr@697: } mas01cr@697: datum->power = (double *) malloc(datum->nvectors * sizeof(double)); mas01cr@697: if(read(fd, datum->power, datum->nvectors * sizeof(double)) != (ssize_t) (datum->nvectors * sizeof(double))) { mas01cr@697: error("malformed power file", powerFileName); mas01cr@697: } mas01cr@697: close(fd); mas01cr@697: } mas01cr@697: if(usingTimes) { mas01cr@697: datum->times = (double *) malloc(2 * datum->nvectors * sizeof(double)); mas01cr@697: insertTimeStamps(datum->nvectors, timesFile, datum->times); mas01cr@697: } mas01cr@697: } mas01cr@697: mas01cr@508: void audioDB::query(const char* dbName, const char* inFile, struct soap *soap, adb__queryResponse *adbQueryResponse) { mas01cr@498: mas01cr@498: if(!adb) { mas01cr@515: if(!(adb = audiodb_open(dbName, O_RDONLY))) { mas01cr@498: error("failed to open database", dbName); mas01cr@498: } mas01cr@498: } mas01cr@498: mas01cr@498: /* FIXME: we only need this for getting nfiles, which we only need mas01cr@498: * because the reporters aren't desperately well implemented, mas01cr@498: * relying on statically-sized vectors rather than adjustable data mas01cr@498: * structures. Rework reporter.h to be less lame. */ mas01cr@498: adb_status_t status; mas01cr@498: audiodb_status(adb, &status); mas01cr@498: uint32_t nfiles = status.numFiles; mas01cr@498: mas01cr@498: adb_query_spec_t qspec; mas01cr@498: adb_datum_t datum = {0}; mas01cr@498: mas01cr@498: qspec.refine.flags = 0; mas01cr@498: if(trackFile) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_INCLUDE_KEYLIST; mas01cr@498: std::vector v; mas01cr@498: char *k = new char[MAXSTR]; mas01cr@498: trackFile->getline(k, MAXSTR); mas01cr@498: while(!trackFile->eof()) { mas01cr@498: v.push_back(k); mas01cr@498: k = new char[MAXSTR]; mas01cr@498: trackFile->getline(k, MAXSTR); mas01cr@498: } mas01cr@498: delete [] k; mas01cr@498: qspec.refine.include.nkeys = v.size(); mas01cr@498: qspec.refine.include.keys = new const char *[qspec.refine.include.nkeys]; mas01cr@498: for(unsigned int k = 0; k < qspec.refine.include.nkeys; k++) { mas01cr@498: qspec.refine.include.keys[k] = v[k]; mas01cr@498: } mas01cr@498: } mas01cr@498: if(query_from_key) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST; mas01cr@498: qspec.refine.exclude.nkeys = 1; mas01cr@498: qspec.refine.exclude.keys = &key; mas01cr@498: } mas01cr@498: if(radius) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_RADIUS; mas01cr@498: qspec.refine.radius = radius; mas01cr@498: } mas01cr@498: if(use_absolute_threshold) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD; mas01cr@498: qspec.refine.absolute_threshold = absolute_threshold; mas01cr@498: } mas01cr@498: if(use_relative_threshold) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD; mas01cr@498: qspec.refine.relative_threshold = relative_threshold; mas01cr@498: } mas01cr@498: if(usingTimes) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_DURATION_RATIO; mas01cr@498: qspec.refine.duration_ratio = timesTol; mas01cr@498: } mas01cr@675: mas01cr@675: qspec.refine.qhopsize = sequenceHop; mas01cr@675: qspec.refine.ihopsize = sequenceHop; mas01cr@498: if(sequenceHop != 1) { mas01cr@498: qspec.refine.flags |= ADB_REFINE_HOP_SIZE; mas01cr@498: } mas01cr@498: mas01cr@498: if(query_from_key) { mas01cr@498: datum.key = key; mas01cr@84: } else { mas01cr@697: datumFromFiles(&datum); mas01cr@84: } mas01cr@498: mas01cr@498: qspec.qid.datum = &datum; mas01cr@498: qspec.qid.sequence_length = sequenceLength; mas01cr@498: qspec.qid.flags = 0; mas01cr@498: qspec.qid.flags |= usingQueryPoint ? 0 : ADB_QID_FLAG_EXHAUSTIVE; mas01cr@498: qspec.qid.flags |= lsh_exact ? 0 : ADB_QID_FLAG_ALLOW_FALSE_POSITIVES; mas01cr@498: qspec.qid.sequence_start = queryPoint; mas01cr@498: mas01cr@498: switch(queryType) { mas01cr@498: case O2_POINT_QUERY: mas01cr@498: qspec.qid.sequence_length = 1; mas01cr@498: qspec.params.accumulation = ADB_ACCUMULATION_DB; mas01cr@498: qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT; mas01cr@498: qspec.params.npoints = pointNN; mas01cr@498: qspec.params.ntracks = 0; mas01cr@498: reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN); mas01cr@498: break; mas01cr@498: case O2_TRACK_QUERY: mas01cr@498: qspec.qid.sequence_length = 1; mas01cr@498: qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK; mas01cr@498: qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT; mas01cr@498: qspec.params.npoints = pointNN; mas01cr@498: qspec.params.ntracks = trackNN; mas01cr@498: reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, nfiles); mas01cr@498: break; mas01cr@498: case O2_SEQUENCE_QUERY: mas01cr@498: case O2_N_SEQUENCE_QUERY: mas01cr@498: qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK; mas01mc@768: if (distance_kullback) mas01mc@768: qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; mas01mc@768: else mas01mc@768: qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; mas01cr@498: qspec.params.npoints = pointNN; mas01cr@498: qspec.params.ntracks = trackNN; mas01cr@498: switch(queryType) { mas01cr@498: case O2_SEQUENCE_QUERY: mas01cr@498: if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { mas01cr@498: reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, nfiles); mas01cr@498: } else { mas01cr@498: reporter = new trackSequenceQueryRadReporter(trackNN, nfiles); mas01cr@498: } mas01cr@498: break; mas01cr@498: case O2_N_SEQUENCE_QUERY: mas01cr@498: if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { mas01cr@498: reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, nfiles); mas01cr@498: } else { mas01cr@498: reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, nfiles); mas01cr@498: } mas01cr@498: break; mas01cr@498: } mas01cr@498: break; mas01cr@498: case O2_ONE_TO_ONE_N_SEQUENCE_QUERY: mas01cr@498: qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; mas01mc@768: if (distance_kullback) mas01mc@768: qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; mas01mc@768: else mas01mc@768: qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; mas01cr@498: qspec.params.npoints = 0; mas01cr@498: qspec.params.ntracks = 0; mas01cr@498: if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) { mas01cr@498: error("query-type not yet supported"); mas01cr@498: } else { mas01mc@537: reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, adb->header->numFiles); mas01cr@498: } mas01cr@498: break; mas01cr@498: default: mas01cr@498: error("unrecognized queryType"); mas01cr@498: } mas01cr@498: mas01cr@498: adb_query_results_t *rs = audiodb_query_spec(adb, &qspec); mas01cr@498: mas01cr@507: // FIXME: we don't yet free everything up if there are error mas01cr@507: // conditions during the construction of the query spec (including mas01cr@507: // the datum itself). mas01cr@507: if(datum.data) { mas01cr@507: free(datum.data); mas01cr@507: datum.data = NULL; mas01cr@507: } mas01cr@507: if(datum.power) { mas01cr@577: free(datum.power); mas01cr@577: datum.power = NULL; mas01cr@507: } mas01cr@507: if(datum.times) { mas01cr@577: free(datum.times); mas01cr@577: datum.times = NULL; mas01cr@507: } mas01cr@498: mas01cr@498: if(rs == NULL) { mas01cr@498: error("audiodb_query_spec failed"); mas01cr@498: } mas01cr@498: mas01cr@498: for(unsigned int k = 0; k < rs->nresults; k++) { mas01cr@498: adb_result_t r = rs->results[k]; mas01cr@672: reporter->add_point(audiodb_key_index(adb, r.ikey), r.qpos, r.ipos, r.dist); mas01cr@498: } mas01cr@498: audiodb_query_free_results(adb, &qspec, rs); mas01cr@498: mas01cr@508: reporter->report(adb, soap, adbQueryResponse); mas01cr@193: } mas01cr@193: mas01cr@548: void audioDB::liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap, adb__lisztResponse* adbLisztResponse) { mas01cr@548: if(!adb) { mas01cr@548: if(!(adb = audiodb_open(dbName, O_RDONLY))) { mas01cr@548: error("failed to open database", dbName); mas01cr@548: } mas01cr@548: } mas01cr@548: mas01cr@548: adb_liszt_results_t *results = audiodb_liszt(adb); mas01cr@548: if(!results) { mas01cr@548: error("audiodb_liszt() failed"); mas01cr@548: } mas01cr@548: mas01cr@548: if(offset > results->nresults) { mas01cr@548: audiodb_liszt_free_results(adb, results); mas01cr@548: error("listKeys offset out of range"); mas01cr@548: } mas01cr@548: mas01cr@548: if(!adbLisztResponse){ mas01cr@548: for(uint32_t k = 0; k < numLines && offset + k < results->nresults; k++) { mas01cr@548: uint32_t index = offset + k; mas01cr@548: printf("[%d] %s (%d)\n", index, results->entries[index].key, results->entries[index].nvectors); mas01cr@548: } mas01cr@548: } else { mas01cr@548: adbLisztResponse->result.Rkey = (char **) soap_malloc(soap, numLines * sizeof(char *)); mas01cr@548: adbLisztResponse->result.Rlen = (unsigned int *) soap_malloc(soap, numLines * sizeof(unsigned int)); mas01cr@548: uint32_t k; mas01cr@548: for(k = 0; k < numLines && offset + k < results->nresults; k++) { mas01cr@548: uint32_t index = offset + k; mas01cr@548: adbLisztResponse->result.Rkey[k] = (char *) soap_malloc(soap, O2_MAXFILESTR); mas01cr@548: snprintf(adbLisztResponse->result.Rkey[k], O2_MAXFILESTR, "%s", results->entries[index].key); mas01cr@548: adbLisztResponse->result.Rlen[k] = results->entries[index].nvectors; mas01cr@548: } mas01cr@548: adbLisztResponse->result.__sizeRkey = k; mas01cr@548: adbLisztResponse->result.__sizeRlen = k; mas01cr@548: } mas01cr@548: audiodb_liszt_free_results(adb, results); mas01cr@548: } mas01cr@548: mas01cr@693: static mas01cr@693: double yfun(double d) { mas01cr@693: return gsl_sf_log(d) - gsl_sf_psi(d); mas01cr@693: } mas01cr@693: mas01cr@693: static mas01cr@693: double yinv(double y) { mas01cr@693: double a = 1.0e-5; mas01cr@693: double b = 1000.0; mas01cr@693: mas01cr@693: double ay = yfun(a); mas01cr@693: double by = yfun(b); mas01cr@693: mas01cr@693: double c = 0; mas01cr@693: double cy; mas01cr@693: mas01cr@693: /* FIXME: simple binary search; there's probably some clever solver mas01cr@693: in gsl somewhere which is less sucky. */ mas01cr@693: while ((b - a) > 1.0e-5) { mas01cr@693: c = (a + b) / 2; mas01cr@693: cy = yfun(c); mas01cr@693: if (cy > y) { mas01cr@693: a = c; mas01cr@693: ay = cy; mas01cr@693: } else { mas01cr@693: b = c; mas01cr@693: by = cy; mas01cr@693: } mas01cr@693: } mas01cr@693: mas01cr@693: return c; mas01cr@693: } mas01cr@693: mas01cr@693: void audioDB::sample(const char *dbName) { mas01cr@693: if(!adb) { mas01cr@693: if(!(adb = audiodb_open(dbName, O_RDONLY))) { mas01cr@693: error("failed to open database", dbName); mas01cr@693: } mas01cr@693: } mas01cr@693: mas01cr@693: adb_status_t status; mas01cr@693: if(audiodb_status(adb, &status)) { mas01cr@693: error("error getting status"); mas01cr@693: } mas01cr@693: mas01cr@693: double sumdist = 0; mas01cr@693: double sumlogdist = 0; mas01cr@693: mas01cr@693: adb_query_results_t *results; mas01cr@693: adb_query_spec_t spec = {{0},{0},{0}}; mas01cr@693: adb_datum_t datum = {0}; mas01cr@693: mas01cr@693: spec.refine.qhopsize = sequenceHop; mas01cr@693: spec.refine.ihopsize = sequenceHop; mas01cr@693: if(sequenceHop != 1) { mas01cr@693: spec.refine.flags |= ADB_REFINE_HOP_SIZE; mas01cr@693: } mas01cr@693: mas01cr@693: if(query_from_key) { mas01cr@693: datum.key = key; mas01cr@693: spec.qid.datum = &datum; mas01cr@693: spec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST; mas01cr@693: spec.refine.exclude.nkeys = 1; mas01cr@693: spec.refine.exclude.keys = &key; mas01cr@693: } else if(inFile) { mas01cr@697: datumFromFiles(&datum); mas01cr@697: spec.qid.datum = &datum; mas01cr@693: } else { mas01cr@693: spec.qid.datum = NULL; /* full db sample */ mas01cr@693: } mas01cr@693: spec.qid.sequence_length = sequenceLength; mas01cr@693: spec.qid.flags |= usingQueryPoint ? 0 : ADB_QID_FLAG_EXHAUSTIVE; mas01cr@693: spec.qid.sequence_start = queryPoint; mas01mc@768: if (distance_kullback) mas01mc@768: spec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE; mas01mc@768: else mas01mc@768: spec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED; mas01cr@693: spec.params.accumulation = ADB_ACCUMULATION_DB; mas01cr@693: spec.params.npoints = nsamples; mas01cr@693: mas01cr@693: if(!(results = audiodb_sample_spec(adb, &spec))) { mas01cr@693: error("error in audiodb_sample_spec"); mas01cr@693: } mas01cr@693: mas01cr@697: if(datum.data) { mas01cr@697: free(datum.data); mas01cr@697: datum.data = NULL; mas01cr@697: } mas01cr@697: if(datum.power) { mas01cr@697: free(datum.power); mas01cr@697: datum.power = NULL; mas01cr@697: } mas01cr@697: if(datum.times) { mas01cr@697: free(datum.times); mas01cr@697: datum.times = NULL; mas01cr@697: } mas01cr@697: mas01cr@693: if(results->nresults != nsamples) { mas01cr@693: error("mismatch in sample count"); mas01cr@693: } mas01cr@693: mas01cr@693: for(uint32_t i = 0; i < nsamples; i++) { mas01cr@693: double d = results->results[i].dist; mas01cr@693: sumdist += d; mas01cr@693: sumlogdist += log(d); mas01cr@693: } mas01cr@693: mas01cr@693: audiodb_query_free_results(adb, &spec, results); mas01cr@693: mas01cr@693: unsigned total = 0; mas01cr@693: unsigned count = 0; mas01cr@693: adb_liszt_results_t *liszt; mas01cr@693: if(!(liszt = audiodb_liszt(adb))) { mas01cr@693: error("liszt failed"); mas01cr@693: } mas01cr@693: for(uint32_t i = 0; i < liszt->nresults; i++) { mas01cr@767: int prop = (liszt->entries[i].nvectors - sequenceLength)/sequenceHop + 1; mas01cr@693: prop = prop > 0 ? prop : 0; mas01cr@693: if (prop > 0) { mas01cr@693: count++; mas01cr@693: } mas01cr@693: total += prop; mas01cr@693: } mas01cr@695: audiodb_liszt_free_results(adb, liszt); mas01cr@693: mas01cr@693: /* FIXME: the mean isn't really what we should be using here; it's mas01cr@693: more a question of "how many independent sequences of length mas01cr@693: sequenceLength are there in the database? */ mas01cr@693: unsigned meanN = total / count; mas01cr@693: mas01cr@693: double sigma2 = sumdist / (sequenceLength * status.dim * nsamples); mas01cr@693: double d = 2 * yinv(log(sumdist/nsamples) - sumlogdist/nsamples); mas01cr@693: mas01cr@693: std::cout << "Summary statistics" << std::endl; mas01cr@693: std::cout << "number of samples: " << nsamples << std::endl; mas01cr@693: std::cout << "sum of distances (S): " << sumdist << std::endl; mas01cr@693: std::cout << "sum of log distances (L): " << sumlogdist << std::endl; mas01cr@693: mas01cr@693: /* FIXME: we'll also want some more summary statistics based on mas01cr@693: propTable, for the minimum-of-X estimate */ mas01cr@693: std::cout << "mean number of applicable sequences (N): " << meanN << std::endl; mas01cr@693: std::cout << std::endl; mas01cr@693: std::cout << "Estimated parameters" << std::endl; mas01cr@693: std::cout << "sigma^2: " << sigma2 << "; "; mas01cr@693: std::cout << "Msigma^2: " << sumdist / nsamples << std::endl; mas01cr@693: std::cout << "d: " << d << std::endl; mas01cr@693: mas01cr@693: double logw = (2 / d) * gsl_sf_log(-gsl_sf_log(0.99)); mas01cr@693: double logxthresh = gsl_sf_log(sumdist / nsamples) + logw mas01cr@693: - (2 / d) * gsl_sf_log(meanN) mas01cr@693: - gsl_sf_log(d/2) mas01cr@693: - (2 / d) * gsl_sf_log(2 / d) mas01cr@693: + (2 / d) * gsl_sf_lngamma(d / 2); mas01cr@693: mas01cr@693: std::cout << "track xthresh: " << exp(logxthresh) << std::endl; mas01cr@693: } mas01cr@693: mas01cr@693: mas01mc@308: // This entry point is visited once per instance mas01mc@308: // so it is a good place to set any global state variables mas01cr@370: int main(const int argc, const char* argv[]){ mas01mc@324: SERVER_ADB_ROOT = 0; // Server-side database root prefix mas01mc@324: SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix mas01cr@0: audioDB(argc, argv); mas01cr@0: }