annotate audioDB.cpp @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents b9dbe4611dde
children
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@498 2 #include "reporter.h"
mas01cr@0 3
mas01cr@697 4 #include <gsl/gsl_sf.h>
mas01cr@697 5
// Server-wide fallback for the database path prefix; processArgs() copies it
// into adb_root when no --adb_root option was supplied.
char *SERVER_ADB_ROOT;

// Server-wide fallback for the feature-file path prefix; processArgs() copies
// it into adb_feature_root when no --adb_feature_root option was supplied.
char *SERVER_ADB_FEATURE_ROOT;
mas01mc@308 8
mas01cr@370 9 audioDB::audioDB(const unsigned argc, const char *argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 10 {
mas01cr@0 11 if(processArgs(argc, argv)<0){
mas01cr@0 12 printf("No command found.\n");
mas01cr@0 13 cmdline_parser_print_version ();
mas01cr@0 14 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 15 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 16 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 17 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 18 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 19 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 20 error("No command found");
mas01cr@0 21 }
mas01cr@77 22
mas01mc@324 23 // Perform database prefix substitution
mas01mc@328 24 if(dbName && adb_root)
mas01mc@324 25 prefix_name((char** const)&dbName, adb_root);
mas01mc@324 26
mas01mc@474 27 if(O2_ACTION(COM_SERVER)){
mas01cr@0 28 startServer();
mas01mc@474 29 }
mas01cr@0 30 else if(O2_ACTION(COM_CREATE))
mas01cr@0 31 create(dbName);
mas01cr@0 32
mas01cr@0 33 else if(O2_ACTION(COM_INSERT))
mas01cr@0 34 insert(dbName, inFile);
mas01cr@0 35
mas01cr@0 36 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 37 batchinsert(dbName, inFile);
mas01cr@0 38
mas01cr@0 39 else if(O2_ACTION(COM_QUERY))
mas01mc@307 40 if(isClient){
mas01mc@329 41 if(query_from_key){
mas01mc@332 42 VERB_LOG(1, "Calling web services query %s on database %s, query=%s\n", radius>0?"(Radius)":"(NN)", dbName, (key&&strlen(key))?key:inFile);
mas01mc@328 43 ws_query_by_key(dbName, key, inFile, (char*)hostport);
mas01mc@329 44 }
mas01mc@329 45 else{
mas01mc@332 46 VERB_LOG(1, "Calling web services query on database %s, query=%s\n", dbName, (key&&strlen(key))?key:inFile);
mas01mc@307 47 ws_query(dbName, inFile, (char*)hostport);
mas01mc@329 48 }
mas01mc@307 49 }
mas01cr@0 50 else
mas01cr@76 51 query(dbName, inFile);
mas01cr@0 52
mas01cr@0 53 else if(O2_ACTION(COM_STATUS))
mas01cr@0 54 if(isClient)
mas01cr@0 55 ws_status(dbName,(char*)hostport);
mas01cr@0 56 else
mas01cr@0 57 status(dbName);
mas01cr@280 58
mas01cr@280 59 else if(O2_ACTION(COM_SAMPLE))
mas01cr@280 60 sample(dbName);
mas01cr@0 61
mas01cr@0 62 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 63 l2norm(dbName);
mas01cr@0 64
mas01cr@193 65 else if(O2_ACTION(COM_POWER))
mas01cr@193 66 power_flag(dbName);
mas01cr@193 67
mas01cr@0 68 else if(O2_ACTION(COM_DUMP))
mas01cr@0 69 dump(dbName);
mas01mc@292 70
mas01mc@334 71 else if(O2_ACTION(COM_LISZT))
mas01mc@334 72 if(isClient)
mas01mc@334 73 ws_liszt(dbName, (char*) hostport);
mas01mc@334 74 else
mas01mc@334 75 liszt(dbName, lisztOffset, lisztLength);
mas01mc@334 76
mas01mc@292 77 else if(O2_ACTION(COM_INDEX))
mas01mc@292 78 index_index_db(dbName);
mas01cr@0 79
mas01cr@0 80 else
mas01cr@0 81 error("Unrecognized command",command);
mas01cr@0 82 }
mas01cr@0 83
mas01cr@508 84 audioDB::audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 85 {
mas01cr@97 86 try {
mas01mc@338 87 isServer = 1; // Set to make errors report over SOAP
mas01cr@97 88 processArgs(argc, argv);
mas01mc@324 89 // Perform database prefix substitution
mas01mc@328 90 if(dbName && adb_root)
mas01mc@324 91 prefix_name((char** const)&dbName, adb_root);
mas01cr@97 92 assert(O2_ACTION(COM_QUERY));
mas01cr@508 93 query(dbName, inFile, soap, adbQueryResponse);
mas01cr@97 94 } catch(char *err) {
mas01cr@97 95 cleanup();
mas01cr@97 96 throw(err);
mas01cr@97 97 }
mas01cr@76 98 }
mas01cr@76 99
mas01cr@370 100 audioDB::audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 101 {
mas01cr@97 102 try {
mas01mc@338 103 isServer = 1; // Set to make errors report over SOAP
mas01cr@97 104 processArgs(argc, argv);
mas01mc@324 105 // Perform database prefix substitution
mas01mc@328 106 if(dbName && adb_root)
mas01mc@324 107 prefix_name((char** const)&dbName, adb_root);
mas01cr@97 108 assert(O2_ACTION(COM_STATUS));
mas01cr@133 109 status(dbName, adbStatusResponse);
mas01cr@97 110 } catch(char *err) {
mas01cr@97 111 cleanup();
mas01cr@97 112 throw(err);
mas01cr@97 113 }
mas01cr@76 114 }
mas01cr@76 115
mas01cr@548 116 audioDB::audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse): O2_AUDIODB_INITIALIZERS
mas01mc@334 117 {
mas01mc@334 118 try {
mas01mc@338 119 isServer = 1; // Set to make errors report over SOAP
mas01mc@338 120 processArgs(argc, argv);
mas01mc@334 121 // Perform database prefix substitution
mas01mc@334 122 if(dbName && adb_root)
mas01mc@334 123 prefix_name((char** const)&dbName, adb_root);
mas01mc@334 124 assert(O2_ACTION(COM_LISZT));
mas01cr@548 125 liszt(dbName, lisztOffset, lisztLength, soap, adbLisztResponse);
mas01mc@334 126 } catch(char *err) {
mas01mc@334 127 cleanup();
mas01mc@334 128 throw(err);
mas01mc@334 129 }
mas01mc@334 130 }
mas01mc@334 131
mas01cr@97 132 void audioDB::cleanup() {
mas01cr@122 133 cmdline_parser_free(&args_info);
mas01cr@196 134 if(fileTable)
mas01cr@196 135 munmap(fileTable, fileTableLength);
mas01cr@196 136 if(trackTable)
mas01cr@196 137 munmap(trackTable, trackTableLength);
mas01cr@196 138 if(timesTable)
mas01cr@196 139 munmap(timesTable, timesTableLength);
mas01mc@314 140 if(powerTable)
mas01mc@314 141 munmap(powerTable, powerTableLength);
mas01cr@196 142 if(l2normTable)
mas01cr@196 143 munmap(l2normTable, l2normTableLength);
mas01mc@324 144 if(featureFileNameTable)
mas01mc@324 145 munmap(featureFileNameTable, fileTableLength);
mas01mc@324 146 if(timesFileNameTable)
mas01mc@324 147 munmap(timesFileNameTable, fileTableLength);
mas01mc@324 148 if(powerFileNameTable)
mas01mc@324 149 munmap(powerFileNameTable, fileTableLength);
mas01mc@292 150 if(reporter)
mas01mc@292 151 delete reporter;
mas01cr@601 152 if(infid>0) {
mas01cr@0 153 close(infid);
mas01cr@601 154 infid = 0;
mas01cr@601 155 }
mas01cr@601 156 if(powerfd) {
mas01cr@601 157 close(powerfd);
mas01cr@601 158 powerfd = 0;
mas01cr@601 159 }
mas01cr@601 160 if(timesFile) {
mas01cr@601 161 delete timesFile;
mas01cr@601 162 timesFile = 0;
mas01cr@601 163 }
mas01cr@498 164 if(adb) {
mas01cr@498 165 audiodb_close(adb);
mas01cr@498 166 adb = NULL;
mas01cr@498 167 }
mas01cr@498 168 if(lsh)
mas01mc@308 169 delete lsh;
mas01cr@0 170 }
mas01cr@0 171
mas01cr@97 172 audioDB::~audioDB(){
mas01cr@97 173 cleanup();
mas01cr@97 174 }
mas01cr@97 175
mas01cr@370 176 int audioDB::processArgs(const unsigned argc, const char *argv[]){
mas01cr@0 177
mas01mj@564 178 /* KLUDGE: gengetopt generates a function which is not completely
mas01mj@564 179 const-clean in its declaration. We cast argv here to keep the
mas01mj@564 180 compiler happy. -- CSR, 2008-10-08 */
mas01cr@655 181 if (cmdline_parser (argc, (char **) argv, &args_info) != 0)
mas01mj@564 182 error("Error parsing command line");
mas01mj@564 183
mas01cr@0 184 if(argc<2){
mas01cr@0 185 cmdline_parser_print_version ();
mas01cr@0 186 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 187 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 188 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 189 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 190 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 191 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 192 exit(0);
mas01cr@0 193 }
mas01cr@0 194
mas01cr@0 195 if(args_info.help_given){
mas01cr@0 196 cmdline_parser_print_help();
mas01cr@0 197 exit(0);
mas01cr@0 198 }
mas01cr@0 199
mas01cr@0 200 if(args_info.verbosity_given){
mas01cr@239 201 verbosity = args_info.verbosity_arg;
mas01cr@239 202 if(verbosity < 0 || verbosity > 10){
mas01cr@239 203 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
mas01cr@239 204 verbosity = 1;
mas01cr@0 205 }
mas01cr@0 206 }
mas01cr@0 207
mas01cr@129 208 if(args_info.size_given) {
mas01cr@256 209 if(args_info.datasize_given) {
mas01cr@256 210 error("both --size and --datasize given", "");
mas01cr@256 211 }
mas01cr@256 212 if(args_info.ntracks_given) {
mas01cr@256 213 error("both --size and --ntracks given", "");
mas01cr@256 214 }
mas01cr@256 215 if(args_info.datadim_given) {
mas01cr@256 216 error("both --size and --datadim given", "");
mas01cr@256 217 }
mas01cr@196 218 if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
mas01cr@129 219 error("Size out of range", "");
mas01cr@129 220 }
mas01cr@256 221 double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
mas01cr@256 222 /* FIXME: what's the safe way of doing this? */
mas01cr@256 223 datasize = (unsigned int) ceil(datasize * ratio);
mas01cr@256 224 ntracks = (unsigned int) ceil(ntracks * ratio);
mas01cr@256 225 } else {
mas01cr@256 226 if(args_info.datasize_given) {
mas01cr@256 227 datasize = args_info.datasize_arg;
mas01cr@256 228 }
mas01cr@256 229 if(args_info.ntracks_given) {
mas01cr@256 230 ntracks = args_info.ntracks_arg;
mas01cr@256 231 }
mas01cr@256 232 if(args_info.datadim_given) {
mas01cr@256 233 datadim = args_info.datadim_arg;
mas01cr@256 234 }
mas01cr@129 235 }
mas01cr@129 236
mas01cr@239 237 if(args_info.radius_given) {
mas01cr@239 238 radius = args_info.radius_arg;
mas01mc@307 239 if(radius < 0 || radius > 1000000000) {
mas01cr@77 240 error("radius out of range");
mas01cr@239 241 } else {
mas01cr@239 242 VERB_LOG(3, "Setting radius to %f\n", radius);
mas01mc@17 243 }
mas01mc@17 244 }
mas01mc@17 245
mas01mc@292 246 sequenceLength = args_info.sequencelength_arg;
mas01mc@292 247 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01mc@292 248 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01mc@292 249 }
mas01mc@292 250 sequenceHop = args_info.sequencehop_arg;
mas01mc@292 251 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01mc@292 252 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01mc@292 253 }
mas01mc@292 254
mas01mc@292 255 if (args_info.absolute_threshold_given) {
mas01mc@292 256 if (args_info.absolute_threshold_arg >= 0) {
mas01mc@292 257 error("absolute threshold out of range: should be negative");
mas01mc@292 258 }
mas01mc@292 259 use_absolute_threshold = true;
mas01mc@292 260 absolute_threshold = args_info.absolute_threshold_arg;
mas01mc@292 261 }
mas01mc@292 262 if (args_info.relative_threshold_given) {
mas01mc@292 263 use_relative_threshold = true;
mas01mc@292 264 relative_threshold = args_info.relative_threshold_arg;
mas01mc@292 265 }
mas01mc@292 266
mas01mc@324 267 if (args_info.adb_root_given){
mas01mc@324 268 adb_root = args_info.adb_root_arg;
mas01mc@324 269 }
mas01mc@324 270
mas01mc@324 271 if (args_info.adb_feature_root_given){
mas01mc@324 272 adb_feature_root = args_info.adb_feature_root_arg;
mas01mc@324 273 }
mas01mc@324 274
mas01mc@324 275 // perform dbName path prefix SERVER-side subsitution
mas01mc@324 276 if(SERVER_ADB_ROOT && !adb_root)
mas01mc@324 277 adb_root = SERVER_ADB_ROOT;
mas01mc@324 278 if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root)
mas01mc@324 279 adb_feature_root = SERVER_ADB_FEATURE_ROOT;
mas01mc@339 280
mas01cr@0 281 if(args_info.SERVER_given){
mas01cr@0 282 command=COM_SERVER;
mas01cr@0 283 port=args_info.SERVER_arg;
mas01cr@0 284 if(port<100 || port > 100000)
mas01cr@0 285 error("port out of range");
mas01cr@105 286 #if defined(O2_DEBUG)
mas01cr@104 287 struct sigaction sa;
mas01cr@104 288 sa.sa_sigaction = sigterm_action;
mas01cr@104 289 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 290 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 291 sa.sa_sigaction = sighup_action;
mas01cr@104 292 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 293 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 294 #endif
mas01mc@308 295 if(args_info.load_index_given){
mas01mc@308 296 if(!args_info.database_given)
mas01mc@308 297 error("load_index requires a --database argument");
mas01mc@308 298 else
mas01mc@308 299 dbName=args_info.database_arg;
mas01mc@308 300 if(!args_info.radius_given)
mas01mc@308 301 error("load_index requires a --radius argument");
mas01mc@308 302 if(!args_info.sequencelength_given)
mas01mc@308 303 error("load_index requires a --sequenceLength argument");
mas01mc@308 304 WS_load_index = true;
mas01mc@308 305 }
mas01cr@0 306 return 0;
mas01cr@0 307 }
mas01cr@0 308
mas01cr@0 309 // No return on client command, find database command
mas01cr@105 310 if(args_info.client_given){
mas01cr@105 311 command=COM_CLIENT;
mas01cr@105 312 hostport=args_info.client_arg;
mas01cr@105 313 isClient=1;
mas01cr@105 314 }
mas01cr@0 315
mas01cr@105 316 if(args_info.NEW_given){
mas01cr@105 317 command=COM_CREATE;
mas01cr@105 318 dbName=args_info.database_arg;
mas01cr@105 319 return 0;
mas01cr@105 320 }
mas01cr@0 321
mas01cr@105 322 if(args_info.STATUS_given){
mas01cr@105 323 command=COM_STATUS;
mas01cr@105 324 dbName=args_info.database_arg;
mas01cr@105 325 return 0;
mas01cr@105 326 }
mas01cr@0 327
mas01cr@280 328 if(args_info.SAMPLE_given) {
mas01cr@280 329 command = COM_SAMPLE;
mas01cr@280 330 dbName = args_info.database_arg;
mas01cr@280 331 sequenceLength = args_info.sequencelength_arg;
mas01cr@280 332 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@280 333 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@280 334 }
mas01cr@659 335 if(args_info.nsamples_given) {
mas01cr@659 336 nsamples = args_info.nsamples_arg;
mas01cr@659 337 } else if(args_info.resultlength_given) {
mas01cr@659 338 nsamples = args_info.resultlength_arg;
mas01cr@659 339 } else {
mas01cr@659 340 nsamples = args_info.nsamples_arg;
mas01cr@659 341 }
mas01cr@659 342 if(args_info.key_given) {
mas01cr@659 343 query_from_key = true;
mas01cr@659 344 key = args_info.key_arg;
mas01cr@697 345 } else if (args_info.features_given) {
mas01cr@697 346 inFile = args_info.features_arg;
mas01cr@659 347 }
mas01cr@696 348 if(!args_info.exhaustive_flag){
mas01cr@696 349 queryPoint = args_info.qpoint_arg;
mas01cr@696 350 usingQueryPoint=1;
mas01cr@696 351 if(queryPoint<0 || queryPoint >O2_MAX_VECTORS)
mas01cr@696 352 error("queryPoint out of range: 0 <= queryPoint <= O2_MAX_VECTORS");
mas01cr@696 353 }
mas01cr@696 354
mas01cr@659 355
mas01cr@280 356 return 0;
mas01cr@280 357 }
mas01cr@280 358
mas01cr@105 359 if(args_info.DUMP_given){
mas01cr@105 360 command=COM_DUMP;
mas01cr@105 361 dbName=args_info.database_arg;
mas01cr@131 362 output = args_info.output_arg;
mas01cr@105 363 return 0;
mas01cr@105 364 }
mas01cr@0 365
mas01cr@105 366 if(args_info.L2NORM_given){
mas01cr@105 367 command=COM_L2NORM;
mas01cr@105 368 dbName=args_info.database_arg;
mas01cr@105 369 return 0;
mas01cr@105 370 }
mas01cr@0 371
mas01cr@193 372 if(args_info.POWER_given){
mas01cr@193 373 command=COM_POWER;
mas01cr@193 374 dbName=args_info.database_arg;
mas01cr@193 375 return 0;
mas01cr@193 376 }
mas01cr@193 377
mas01cr@370 378 if(args_info.INSERT_given) {
mas01cr@105 379 command=COM_INSERT;
mas01cr@105 380 dbName=args_info.database_arg;
mas01cr@105 381 inFile=args_info.features_arg;
mas01cr@370 382 if(args_info.key_given) {
mas01cr@370 383 if(!args_info.features_given) {
mas01mc@292 384 error("INSERT: '-k key' argument depends on '-f features'");
mas01cr@370 385 } else {
mas01mc@292 386 key=args_info.key_arg;
mas01cr@370 387 }
mas01cr@370 388 }
mas01cr@370 389 if(args_info.times_given) {
mas01cr@105 390 timesFileName=args_info.times_arg;
mas01cr@370 391 if(strlen(timesFileName)>0) {
mas01cr@370 392 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in))) {
mas01cr@105 393 error("Could not open times file for reading", timesFileName);
mas01cr@370 394 }
mas01cr@105 395 usingTimes=1;
mas01cr@105 396 }
mas01cr@105 397 }
mas01cr@193 398 if (args_info.power_given) {
mas01cr@193 399 powerFileName = args_info.power_arg;
mas01cr@193 400 if (strlen(powerFileName) > 0) {
mas01cr@193 401 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 402 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 403 }
mas01cr@193 404 usingPower = 1;
mas01cr@193 405 }
mas01cr@193 406 }
mas01cr@105 407 return 0;
mas01cr@105 408 }
mas01cr@105 409
mas01cr@370 410 if(args_info.BATCHINSERT_given) {
mas01cr@105 411 command=COM_BATCHINSERT;
mas01cr@105 412 dbName=args_info.database_arg;
mas01cr@105 413 inFile=args_info.featureList_arg;
mas01cr@370 414 if(args_info.keyList_given) {
mas01cr@370 415 if(!args_info.featureList_given) {
mas01tc@300 416 error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'");
mas01cr@370 417 } else {
mas01cr@304 418 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@370 419 }
mas01cr@370 420 }
mas01cr@105 421 /* TO DO: REPLACE WITH
mas01cr@0 422 if(args_info.keyList_given){
mas01mc@18 423 trackFileName=args_info.keyList_arg;
mas01cr@239 424 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01mc@18 425 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 426 }
mas01cr@0 427 AND UPDATE BATCHINSERT()
mas01cr@105 428 */
mas01cr@105 429
mas01cr@370 430 if(args_info.timesList_given) {
mas01cr@105 431 timesFileName=args_info.timesList_arg;
mas01cr@370 432 if(strlen(timesFileName)>0) {
mas01cr@239 433 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 434 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 435 usingTimes=1;
mas01cr@105 436 }
mas01cr@105 437 }
mas01cr@370 438 if(args_info.powerList_given) {
mas01cr@193 439 powerFileName=args_info.powerList_arg;
mas01cr@370 440 if(strlen(powerFileName)>0) {
mas01cr@239 441 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
mas01cr@193 442 error("Could not open powerList file for reading", powerFileName);
mas01cr@193 443 usingPower=1;
mas01cr@193 444 }
mas01cr@193 445 }
mas01cr@105 446 return 0;
mas01cr@105 447 }
mas01mc@292 448
mas01mc@292 449 // Set no_unit_norm flag
mas01mc@768 450 distance_kullback = args_info.distance_kullback_flag;
mas01mc@292 451 no_unit_norming = args_info.no_unit_norming_flag;
mas01mc@292 452 lsh_use_u_functions = args_info.lsh_use_u_functions_flag;
mas01mc@292 453
mas01mc@292 454 // LSH Index Command
mas01mc@292 455 if(args_info.INDEX_given){
mas01mc@292 456 if(radius <= 0 )
mas01mc@292 457 error("INDEXing requires a Radius argument");
mas01mc@292 458 if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN))
mas01mc@292 459 error("INDEXing requires 1 <= sequenceLength <= 1000");
mas01mc@292 460 command=COM_INDEX;
mas01mc@337 461 if(!args_info.database_given)
mas01mc@337 462 error("INDEXing requires a database");
mas01mc@292 463 dbName=args_info.database_arg;
mas01mc@292 464
mas01mc@292 465 // Whether to store LSH hash tables for query in core (FORMAT2)
mas01mc@297 466 lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested
mas01mc@292 467
mas01mc@292 468 lsh_param_w = args_info.lsh_w_arg;
mas01mc@292 469 if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH))
mas01mc@292 470 error("Indexing parameter w out of range (0.0 < w <= 100.0)");
mas01mc@292 471
mas01mc@292 472 lsh_param_k = args_info.lsh_k_arg;
mas01mc@292 473 if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS))
mas01mc@292 474 error("Indexing parameter k out of range (1 <= k <= 100)");
mas01mc@292 475
mas01mc@292 476 lsh_param_m = args_info.lsh_m_arg;
mas01mc@292 477 if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0))
mas01mc@292 478 error("Indexing parameter m out of range (1 <= m <= 20)");
mas01mc@292 479
mas01mc@292 480 lsh_param_N = args_info.lsh_N_arg;
mas01mc@292 481 if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS))
mas01mc@292 482 error("Indexing parameter N out of range (1 <= N <= 1000000)");
mas01mc@292 483
mas01mc@292 484 lsh_param_b = args_info.lsh_b_arg;
mas01mc@292 485 if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH))
mas01mc@292 486 error("Indexing parameter b out of range (1 <= b <= 10000)");
mas01mc@292 487
mas01mc@296 488 lsh_param_ncols = args_info.lsh_ncols_arg;
mas01mc@296 489 if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing
mas01mc@296 490 lsh_param_ncols = O2_SERIAL_MAX_COLS;
mas01mc@292 491 if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS))
mas01mc@292 492 error("Indexing parameter ncols out of range (1 <= ncols <= 1000");
mas01mc@292 493
mas01mc@292 494 return 0;
mas01mc@292 495 }
mas01mc@292 496
mas01cr@105 497 // Query command and arguments
mas01cr@105 498 if(args_info.QUERY_given){
mas01cr@105 499 command=COM_QUERY;
mas01cr@105 500 dbName=args_info.database_arg;
mas01mc@292 501 // XOR features and key search
mas01cr@370 502 if((!args_info.features_given && !args_info.key_given) || (args_info.features_given && args_info.key_given))
mas01mc@292 503 error("QUERY requires exactly one of either -f features or -k key");
mas01mc@292 504 if(args_info.features_given)
mas01mc@292 505 inFile=args_info.features_arg; // query from file
mas01mc@292 506 else{
mas01mc@292 507 query_from_key = true;
mas01mc@292 508 key=args_info.key_arg; // query from key
mas01mc@292 509 }
mas01mc@292 510
mas01cr@105 511 if(args_info.keyList_given){
mas01cr@105 512 trackFileName=args_info.keyList_arg;
mas01cr@239 513 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01cr@105 514 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 515 }
mas01cr@105 516
mas01cr@105 517 if(args_info.times_given){
mas01cr@105 518 timesFileName=args_info.times_arg;
mas01cr@105 519 if(strlen(timesFileName)>0){
mas01cr@239 520 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 521 error("Could not open times file for reading", timesFileName);
mas01cr@105 522 usingTimes=1;
mas01cr@105 523 }
mas01cr@105 524 }
mas01cr@193 525
mas01cr@193 526 if(args_info.power_given){
mas01cr@193 527 powerFileName=args_info.power_arg;
mas01cr@193 528 if(strlen(powerFileName)>0){
mas01cr@193 529 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 530 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 531 }
mas01cr@193 532 usingPower = 1;
mas01cr@193 533 }
mas01cr@193 534 }
mas01cr@105 535
mas01cr@105 536 // query type
mas01cr@105 537 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 538 queryType=O2_TRACK_QUERY;
mas01cr@105 539 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 540 queryType=O2_POINT_QUERY;
mas01cr@105 541 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 542 queryType=O2_SEQUENCE_QUERY;
mas01mc@248 543 else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
mas01mc@248 544 queryType=O2_N_SEQUENCE_QUERY;
mas01mc@263 545 else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
mas01mc@263 546 queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
mas01cr@105 547 else
mas01cr@105 548 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 549
mas01cr@105 550 if(!args_info.exhaustive_flag){
mas01cr@105 551 queryPoint = args_info.qpoint_arg;
mas01cr@105 552 usingQueryPoint=1;
mas01mc@467 553 if(queryPoint<0 || queryPoint >O2_MAX_VECTORS)
mas01mc@467 554 error("queryPoint out of range: 0 <= queryPoint <= O2_MAX_VECTORS");
mas01cr@105 555 }
mas01mc@292 556
mas01mc@296 557 // Whether to pre-load LSH hash tables for query (default on, if flag set then off)
mas01mc@297 558 lsh_in_core = !args_info.lsh_on_disk_flag;
mas01mc@292 559
mas01mc@292 560 // Whether to perform exact evaluation of points returned by LSH
mas01mc@292 561 lsh_exact = args_info.lsh_exact_flag;
mas01mc@292 562
mas01cr@105 563 pointNN = args_info.pointnn_arg;
mas01mc@263 564 if(pointNN < 1 || pointNN > O2_MAXNN) {
mas01mc@263 565 error("pointNN out of range: 1 <= pointNN <= 1000000");
mas01cr@105 566 }
mas01cr@105 567 trackNN = args_info.resultlength_arg;
mas01mc@263 568 if(trackNN < 1 || trackNN > O2_MAXNN) {
mas01mc@263 569 error("resultlength out of range: 1 <= resultlength <= 1000000");
mas01cr@105 570 }
mas01cr@105 571 return 0;
mas01cr@105 572 }
mas01mc@334 573
mas01mc@334 574 if(args_info.LISZT_given){
mas01mc@334 575 command = COM_LISZT;
mas01mc@334 576 dbName=args_info.database_arg;
mas01mc@334 577 lisztOffset = args_info.lisztOffset_arg;
mas01mc@334 578 lisztLength = args_info.lisztLength_arg;
mas01mc@334 579 if(args_info.lisztOffset_arg<0) // check upper bound later when database is opened
mas01mc@334 580 error("lisztOffset cannot be negative");
mas01mc@334 581 if(args_info.lisztLength_arg<0)
mas01mc@334 582 error("lisztLength cannot be negative");
mas01mc@334 583 if(lisztLength >1000000)
mas01mc@334 584 error("lisztLength too large (>1000000)");
mas01mc@334 585 return 0;
mas01mc@334 586 }
mas01mc@334 587
mas01cr@105 588 return -1; // no command found
mas01cr@0 589 }
mas01cr@0 590
mas01cr@133 591 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@498 592 adb_status_t status;
mas01cr@498 593 if(!adb) {
mas01cr@498 594 if(!(adb = audiodb_open(dbName, O_RDONLY))) {
mas01cr@498 595 error("Failed to open database file", dbName);
mas01cr@0 596 }
mas01cr@0 597 }
mas01cr@498 598 if(audiodb_status(adb, &status)) {
mas01cr@498 599 error("Failed to retrieve database status", dbName);
mas01cr@498 600 }
mas01cr@76 601
mas01cr@133 602 if(adbStatusResponse == 0) {
mas01cr@498 603 std::cout << "num files:" << status.numFiles << std::endl;
mas01cr@498 604 std::cout << "data dim:" << status.dim <<std::endl;
mas01cr@498 605 if(status.dim > 0) {
mas01cr@498 606 size_t bytes_per_vector = sizeof(double) * status.dim;
mas01cr@498 607 off_t nvectors = status.length / bytes_per_vector;
mas01cr@498 608 off_t data_region_vectors = status.data_region_size / bytes_per_vector;
mas01cr@498 609 std::cout << "total vectors:" << nvectors << std::endl;
mas01cr@498 610 std::cout << "vectors available:";
mas01cr@498 611 if(status.flags & O2_FLAG_LARGE_ADB) {
mas01cr@498 612 std::cout << O2_MAX_VECTORS - nvectors << std::endl;
mas01cr@498 613 } else {
mas01cr@498 614 std::cout << data_region_vectors - nvectors << std::endl;
mas01cr@498 615 }
mas01cr@76 616 }
mas01cr@498 617 if(!(status.flags & O2_FLAG_LARGE_ADB)) {
mas01cr@498 618 double used_frac = ((double) status.length) / status.data_region_size;
mas01cr@498 619 std::cout << "total bytes:" << status.length <<
mas01cr@498 620 " (" << (100.0*used_frac) << "%)" << std::endl;
mas01cr@498 621 std::cout << "bytes available:" << status.data_region_size - status.length <<
mas01cr@498 622 " (" << (100.0*(1-used_frac)) << "%)" << std::endl;
mas01mc@324 623 }
mas01cr@498 624 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(status.flags&O2_FLAG_L2NORM)
mas01cr@498 625 << "] minmax[" << DISPLAY_FLAG(status.flags&O2_FLAG_MINMAX)
mas01cr@498 626 << "] power[" << DISPLAY_FLAG(status.flags&O2_FLAG_POWER)
mas01cr@498 627 << "] times[" << DISPLAY_FLAG(status.flags&O2_FLAG_TIMES)
mas01cr@498 628 << "] largeADB[" << DISPLAY_FLAG(status.flags&O2_FLAG_LARGE_ADB)
mas01mc@324 629 << "]" << endl;
mas01mc@324 630
mas01cr@498 631 std::cout << "null count: " << status.nullCount << " small sequence count " << status.dudCount-status.nullCount << std::endl;
mas01cr@76 632 } else {
mas01cr@498 633 adbStatusResponse->result.numFiles = status.numFiles;
mas01cr@498 634 adbStatusResponse->result.dim = status.dim;
mas01cr@498 635 adbStatusResponse->result.length = status.length;
mas01cr@498 636 adbStatusResponse->result.dudCount = status.dudCount;
mas01cr@498 637 adbStatusResponse->result.nullCount = status.nullCount;
mas01cr@498 638 adbStatusResponse->result.flags = status.flags;
mas01cr@76 639 }
mas01cr@0 640 }
mas01cr@0 641
mas01cr@498 642 void audioDB::l2norm(const char* dbName) {
mas01cr@498 643 if(!adb) {
mas01cr@498 644 if(!(adb = audiodb_open(dbName, O_RDWR))) {
mas01cr@498 645 error("Failed to open database file", dbName);
mas01ik@355 646 }
mas01ik@355 647 }
mas01cr@498 648 if(audiodb_l2norm(adb)) {
mas01cr@498 649 error("failed to turn on l2norm flag for database", dbName);
mas01cr@0 650 }
mas01cr@0 651 }
mas01cr@193 652
mas01cr@193 653 void audioDB::power_flag(const char *dbName) {
mas01cr@498 654 if(!adb) {
mas01cr@498 655 if(!(adb = audiodb_open(dbName, O_RDWR))) {
mas01cr@498 656 error("Failed to open database file", dbName);
mas01cr@498 657 }
mas01cr@193 658 }
mas01cr@498 659 if(audiodb_power(adb)) {
mas01cr@498 660 error("can't turn on power flag for database", dbName);
mas01cr@498 661 }
mas01cr@193 662 }
mas01cr@193 663
mas01cr@498 664 void audioDB::create(const char *dbName) {
mas01cr@498 665 if(adb) {
mas01cr@498 666 error("Already have an adb in this object", "");
mas01cr@498 667 }
mas01cr@498 668 if(!(adb = audiodb_create(dbName, datasize, ntracks, datadim))) {
mas01cr@498 669 error("Failed to create database file", dbName);
mas01cr@498 670 }
mas01cr@498 671 }
mas01cr@0 672
mas01cr@498 673 void audioDB::dump(const char *dbName) {
mas01cr@498 674 if(!adb) {
mas01cr@498 675 if(!(adb = audiodb_open(dbName, O_RDONLY))) {
mas01cr@498 676 error("Failed to open database file", dbName);
mas01cr@498 677 }
mas01cr@498 678 }
mas01cr@498 679 if(audiodb_dump(adb, output)) {
mas01cr@498 680 error("Failed to dump database to ", output);
mas01cr@498 681 }
mas01cr@498 682 status(dbName);
mas01cr@498 683 }
mas01cr@0 684
mas01cr@498 685 void audioDB::insert(const char* dbName, const char* inFile) {
mas01cr@498 686 if(!adb) {
mas01cr@498 687 if(!(adb = audiodb_open(dbName, O_RDWR))) {
mas01cr@498 688 error("failed to open database", dbName);
mas01cr@498 689 }
mas01cr@498 690 }
mas01cr@0 691
mas01cr@498 692 /* at this point, we have powerfd (an fd), timesFile (a
mas01cr@498 693 * std::ifstream *) and inFile (a char *). Wacky, huh? Ignore
mas01cr@498 694 * the wackiness and just use the names. */
mas01cr@498 695 adb_insert_t insert;
mas01cr@498 696 insert.features = inFile;
mas01cr@498 697 insert.times = timesFileName;
mas01cr@498 698 insert.power = powerFileName;
mas01cr@498 699 insert.key = key;
mas01cr@0 700
mas01cr@498 701 if(audiodb_insert(adb, &insert)) {
mas01cr@498 702 error("insertion failure", inFile);
mas01cr@498 703 }
mas01cr@498 704 status(dbName);
mas01cr@498 705 }
mas01cr@0 706
mas01cr@498 707 void audioDB::batchinsert(const char* dbName, const char* inFile) {
mas01cr@498 708 if(!adb) {
mas01cr@498 709 if(!(adb = audiodb_open(dbName, O_RDWR))) {
mas01cr@498 710 error("failed to open database", dbName);
mas01cr@498 711 }
mas01cr@498 712 }
mas01cr@0 713
mas01cr@498 714 if(!key)
mas01cr@498 715 key=inFile;
mas01cr@498 716 std::ifstream *filesIn = 0;
mas01cr@498 717 std::ifstream *keysIn = 0;
mas01cr@498 718
mas01cr@498 719 if(!(filesIn = new std::ifstream(inFile)))
mas01cr@498 720 error("Could not open batch in file", inFile);
mas01cr@498 721 if(key && key!=inFile)
mas01cr@498 722 if(!(keysIn = new std::ifstream(key)))
mas01cr@498 723 error("Could not open batch key file",key);
mas01cr@498 724
mas01cr@498 725 unsigned totalVectors=0;
mas01cr@498 726 char *thisFile = new char[MAXSTR];
mas01cr@498 727 char *thisKey = 0;
mas01cr@498 728 if (key && (key != inFile)) {
mas01cr@498 729 thisKey = new char[MAXSTR];
mas01cr@498 730 }
mas01cr@498 731 char *thisTimesFileName = new char[MAXSTR];
mas01cr@498 732 char *thisPowerFileName = new char[MAXSTR];
mas01cr@498 733
mas01cr@498 734 do {
mas01cr@498 735 filesIn->getline(thisFile,MAXSTR);
mas01cr@498 736 if(key && key!=inFile) {
mas01cr@498 737 keysIn->getline(thisKey,MAXSTR);
mas01cr@498 738 } else {
mas01cr@498 739 thisKey = thisFile;
mas01cr@0 740 }
mas01cr@498 741 if(usingTimes) {
mas01cr@498 742 timesFile->getline(thisTimesFileName,MAXSTR);
mas01cr@498 743 }
mas01cr@498 744 if(usingPower) {
mas01cr@498 745 powerFile->getline(thisPowerFileName, MAXSTR);
mas01cr@498 746 }
mas01cr@498 747
mas01cr@498 748 if(filesIn->eof()) {
mas01cr@498 749 break;
mas01cr@498 750 }
mas01cr@498 751 if(usingTimes){
mas01cr@498 752 if(timesFile->eof()) {
mas01cr@498 753 error("not enough timestamp files in timesList", timesFileName);
mas01cr@498 754 }
mas01cr@498 755 }
mas01cr@498 756 if (usingPower) {
mas01cr@498 757 if(powerFile->eof()) {
mas01cr@498 758 error("not enough power files in powerList", powerFileName);
mas01cr@498 759 }
mas01cr@498 760 }
mas01cr@498 761 adb_insert_t insert;
mas01cr@498 762 insert.features = thisFile;
mas01cr@498 763 insert.times = usingTimes ? thisTimesFileName : NULL;
mas01cr@498 764 insert.power = usingPower ? thisPowerFileName : NULL;
mas01cr@498 765 insert.key = thisKey;
mas01cr@498 766 if(audiodb_insert(adb, &insert)) {
mas01cr@498 767 error("insertion failure", thisFile);
mas01cr@498 768 }
mas01cr@498 769 } while(!filesIn->eof());
mas01cr@498 770
mas01mc@537 771 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * adb->header->dim * sizeof(double)));
mas01cr@498 772
mas01cr@498 773 delete [] thisPowerFileName;
mas01cr@498 774 if(key && (key != inFile)) {
mas01cr@498 775 delete [] thisKey;
mas01cr@0 776 }
mas01cr@498 777 delete [] thisFile;
mas01cr@498 778 delete [] thisTimesFileName;
mas01cr@498 779
mas01cr@498 780 delete filesIn;
mas01cr@498 781 delete keysIn;
mas01cr@498 782
mas01cr@498 783 // Report status
mas01cr@498 784 status(dbName);
mas01cr@498 785 }
mas01cr@498 786
mas01cr@697 787 void audioDB::datumFromFiles(adb_datum_t *datum) {
mas01cr@697 788 int fd;
mas01cr@697 789 struct stat st;
mas01cr@697 790
mas01cr@697 791 /* FIXME: around here error conditions will cause all sorts of
mas01cr@697 792 hideous leaks. */
mas01cr@697 793 fd = open(inFile, O_RDONLY);
mas01cr@697 794 if(fd < 0) {
mas01cr@697 795 error("failed to open feature file", inFile);
mas01cr@697 796 }
mas01cr@697 797 fstat(fd, &st);
mas01cr@697 798 read(fd, &(datum->dim), sizeof(uint32_t));
mas01cr@697 799 datum->nvectors = (st.st_size - sizeof(uint32_t)) / (datum->dim * sizeof(double));
mas01cr@697 800 datum->data = (double *) malloc(st.st_size - sizeof(uint32_t));
mas01cr@697 801 read(fd, datum->data, st.st_size - sizeof(uint32_t));
mas01cr@697 802 close(fd);
mas01cr@697 803 if(usingPower) {
mas01cr@697 804 uint32_t one;
mas01cr@697 805 fd = open(powerFileName, O_RDONLY);
mas01cr@697 806 if(fd < 0) {
mas01cr@697 807 error("failed to open power file", powerFileName);
mas01cr@697 808 }
mas01cr@697 809 read(fd, &one, sizeof(uint32_t));
mas01cr@697 810 if(one != 1) {
mas01cr@697 811 error("malformed power file dimensionality", powerFileName);
mas01cr@697 812 }
mas01cr@697 813 datum->power = (double *) malloc(datum->nvectors * sizeof(double));
mas01cr@697 814 if(read(fd, datum->power, datum->nvectors * sizeof(double)) != (ssize_t) (datum->nvectors * sizeof(double))) {
mas01cr@697 815 error("malformed power file", powerFileName);
mas01cr@697 816 }
mas01cr@697 817 close(fd);
mas01cr@697 818 }
mas01cr@697 819 if(usingTimes) {
mas01cr@697 820 datum->times = (double *) malloc(2 * datum->nvectors * sizeof(double));
mas01cr@697 821 insertTimeStamps(datum->nvectors, timesFile, datum->times);
mas01cr@697 822 }
mas01cr@697 823 }
mas01cr@697 824
mas01cr@508 825 void audioDB::query(const char* dbName, const char* inFile, struct soap *soap, adb__queryResponse *adbQueryResponse) {
mas01cr@498 826
mas01cr@498 827 if(!adb) {
mas01cr@515 828 if(!(adb = audiodb_open(dbName, O_RDONLY))) {
mas01cr@498 829 error("failed to open database", dbName);
mas01cr@498 830 }
mas01cr@498 831 }
mas01cr@498 832
mas01cr@498 833 /* FIXME: we only need this for getting nfiles, which we only need
mas01cr@498 834 * because the reporters aren't desperately well implemented,
mas01cr@498 835 * relying on statically-sized vectors rather than adjustable data
mas01cr@498 836 * structures. Rework reporter.h to be less lame. */
mas01cr@498 837 adb_status_t status;
mas01cr@498 838 audiodb_status(adb, &status);
mas01cr@498 839 uint32_t nfiles = status.numFiles;
mas01cr@498 840
mas01cr@498 841 adb_query_spec_t qspec;
mas01cr@498 842 adb_datum_t datum = {0};
mas01cr@498 843
mas01cr@498 844 qspec.refine.flags = 0;
mas01cr@498 845 if(trackFile) {
mas01cr@498 846 qspec.refine.flags |= ADB_REFINE_INCLUDE_KEYLIST;
mas01cr@498 847 std::vector<const char *> v;
mas01cr@498 848 char *k = new char[MAXSTR];
mas01cr@498 849 trackFile->getline(k, MAXSTR);
mas01cr@498 850 while(!trackFile->eof()) {
mas01cr@498 851 v.push_back(k);
mas01cr@498 852 k = new char[MAXSTR];
mas01cr@498 853 trackFile->getline(k, MAXSTR);
mas01cr@498 854 }
mas01cr@498 855 delete [] k;
mas01cr@498 856 qspec.refine.include.nkeys = v.size();
mas01cr@498 857 qspec.refine.include.keys = new const char *[qspec.refine.include.nkeys];
mas01cr@498 858 for(unsigned int k = 0; k < qspec.refine.include.nkeys; k++) {
mas01cr@498 859 qspec.refine.include.keys[k] = v[k];
mas01cr@498 860 }
mas01cr@498 861 }
mas01cr@498 862 if(query_from_key) {
mas01cr@498 863 qspec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST;
mas01cr@498 864 qspec.refine.exclude.nkeys = 1;
mas01cr@498 865 qspec.refine.exclude.keys = &key;
mas01cr@498 866 }
mas01cr@498 867 if(radius) {
mas01cr@498 868 qspec.refine.flags |= ADB_REFINE_RADIUS;
mas01cr@498 869 qspec.refine.radius = radius;
mas01cr@498 870 }
mas01cr@498 871 if(use_absolute_threshold) {
mas01cr@498 872 qspec.refine.flags |= ADB_REFINE_ABSOLUTE_THRESHOLD;
mas01cr@498 873 qspec.refine.absolute_threshold = absolute_threshold;
mas01cr@498 874 }
mas01cr@498 875 if(use_relative_threshold) {
mas01cr@498 876 qspec.refine.flags |= ADB_REFINE_RELATIVE_THRESHOLD;
mas01cr@498 877 qspec.refine.relative_threshold = relative_threshold;
mas01cr@498 878 }
mas01cr@498 879 if(usingTimes) {
mas01cr@498 880 qspec.refine.flags |= ADB_REFINE_DURATION_RATIO;
mas01cr@498 881 qspec.refine.duration_ratio = timesTol;
mas01cr@498 882 }
mas01cr@675 883
mas01cr@675 884 qspec.refine.qhopsize = sequenceHop;
mas01cr@675 885 qspec.refine.ihopsize = sequenceHop;
mas01cr@498 886 if(sequenceHop != 1) {
mas01cr@498 887 qspec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@498 888 }
mas01cr@498 889
mas01cr@498 890 if(query_from_key) {
mas01cr@498 891 datum.key = key;
mas01cr@84 892 } else {
mas01cr@697 893 datumFromFiles(&datum);
mas01cr@84 894 }
mas01cr@498 895
mas01cr@498 896 qspec.qid.datum = &datum;
mas01cr@498 897 qspec.qid.sequence_length = sequenceLength;
mas01cr@498 898 qspec.qid.flags = 0;
mas01cr@498 899 qspec.qid.flags |= usingQueryPoint ? 0 : ADB_QID_FLAG_EXHAUSTIVE;
mas01cr@498 900 qspec.qid.flags |= lsh_exact ? 0 : ADB_QID_FLAG_ALLOW_FALSE_POSITIVES;
mas01cr@498 901 qspec.qid.sequence_start = queryPoint;
mas01cr@498 902
mas01cr@498 903 switch(queryType) {
mas01cr@498 904 case O2_POINT_QUERY:
mas01cr@498 905 qspec.qid.sequence_length = 1;
mas01cr@498 906 qspec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@498 907 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@498 908 qspec.params.npoints = pointNN;
mas01cr@498 909 qspec.params.ntracks = 0;
mas01cr@498 910 reporter = new pointQueryReporter< std::greater < NNresult > >(pointNN);
mas01cr@498 911 break;
mas01cr@498 912 case O2_TRACK_QUERY:
mas01cr@498 913 qspec.qid.sequence_length = 1;
mas01cr@498 914 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01cr@498 915 qspec.params.distance = ADB_DISTANCE_DOT_PRODUCT;
mas01cr@498 916 qspec.params.npoints = pointNN;
mas01cr@498 917 qspec.params.ntracks = trackNN;
mas01cr@498 918 reporter = new trackAveragingReporter< std::greater< NNresult > >(pointNN, trackNN, nfiles);
mas01cr@498 919 break;
mas01cr@498 920 case O2_SEQUENCE_QUERY:
mas01cr@498 921 case O2_N_SEQUENCE_QUERY:
mas01cr@498 922 qspec.params.accumulation = ADB_ACCUMULATION_PER_TRACK;
mas01mc@768 923 if (distance_kullback)
mas01mc@768 924 qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE;
mas01mc@768 925 else
mas01mc@768 926 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@498 927 qspec.params.npoints = pointNN;
mas01cr@498 928 qspec.params.ntracks = trackNN;
mas01cr@498 929 switch(queryType) {
mas01cr@498 930 case O2_SEQUENCE_QUERY:
mas01cr@498 931 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@498 932 reporter = new trackAveragingReporter< std::less< NNresult > >(pointNN, trackNN, nfiles);
mas01cr@498 933 } else {
mas01cr@498 934 reporter = new trackSequenceQueryRadReporter(trackNN, nfiles);
mas01cr@498 935 }
mas01cr@498 936 break;
mas01cr@498 937 case O2_N_SEQUENCE_QUERY:
mas01cr@498 938 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@498 939 reporter = new trackSequenceQueryNNReporter< std::less < NNresult > >(pointNN, trackNN, nfiles);
mas01cr@498 940 } else {
mas01cr@498 941 reporter = new trackSequenceQueryRadNNReporter(pointNN, trackNN, nfiles);
mas01cr@498 942 }
mas01cr@498 943 break;
mas01cr@498 944 }
mas01cr@498 945 break;
mas01cr@498 946 case O2_ONE_TO_ONE_N_SEQUENCE_QUERY:
mas01cr@498 947 qspec.params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
mas01mc@768 948 if (distance_kullback)
mas01mc@768 949 qspec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE;
mas01mc@768 950 else
mas01mc@768 951 qspec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@498 952 qspec.params.npoints = 0;
mas01cr@498 953 qspec.params.ntracks = 0;
mas01cr@498 954 if(!(qspec.refine.flags & ADB_REFINE_RADIUS)) {
mas01cr@498 955 error("query-type not yet supported");
mas01cr@498 956 } else {
mas01mc@537 957 reporter = new trackSequenceQueryRadNNReporterOneToOne(pointNN,trackNN, adb->header->numFiles);
mas01cr@498 958 }
mas01cr@498 959 break;
mas01cr@498 960 default:
mas01cr@498 961 error("unrecognized queryType");
mas01cr@498 962 }
mas01cr@498 963
mas01cr@498 964 adb_query_results_t *rs = audiodb_query_spec(adb, &qspec);
mas01cr@498 965
mas01cr@507 966 // FIXME: we don't yet free everything up if there are error
mas01cr@507 967 // conditions during the construction of the query spec (including
mas01cr@507 968 // the datum itself).
mas01cr@507 969 if(datum.data) {
mas01cr@507 970 free(datum.data);
mas01cr@507 971 datum.data = NULL;
mas01cr@507 972 }
mas01cr@507 973 if(datum.power) {
mas01cr@577 974 free(datum.power);
mas01cr@577 975 datum.power = NULL;
mas01cr@507 976 }
mas01cr@507 977 if(datum.times) {
mas01cr@577 978 free(datum.times);
mas01cr@577 979 datum.times = NULL;
mas01cr@507 980 }
mas01cr@498 981
mas01cr@498 982 if(rs == NULL) {
mas01cr@498 983 error("audiodb_query_spec failed");
mas01cr@498 984 }
mas01cr@498 985
mas01cr@498 986 for(unsigned int k = 0; k < rs->nresults; k++) {
mas01cr@498 987 adb_result_t r = rs->results[k];
mas01cr@672 988 reporter->add_point(audiodb_key_index(adb, r.ikey), r.qpos, r.ipos, r.dist);
mas01cr@498 989 }
mas01cr@498 990 audiodb_query_free_results(adb, &qspec, rs);
mas01cr@498 991
mas01cr@508 992 reporter->report(adb, soap, adbQueryResponse);
mas01cr@193 993 }
mas01cr@193 994
mas01cr@548 995 void audioDB::liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap, adb__lisztResponse* adbLisztResponse) {
mas01cr@548 996 if(!adb) {
mas01cr@548 997 if(!(adb = audiodb_open(dbName, O_RDONLY))) {
mas01cr@548 998 error("failed to open database", dbName);
mas01cr@548 999 }
mas01cr@548 1000 }
mas01cr@548 1001
mas01cr@548 1002 adb_liszt_results_t *results = audiodb_liszt(adb);
mas01cr@548 1003 if(!results) {
mas01cr@548 1004 error("audiodb_liszt() failed");
mas01cr@548 1005 }
mas01cr@548 1006
mas01cr@548 1007 if(offset > results->nresults) {
mas01cr@548 1008 audiodb_liszt_free_results(adb, results);
mas01cr@548 1009 error("listKeys offset out of range");
mas01cr@548 1010 }
mas01cr@548 1011
mas01cr@548 1012 if(!adbLisztResponse){
mas01cr@548 1013 for(uint32_t k = 0; k < numLines && offset + k < results->nresults; k++) {
mas01cr@548 1014 uint32_t index = offset + k;
mas01cr@548 1015 printf("[%d] %s (%d)\n", index, results->entries[index].key, results->entries[index].nvectors);
mas01cr@548 1016 }
mas01cr@548 1017 } else {
mas01cr@548 1018 adbLisztResponse->result.Rkey = (char **) soap_malloc(soap, numLines * sizeof(char *));
mas01cr@548 1019 adbLisztResponse->result.Rlen = (unsigned int *) soap_malloc(soap, numLines * sizeof(unsigned int));
mas01cr@548 1020 uint32_t k;
mas01cr@548 1021 for(k = 0; k < numLines && offset + k < results->nresults; k++) {
mas01cr@548 1022 uint32_t index = offset + k;
mas01cr@548 1023 adbLisztResponse->result.Rkey[k] = (char *) soap_malloc(soap, O2_MAXFILESTR);
mas01cr@548 1024 snprintf(adbLisztResponse->result.Rkey[k], O2_MAXFILESTR, "%s", results->entries[index].key);
mas01cr@548 1025 adbLisztResponse->result.Rlen[k] = results->entries[index].nvectors;
mas01cr@548 1026 }
mas01cr@548 1027 adbLisztResponse->result.__sizeRkey = k;
mas01cr@548 1028 adbLisztResponse->result.__sizeRlen = k;
mas01cr@548 1029 }
mas01cr@548 1030 audiodb_liszt_free_results(adb, results);
mas01cr@548 1031 }
mas01cr@548 1032
mas01cr@693 1033 static
mas01cr@693 1034 double yfun(double d) {
mas01cr@693 1035 return gsl_sf_log(d) - gsl_sf_psi(d);
mas01cr@693 1036 }
mas01cr@693 1037
mas01cr@693 1038 static
mas01cr@693 1039 double yinv(double y) {
mas01cr@693 1040 double a = 1.0e-5;
mas01cr@693 1041 double b = 1000.0;
mas01cr@693 1042
mas01cr@693 1043 double ay = yfun(a);
mas01cr@693 1044 double by = yfun(b);
mas01cr@693 1045
mas01cr@693 1046 double c = 0;
mas01cr@693 1047 double cy;
mas01cr@693 1048
mas01cr@693 1049 /* FIXME: simple binary search; there's probably some clever solver
mas01cr@693 1050 in gsl somewhere which is less sucky. */
mas01cr@693 1051 while ((b - a) > 1.0e-5) {
mas01cr@693 1052 c = (a + b) / 2;
mas01cr@693 1053 cy = yfun(c);
mas01cr@693 1054 if (cy > y) {
mas01cr@693 1055 a = c;
mas01cr@693 1056 ay = cy;
mas01cr@693 1057 } else {
mas01cr@693 1058 b = c;
mas01cr@693 1059 by = cy;
mas01cr@693 1060 }
mas01cr@693 1061 }
mas01cr@693 1062
mas01cr@693 1063 return c;
mas01cr@693 1064 }
mas01cr@693 1065
mas01cr@693 1066 void audioDB::sample(const char *dbName) {
mas01cr@693 1067 if(!adb) {
mas01cr@693 1068 if(!(adb = audiodb_open(dbName, O_RDONLY))) {
mas01cr@693 1069 error("failed to open database", dbName);
mas01cr@693 1070 }
mas01cr@693 1071 }
mas01cr@693 1072
mas01cr@693 1073 adb_status_t status;
mas01cr@693 1074 if(audiodb_status(adb, &status)) {
mas01cr@693 1075 error("error getting status");
mas01cr@693 1076 }
mas01cr@693 1077
mas01cr@693 1078 double sumdist = 0;
mas01cr@693 1079 double sumlogdist = 0;
mas01cr@693 1080
mas01cr@693 1081 adb_query_results_t *results;
mas01cr@693 1082 adb_query_spec_t spec = {{0},{0},{0}};
mas01cr@693 1083 adb_datum_t datum = {0};
mas01cr@693 1084
mas01cr@693 1085 spec.refine.qhopsize = sequenceHop;
mas01cr@693 1086 spec.refine.ihopsize = sequenceHop;
mas01cr@693 1087 if(sequenceHop != 1) {
mas01cr@693 1088 spec.refine.flags |= ADB_REFINE_HOP_SIZE;
mas01cr@693 1089 }
mas01cr@693 1090
mas01cr@693 1091 if(query_from_key) {
mas01cr@693 1092 datum.key = key;
mas01cr@693 1093 spec.qid.datum = &datum;
mas01cr@693 1094 spec.refine.flags |= ADB_REFINE_EXCLUDE_KEYLIST;
mas01cr@693 1095 spec.refine.exclude.nkeys = 1;
mas01cr@693 1096 spec.refine.exclude.keys = &key;
mas01cr@693 1097 } else if(inFile) {
mas01cr@697 1098 datumFromFiles(&datum);
mas01cr@697 1099 spec.qid.datum = &datum;
mas01cr@693 1100 } else {
mas01cr@693 1101 spec.qid.datum = NULL; /* full db sample */
mas01cr@693 1102 }
mas01cr@693 1103 spec.qid.sequence_length = sequenceLength;
mas01cr@693 1104 spec.qid.flags |= usingQueryPoint ? 0 : ADB_QID_FLAG_EXHAUSTIVE;
mas01cr@693 1105 spec.qid.sequence_start = queryPoint;
mas01mc@768 1106 if (distance_kullback)
mas01mc@768 1107 spec.params.distance = ADB_DISTANCE_KULLBACK_LEIBLER_DIVERGENCE;
mas01mc@768 1108 else
mas01mc@768 1109 spec.params.distance = no_unit_norming ? ADB_DISTANCE_EUCLIDEAN : ADB_DISTANCE_EUCLIDEAN_NORMED;
mas01cr@693 1110 spec.params.accumulation = ADB_ACCUMULATION_DB;
mas01cr@693 1111 spec.params.npoints = nsamples;
mas01cr@693 1112
mas01cr@693 1113 if(!(results = audiodb_sample_spec(adb, &spec))) {
mas01cr@693 1114 error("error in audiodb_sample_spec");
mas01cr@693 1115 }
mas01cr@693 1116
mas01cr@697 1117 if(datum.data) {
mas01cr@697 1118 free(datum.data);
mas01cr@697 1119 datum.data = NULL;
mas01cr@697 1120 }
mas01cr@697 1121 if(datum.power) {
mas01cr@697 1122 free(datum.power);
mas01cr@697 1123 datum.power = NULL;
mas01cr@697 1124 }
mas01cr@697 1125 if(datum.times) {
mas01cr@697 1126 free(datum.times);
mas01cr@697 1127 datum.times = NULL;
mas01cr@697 1128 }
mas01cr@697 1129
mas01cr@693 1130 if(results->nresults != nsamples) {
mas01cr@693 1131 error("mismatch in sample count");
mas01cr@693 1132 }
mas01cr@693 1133
mas01cr@693 1134 for(uint32_t i = 0; i < nsamples; i++) {
mas01cr@693 1135 double d = results->results[i].dist;
mas01cr@693 1136 sumdist += d;
mas01cr@693 1137 sumlogdist += log(d);
mas01cr@693 1138 }
mas01cr@693 1139
mas01cr@693 1140 audiodb_query_free_results(adb, &spec, results);
mas01cr@693 1141
mas01cr@693 1142 unsigned total = 0;
mas01cr@693 1143 unsigned count = 0;
mas01cr@693 1144 adb_liszt_results_t *liszt;
mas01cr@693 1145 if(!(liszt = audiodb_liszt(adb))) {
mas01cr@693 1146 error("liszt failed");
mas01cr@693 1147 }
mas01cr@693 1148 for(uint32_t i = 0; i < liszt->nresults; i++) {
mas01cr@767 1149 int prop = (liszt->entries[i].nvectors - sequenceLength)/sequenceHop + 1;
mas01cr@693 1150 prop = prop > 0 ? prop : 0;
mas01cr@693 1151 if (prop > 0) {
mas01cr@693 1152 count++;
mas01cr@693 1153 }
mas01cr@693 1154 total += prop;
mas01cr@693 1155 }
mas01cr@695 1156 audiodb_liszt_free_results(adb, liszt);
mas01cr@693 1157
mas01cr@693 1158 /* FIXME: the mean isn't really what we should be using here; it's
mas01cr@693 1159 more a question of "how many independent sequences of length
mas01cr@693 1160 sequenceLength are there in the database? */
mas01cr@693 1161 unsigned meanN = total / count;
mas01cr@693 1162
mas01cr@693 1163 double sigma2 = sumdist / (sequenceLength * status.dim * nsamples);
mas01cr@693 1164 double d = 2 * yinv(log(sumdist/nsamples) - sumlogdist/nsamples);
mas01cr@693 1165
mas01cr@693 1166 std::cout << "Summary statistics" << std::endl;
mas01cr@693 1167 std::cout << "number of samples: " << nsamples << std::endl;
mas01cr@693 1168 std::cout << "sum of distances (S): " << sumdist << std::endl;
mas01cr@693 1169 std::cout << "sum of log distances (L): " << sumlogdist << std::endl;
mas01cr@693 1170
mas01cr@693 1171 /* FIXME: we'll also want some more summary statistics based on
mas01cr@693 1172 propTable, for the minimum-of-X estimate */
mas01cr@693 1173 std::cout << "mean number of applicable sequences (N): " << meanN << std::endl;
mas01cr@693 1174 std::cout << std::endl;
mas01cr@693 1175 std::cout << "Estimated parameters" << std::endl;
mas01cr@693 1176 std::cout << "sigma^2: " << sigma2 << "; ";
mas01cr@693 1177 std::cout << "Msigma^2: " << sumdist / nsamples << std::endl;
mas01cr@693 1178 std::cout << "d: " << d << std::endl;
mas01cr@693 1179
mas01cr@693 1180 double logw = (2 / d) * gsl_sf_log(-gsl_sf_log(0.99));
mas01cr@693 1181 double logxthresh = gsl_sf_log(sumdist / nsamples) + logw
mas01cr@693 1182 - (2 / d) * gsl_sf_log(meanN)
mas01cr@693 1183 - gsl_sf_log(d/2)
mas01cr@693 1184 - (2 / d) * gsl_sf_log(2 / d)
mas01cr@693 1185 + (2 / d) * gsl_sf_lngamma(d / 2);
mas01cr@693 1186
mas01cr@693 1187 std::cout << "track xthresh: " << exp(logxthresh) << std::endl;
mas01cr@693 1188 }
mas01cr@693 1189
mas01cr@693 1190
mas01mc@308 1191 // This entry point is visited once per instance
mas01mc@308 1192 // so it is a good place to set any global state variables
mas01cr@370 1193 int main(const int argc, const char* argv[]){
mas01mc@324 1194 SERVER_ADB_ROOT = 0; // Server-side database root prefix
mas01mc@324 1195 SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix
mas01cr@0 1196 audioDB(argc, argv);
mas01cr@0 1197 }