annotate audioDB.cpp @ 328:7ff56cce3297

Added WS support for Radius search by featureFileName via adb__sequenceQueryByKey(). Added relative path support for non-large ADB.
author mas01mc
date Sat, 30 Aug 2008 13:41:50 +0000
parents c93be2f3a674
children 8f11ea4d9cd2
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01mc@308 3 LSH* SERVER_LSH_INDEX_SINGLETON;
mas01mc@324 4 char* SERVER_ADB_ROOT;
mas01mc@324 5 char* SERVER_ADB_FEATURE_ROOT;
mas01mc@308 6
mas01mc@292 7 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
mas01mc@292 8
mas01mc@292 9 bool operator<(const PointPair& a, const PointPair& b){
mas01mc@324 10 return ( (a.trackID<b.trackID) ||
mas01mc@324 11 ( (a.trackID==b.trackID) &&
mas01mc@324 12 ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) );
mas01mc@292 13 }
mas01mc@292 14
mas01mc@292 15 bool operator>(const PointPair& a, const PointPair& b){
mas01mc@324 16 return ( (a.trackID>b.trackID) ||
mas01mc@324 17 ( (a.trackID==b.trackID) &&
mas01mc@324 18 ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) );
mas01mc@292 19 }
mas01mc@292 20
mas01mc@292 21 bool operator==(const PointPair& a, const PointPair& b){
mas01mc@292 22 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) );
mas01mc@292 23 }
mas01mc@292 24
mas01cr@76 25 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 26 {
mas01cr@0 27 if(processArgs(argc, argv)<0){
mas01cr@0 28 printf("No command found.\n");
mas01cr@0 29 cmdline_parser_print_version ();
mas01cr@0 30 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 31 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 32 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 33 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 34 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 35 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 36 error("No command found");
mas01cr@0 37 }
mas01cr@77 38
mas01mc@324 39 // Perform database prefix substitution
mas01mc@328 40 if(dbName && adb_root)
mas01mc@324 41 prefix_name((char** const)&dbName, adb_root);
mas01mc@324 42
mas01mc@328 43 if(inFile && adb_feature_root)
mas01mc@328 44 prefix_name((char** const)&inFile, adb_feature_root);
mas01mc@328 45
mas01cr@0 46 if(O2_ACTION(COM_SERVER))
mas01cr@0 47 startServer();
mas01cr@0 48
mas01cr@0 49 else if(O2_ACTION(COM_CREATE))
mas01cr@0 50 create(dbName);
mas01cr@0 51
mas01cr@0 52 else if(O2_ACTION(COM_INSERT))
mas01cr@0 53 insert(dbName, inFile);
mas01cr@0 54
mas01cr@0 55 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 56 batchinsert(dbName, inFile);
mas01cr@0 57
mas01cr@0 58 else if(O2_ACTION(COM_QUERY))
mas01mc@307 59 if(isClient){
mas01mc@307 60 if(query_from_key)
mas01mc@328 61 ws_query_by_key(dbName, key, inFile, (char*)hostport);
mas01mc@307 62 else
mas01mc@307 63 ws_query(dbName, inFile, (char*)hostport);
mas01mc@307 64 }
mas01cr@0 65 else
mas01cr@76 66 query(dbName, inFile);
mas01cr@0 67
mas01cr@0 68 else if(O2_ACTION(COM_STATUS))
mas01cr@0 69 if(isClient)
mas01cr@0 70 ws_status(dbName,(char*)hostport);
mas01cr@0 71 else
mas01cr@0 72 status(dbName);
mas01cr@280 73
mas01cr@280 74 else if(O2_ACTION(COM_SAMPLE))
mas01cr@280 75 sample(dbName);
mas01cr@0 76
mas01cr@0 77 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 78 l2norm(dbName);
mas01cr@0 79
mas01cr@193 80 else if(O2_ACTION(COM_POWER))
mas01cr@193 81 power_flag(dbName);
mas01cr@193 82
mas01cr@0 83 else if(O2_ACTION(COM_DUMP))
mas01cr@0 84 dump(dbName);
mas01mc@292 85
mas01mc@292 86 else if(O2_ACTION(COM_INDEX))
mas01mc@292 87 index_index_db(dbName);
mas01cr@0 88
mas01cr@0 89 else
mas01cr@0 90 error("Unrecognized command",command);
mas01cr@0 91 }
mas01cr@0 92
mas01cr@133 93 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 94 {
mas01cr@97 95 try {
mas01cr@151 96 isServer = 1; // FIXME: Hack
mas01cr@97 97 processArgs(argc, argv);
mas01mc@324 98 // Perform database prefix substitution
mas01mc@328 99 if(dbName && adb_root)
mas01mc@324 100 prefix_name((char** const)&dbName, adb_root);
mas01mc@328 101 if(inFile && adb_feature_root)
mas01mc@328 102 prefix_name((char** const)&inFile, adb_feature_root);
mas01cr@97 103 assert(O2_ACTION(COM_QUERY));
mas01cr@133 104 query(dbName, inFile, adbQueryResponse);
mas01cr@97 105 } catch(char *err) {
mas01cr@97 106 cleanup();
mas01cr@97 107 throw(err);
mas01cr@97 108 }
mas01cr@76 109 }
mas01cr@76 110
mas01cr@133 111 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 112 {
mas01cr@97 113 try {
mas01cr@151 114 isServer = 1; // FIXME: Hack
mas01cr@97 115 processArgs(argc, argv);
mas01mc@324 116 // Perform database prefix substitution
mas01mc@328 117 if(dbName && adb_root)
mas01mc@324 118 prefix_name((char** const)&dbName, adb_root);
mas01mc@328 119 if(inFile && adb_feature_root)
mas01mc@328 120 prefix_name((char** const)&inFile, adb_feature_root);
mas01cr@97 121 assert(O2_ACTION(COM_STATUS));
mas01cr@133 122 status(dbName, adbStatusResponse);
mas01cr@97 123 } catch(char *err) {
mas01cr@97 124 cleanup();
mas01cr@97 125 throw(err);
mas01cr@97 126 }
mas01cr@76 127 }
mas01cr@76 128
mas01cr@97 129 void audioDB::cleanup() {
mas01cr@122 130 cmdline_parser_free(&args_info);
mas01cr@0 131 if(indata)
mas01cr@0 132 munmap(indata,statbuf.st_size);
mas01cr@0 133 if(db)
mas01cr@196 134 munmap(db,getpagesize());
mas01cr@196 135 if(fileTable)
mas01cr@196 136 munmap(fileTable, fileTableLength);
mas01cr@196 137 if(trackTable)
mas01cr@196 138 munmap(trackTable, trackTableLength);
mas01cr@196 139 if(dataBuf)
mas01cr@196 140 munmap(dataBuf, dataBufLength);
mas01cr@196 141 if(timesTable)
mas01cr@196 142 munmap(timesTable, timesTableLength);
mas01mc@314 143 if(powerTable)
mas01mc@314 144 munmap(powerTable, powerTableLength);
mas01cr@196 145 if(l2normTable)
mas01cr@196 146 munmap(l2normTable, l2normTableLength);
mas01mc@324 147 if(featureFileNameTable)
mas01mc@324 148 munmap(featureFileNameTable, fileTableLength);
mas01mc@324 149 if(timesFileNameTable)
mas01mc@324 150 munmap(timesFileNameTable, fileTableLength);
mas01mc@324 151 if(powerFileNameTable)
mas01mc@324 152 munmap(powerFileNameTable, fileTableLength);
mas01mc@292 153 if(trackOffsetTable)
mas01mc@292 154 delete trackOffsetTable;
mas01mc@292 155 if(reporter)
mas01mc@292 156 delete reporter;
mas01mc@292 157 if(exact_evaluation_queue)
mas01mc@292 158 delete exact_evaluation_queue;
mas01cr@284 159 if(rng)
mas01cr@284 160 gsl_rng_free(rng);
mas01mc@292 161 if(vv)
mas01mc@292 162 delete vv;
mas01cr@0 163 if(dbfid>0)
mas01cr@0 164 close(dbfid);
mas01cr@0 165 if(infid>0)
mas01cr@0 166 close(infid);
mas01cr@0 167 if(dbH)
mas01cr@0 168 delete dbH;
mas01mc@308 169 if(lsh!=SERVER_LSH_INDEX_SINGLETON)
mas01mc@308 170 delete lsh;
mas01cr@0 171 }
mas01cr@0 172
mas01cr@97 173 audioDB::~audioDB(){
mas01cr@97 174 cleanup();
mas01cr@97 175 }
mas01cr@97 176
mas01cr@0 177 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 178
mas01cr@0 179 if(argc<2){
mas01cr@0 180 cmdline_parser_print_version ();
mas01cr@0 181 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 182 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 183 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 184 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 185 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 186 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 187 exit(0);
mas01cr@0 188 }
mas01cr@0 189
mas01cr@0 190 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@151 191 error("Error parsing command line");
mas01cr@0 192
mas01cr@0 193 if(args_info.help_given){
mas01cr@0 194 cmdline_parser_print_help();
mas01cr@0 195 exit(0);
mas01cr@0 196 }
mas01cr@0 197
mas01cr@0 198 if(args_info.verbosity_given){
mas01cr@239 199 verbosity = args_info.verbosity_arg;
mas01cr@239 200 if(verbosity < 0 || verbosity > 10){
mas01cr@239 201 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
mas01cr@239 202 verbosity = 1;
mas01cr@0 203 }
mas01cr@0 204 }
mas01cr@0 205
mas01cr@129 206 if(args_info.size_given) {
mas01cr@256 207 if(args_info.datasize_given) {
mas01cr@256 208 error("both --size and --datasize given", "");
mas01cr@256 209 }
mas01cr@256 210 if(args_info.ntracks_given) {
mas01cr@256 211 error("both --size and --ntracks given", "");
mas01cr@256 212 }
mas01cr@256 213 if(args_info.datadim_given) {
mas01cr@256 214 error("both --size and --datadim given", "");
mas01cr@256 215 }
mas01cr@196 216 if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
mas01cr@129 217 error("Size out of range", "");
mas01cr@129 218 }
mas01cr@256 219 double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
mas01cr@256 220 /* FIXME: what's the safe way of doing this? */
mas01cr@256 221 datasize = (unsigned int) ceil(datasize * ratio);
mas01cr@256 222 ntracks = (unsigned int) ceil(ntracks * ratio);
mas01cr@256 223 } else {
mas01cr@256 224 if(args_info.datasize_given) {
mas01cr@256 225 datasize = args_info.datasize_arg;
mas01cr@256 226 }
mas01cr@256 227 if(args_info.ntracks_given) {
mas01cr@256 228 ntracks = args_info.ntracks_arg;
mas01cr@256 229 }
mas01cr@256 230 if(args_info.datadim_given) {
mas01cr@256 231 datadim = args_info.datadim_arg;
mas01cr@256 232 }
mas01cr@129 233 }
mas01cr@129 234
mas01cr@239 235 if(args_info.radius_given) {
mas01cr@239 236 radius = args_info.radius_arg;
mas01mc@307 237 if(radius < 0 || radius > 1000000000) {
mas01cr@77 238 error("radius out of range");
mas01cr@239 239 } else {
mas01cr@239 240 VERB_LOG(3, "Setting radius to %f\n", radius);
mas01mc@17 241 }
mas01mc@17 242 }
mas01mc@17 243
mas01mc@292 244 sequenceLength = args_info.sequencelength_arg;
mas01mc@292 245 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01mc@292 246 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01mc@292 247 }
mas01mc@292 248 sequenceHop = args_info.sequencehop_arg;
mas01mc@292 249 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01mc@292 250 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01mc@292 251 }
mas01mc@292 252
mas01mc@292 253 if (args_info.absolute_threshold_given) {
mas01mc@292 254 if (args_info.absolute_threshold_arg >= 0) {
mas01mc@292 255 error("absolute threshold out of range: should be negative");
mas01mc@292 256 }
mas01mc@292 257 use_absolute_threshold = true;
mas01mc@292 258 absolute_threshold = args_info.absolute_threshold_arg;
mas01mc@292 259 }
mas01mc@292 260 if (args_info.relative_threshold_given) {
mas01mc@292 261 use_relative_threshold = true;
mas01mc@292 262 relative_threshold = args_info.relative_threshold_arg;
mas01mc@292 263 }
mas01mc@292 264
mas01mc@324 265 if (args_info.adb_root_given){
mas01mc@324 266 adb_root = args_info.adb_root_arg;
mas01mc@324 267 }
mas01mc@324 268
mas01mc@324 269 if (args_info.adb_feature_root_given){
mas01mc@324 270 adb_feature_root = args_info.adb_feature_root_arg;
mas01mc@324 271 }
mas01mc@324 272
mas01mc@324 273 // perform dbName path prefix SERVER-side subsitution
mas01mc@324 274 if(SERVER_ADB_ROOT && !adb_root)
mas01mc@324 275 adb_root = SERVER_ADB_ROOT;
mas01mc@324 276 if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root)
mas01mc@324 277 adb_feature_root = SERVER_ADB_FEATURE_ROOT;
mas01mc@324 278
mas01cr@0 279 if(args_info.SERVER_given){
mas01cr@0 280 command=COM_SERVER;
mas01cr@0 281 port=args_info.SERVER_arg;
mas01cr@0 282 if(port<100 || port > 100000)
mas01cr@0 283 error("port out of range");
mas01cr@151 284 isServer = 1;
mas01cr@105 285 #if defined(O2_DEBUG)
mas01cr@104 286 struct sigaction sa;
mas01cr@104 287 sa.sa_sigaction = sigterm_action;
mas01cr@104 288 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 289 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 290 sa.sa_sigaction = sighup_action;
mas01cr@104 291 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 292 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 293 #endif
mas01mc@308 294 if(args_info.load_index_given){
mas01mc@308 295 if(!args_info.database_given)
mas01mc@308 296 error("load_index requires a --database argument");
mas01mc@308 297 else
mas01mc@308 298 dbName=args_info.database_arg;
mas01mc@308 299 if(!args_info.radius_given)
mas01mc@308 300 error("load_index requires a --radius argument");
mas01mc@308 301 if(!args_info.sequencelength_given)
mas01mc@308 302 error("load_index requires a --sequenceLength argument");
mas01mc@308 303 WS_load_index = true;
mas01mc@308 304 }
mas01cr@0 305 return 0;
mas01cr@0 306 }
mas01cr@0 307
mas01cr@0 308 // No return on client command, find database command
mas01cr@105 309 if(args_info.client_given){
mas01cr@105 310 command=COM_CLIENT;
mas01cr@105 311 hostport=args_info.client_arg;
mas01cr@105 312 isClient=1;
mas01cr@105 313 }
mas01cr@0 314
mas01cr@105 315 if(args_info.NEW_given){
mas01cr@105 316 command=COM_CREATE;
mas01cr@105 317 dbName=args_info.database_arg;
mas01cr@105 318 return 0;
mas01cr@105 319 }
mas01cr@0 320
mas01cr@105 321 if(args_info.STATUS_given){
mas01cr@105 322 command=COM_STATUS;
mas01cr@105 323 dbName=args_info.database_arg;
mas01cr@105 324 return 0;
mas01cr@105 325 }
mas01cr@0 326
mas01cr@280 327 if(args_info.SAMPLE_given) {
mas01cr@280 328 command = COM_SAMPLE;
mas01cr@280 329 dbName = args_info.database_arg;
mas01cr@280 330 sequenceLength = args_info.sequencelength_arg;
mas01cr@280 331 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@280 332 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@280 333 }
mas01cr@280 334 nsamples = args_info.nsamples_arg;
mas01cr@280 335 return 0;
mas01cr@280 336 }
mas01cr@280 337
mas01cr@105 338 if(args_info.DUMP_given){
mas01cr@105 339 command=COM_DUMP;
mas01cr@105 340 dbName=args_info.database_arg;
mas01cr@131 341 output = args_info.output_arg;
mas01cr@105 342 return 0;
mas01cr@105 343 }
mas01cr@0 344
mas01cr@105 345 if(args_info.L2NORM_given){
mas01cr@105 346 command=COM_L2NORM;
mas01cr@105 347 dbName=args_info.database_arg;
mas01cr@105 348 return 0;
mas01cr@105 349 }
mas01cr@0 350
mas01cr@193 351 if(args_info.POWER_given){
mas01cr@193 352 command=COM_POWER;
mas01cr@193 353 dbName=args_info.database_arg;
mas01cr@193 354 return 0;
mas01cr@193 355 }
mas01cr@193 356
mas01cr@105 357 if(args_info.INSERT_given){
mas01cr@105 358 command=COM_INSERT;
mas01cr@105 359 dbName=args_info.database_arg;
mas01cr@105 360 inFile=args_info.features_arg;
mas01cr@105 361 if(args_info.key_given)
mas01mc@292 362 if(!args_info.features_given)
mas01mc@292 363 error("INSERT: '-k key' argument depends on '-f features'");
mas01mc@292 364 else
mas01mc@292 365 key=args_info.key_arg;
mas01cr@105 366 if(args_info.times_given){
mas01cr@105 367 timesFileName=args_info.times_arg;
mas01cr@105 368 if(strlen(timesFileName)>0){
mas01cr@239 369 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 370 error("Could not open times file for reading", timesFileName);
mas01cr@105 371 usingTimes=1;
mas01cr@105 372 }
mas01cr@105 373 }
mas01cr@193 374 if (args_info.power_given) {
mas01cr@193 375 powerFileName = args_info.power_arg;
mas01cr@193 376 if (strlen(powerFileName) > 0) {
mas01cr@193 377 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 378 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 379 }
mas01cr@193 380 usingPower = 1;
mas01cr@193 381 }
mas01cr@193 382 }
mas01cr@105 383 return 0;
mas01cr@105 384 }
mas01cr@105 385
mas01cr@105 386 if(args_info.BATCHINSERT_given){
mas01cr@105 387 command=COM_BATCHINSERT;
mas01cr@105 388 dbName=args_info.database_arg;
mas01cr@105 389 inFile=args_info.featureList_arg;
mas01cr@105 390 if(args_info.keyList_given)
mas01tc@298 391 if(!args_info.featureList_given)
mas01tc@300 392 error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'");
mas01mc@292 393 else
mas01cr@304 394 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 395
mas01cr@105 396 /* TO DO: REPLACE WITH
mas01cr@0 397 if(args_info.keyList_given){
mas01mc@18 398 trackFileName=args_info.keyList_arg;
mas01cr@239 399 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01mc@18 400 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 401 }
mas01cr@0 402 AND UPDATE BATCHINSERT()
mas01cr@105 403 */
mas01cr@105 404
mas01cr@105 405 if(args_info.timesList_given){
mas01cr@105 406 timesFileName=args_info.timesList_arg;
mas01cr@105 407 if(strlen(timesFileName)>0){
mas01cr@239 408 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 409 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 410 usingTimes=1;
mas01cr@105 411 }
mas01cr@105 412 }
mas01cr@193 413 if(args_info.powerList_given){
mas01cr@193 414 powerFileName=args_info.powerList_arg;
mas01cr@193 415 if(strlen(powerFileName)>0){
mas01cr@239 416 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
mas01cr@193 417 error("Could not open powerList file for reading", powerFileName);
mas01cr@193 418 usingPower=1;
mas01cr@193 419 }
mas01cr@193 420 }
mas01cr@105 421 return 0;
mas01cr@105 422 }
mas01mc@292 423
mas01mc@292 424 // Set no_unit_norm flag
mas01mc@292 425 no_unit_norming = args_info.no_unit_norming_flag;
mas01mc@292 426 lsh_use_u_functions = args_info.lsh_use_u_functions_flag;
mas01mc@292 427
mas01mc@292 428 // LSH Index Command
mas01mc@292 429 if(args_info.INDEX_given){
mas01mc@292 430 if(radius <= 0 )
mas01mc@292 431 error("INDEXing requires a Radius argument");
mas01mc@292 432 if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN))
mas01mc@292 433 error("INDEXing requires 1 <= sequenceLength <= 1000");
mas01mc@292 434 command=COM_INDEX;
mas01mc@292 435 dbName=args_info.database_arg;
mas01mc@292 436
mas01mc@292 437 // Whether to store LSH hash tables for query in core (FORMAT2)
mas01mc@297 438 lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested
mas01mc@292 439
mas01mc@292 440 lsh_param_w = args_info.lsh_w_arg;
mas01mc@292 441 if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH))
mas01mc@292 442 error("Indexing parameter w out of range (0.0 < w <= 100.0)");
mas01mc@292 443
mas01mc@292 444 lsh_param_k = args_info.lsh_k_arg;
mas01mc@292 445 if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS))
mas01mc@292 446 error("Indexing parameter k out of range (1 <= k <= 100)");
mas01mc@292 447
mas01mc@292 448 lsh_param_m = args_info.lsh_m_arg;
mas01mc@292 449 if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0))
mas01mc@292 450 error("Indexing parameter m out of range (1 <= m <= 20)");
mas01mc@292 451
mas01mc@292 452 lsh_param_N = args_info.lsh_N_arg;
mas01mc@292 453 if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS))
mas01mc@292 454 error("Indexing parameter N out of range (1 <= N <= 1000000)");
mas01mc@292 455
mas01mc@292 456 lsh_param_b = args_info.lsh_b_arg;
mas01mc@292 457 if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH))
mas01mc@292 458 error("Indexing parameter b out of range (1 <= b <= 10000)");
mas01mc@292 459
mas01mc@296 460 lsh_param_ncols = args_info.lsh_ncols_arg;
mas01mc@296 461 if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing
mas01mc@296 462 lsh_param_ncols = O2_SERIAL_MAX_COLS;
mas01mc@292 463 if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS))
mas01mc@292 464 error("Indexing parameter ncols out of range (1 <= ncols <= 1000");
mas01mc@292 465
mas01mc@292 466 return 0;
mas01mc@292 467 }
mas01mc@292 468
mas01cr@105 469 // Query command and arguments
mas01cr@105 470 if(args_info.QUERY_given){
mas01cr@105 471 command=COM_QUERY;
mas01cr@105 472 dbName=args_info.database_arg;
mas01mc@292 473 // XOR features and key search
mas01mc@292 474 if(!args_info.features_given && !args_info.key_given || (args_info.features_given && args_info.key_given))
mas01mc@292 475 error("QUERY requires exactly one of either -f features or -k key");
mas01mc@292 476 if(args_info.features_given)
mas01mc@292 477 inFile=args_info.features_arg; // query from file
mas01mc@292 478 else{
mas01mc@292 479 query_from_key = true;
mas01mc@292 480 key=args_info.key_arg; // query from key
mas01mc@292 481 }
mas01mc@292 482
mas01cr@105 483 if(args_info.keyList_given){
mas01cr@105 484 trackFileName=args_info.keyList_arg;
mas01cr@239 485 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01cr@105 486 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 487 }
mas01cr@105 488
mas01cr@105 489 if(args_info.times_given){
mas01cr@105 490 timesFileName=args_info.times_arg;
mas01cr@105 491 if(strlen(timesFileName)>0){
mas01cr@239 492 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 493 error("Could not open times file for reading", timesFileName);
mas01cr@105 494 usingTimes=1;
mas01cr@105 495 }
mas01cr@105 496 }
mas01cr@193 497
mas01cr@193 498 if(args_info.power_given){
mas01cr@193 499 powerFileName=args_info.power_arg;
mas01cr@193 500 if(strlen(powerFileName)>0){
mas01cr@193 501 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 502 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 503 }
mas01cr@193 504 usingPower = 1;
mas01cr@193 505 }
mas01cr@193 506 }
mas01cr@105 507
mas01cr@105 508 // query type
mas01cr@105 509 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 510 queryType=O2_TRACK_QUERY;
mas01cr@105 511 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 512 queryType=O2_POINT_QUERY;
mas01cr@105 513 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 514 queryType=O2_SEQUENCE_QUERY;
mas01mc@248 515 else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
mas01mc@248 516 queryType=O2_N_SEQUENCE_QUERY;
mas01mc@263 517 else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
mas01mc@263 518 queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
mas01cr@105 519 else
mas01cr@105 520 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 521
mas01cr@105 522 if(!args_info.exhaustive_flag){
mas01cr@105 523 queryPoint = args_info.qpoint_arg;
mas01cr@105 524 usingQueryPoint=1;
mas01cr@105 525 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 526 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 527 }
mas01mc@292 528
mas01mc@296 529 // Whether to pre-load LSH hash tables for query (default on, if flag set then off)
mas01mc@297 530 lsh_in_core = !args_info.lsh_on_disk_flag;
mas01mc@292 531
mas01mc@292 532 // Whether to perform exact evaluation of points returned by LSH
mas01mc@292 533 lsh_exact = args_info.lsh_exact_flag;
mas01mc@292 534
mas01cr@105 535 pointNN = args_info.pointnn_arg;
mas01mc@263 536 if(pointNN < 1 || pointNN > O2_MAXNN) {
mas01mc@263 537 error("pointNN out of range: 1 <= pointNN <= 1000000");
mas01cr@105 538 }
mas01cr@105 539 trackNN = args_info.resultlength_arg;
mas01mc@263 540 if(trackNN < 1 || trackNN > O2_MAXNN) {
mas01mc@263 541 error("resultlength out of range: 1 <= resultlength <= 1000000");
mas01cr@105 542 }
mas01cr@105 543 return 0;
mas01cr@105 544 }
mas01cr@105 545 return -1; // no command found
mas01cr@0 546 }
mas01cr@0 547
mas01cr@133 548 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 549 if(!dbH)
mas01cr@196 550 initTables(dbName, 0);
mas01cr@0 551
mas01cr@0 552 unsigned dudCount=0;
mas01cr@0 553 unsigned nullCount=0;
mas01cr@0 554 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 555 if(trackTable[k]<sequenceLength){
mas01cr@0 556 dudCount++;
mas01mc@18 557 if(!trackTable[k])
mas01cr@76 558 nullCount++;
mas01cr@0 559 }
mas01cr@0 560 }
mas01cr@76 561
mas01cr@133 562 if(adbStatusResponse == 0) {
mas01cr@76 563
mas01cr@76 564 // Update Header information
mas01cr@239 565 std::cout << "num files:" << dbH->numFiles << std::endl;
mas01cr@239 566 std::cout << "data dim:" << dbH->dim <<std::endl;
mas01cr@76 567 if(dbH->dim>0){
mas01cr@239 568 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
mas01mc@324 569 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@324 570 std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl;
mas01mc@324 571 else
mas01mc@324 572 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
mas01cr@76 573 }
mas01mc@324 574 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@324 575 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@324 576 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01mc@324 577 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@324 578 }
mas01mc@301 579 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
mas01mc@301 580 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
mas01mc@301 581 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
mas01mc@324 582 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES)
mas01mc@324 583 << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB)
mas01mc@324 584 << "]" << endl;
mas01mc@324 585
mas01cr@239 586 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
mas01cr@76 587 } else {
mas01cr@133 588 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 589 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 590 adbStatusResponse->result.length = dbH->length;
mas01cr@133 591 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 592 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 593 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 594 }
mas01cr@0 595 }
mas01cr@0 596
mas01cr@196 597 void audioDB::l2norm(const char* dbName) {
mas01cr@196 598 forWrite = true;
mas01cr@196 599 initTables(dbName, 0);
mas01mc@324 600 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@196 601 /* FIXME: should probably be uint64_t */
mas01cr@0 602 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@196 603 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@0 604 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 605 }
mas01cr@0 606 // Update database flags
mas01cr@0 607 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 608 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 609 }
mas01cr@193 610
mas01cr@193 611 void audioDB::power_flag(const char *dbName) {
mas01cr@196 612 forWrite = true;
mas01mc@324 613 initTables(dbName, 0);
mas01mc@324 614 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@193 615 error("cannot turn on power storage for non-empty database", dbName);
mas01cr@193 616 }
mas01cr@193 617 dbH->flags |= O2_FLAG_POWER;
mas01cr@193 618 memcpy(db, dbH, O2_HEADERSIZE);
mas01cr@193 619 }
mas01cr@193 620
mas01cr@239 621 // Unit norm block of features
mas01cr@0 622
mas01cr@239 623 /* FIXME: in fact this does not unit norm a block of features, it just
mas01cr@239 624 records the L2 norms somewhere. unitNorm() does in fact unit norm
mas01cr@239 625 a block of features. */
mas01cr@0 626 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 627 unsigned d;
mas01cr@59 628 double *p;
mas01cr@0 629 unsigned nn = n;
mas01cr@0 630
mas01cr@0 631 assert(l2normTable);
mas01cr@0 632
mas01mc@324 633 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 634 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 635
mas01cr@239 636 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@0 637
mas01cr@0 638 double* l2buf = new double[n];
mas01cr@0 639 double* l2ptr = l2buf;
mas01cr@0 640 assert(l2buf);
mas01cr@0 641 assert(X);
mas01cr@0 642
mas01cr@0 643 while(nn--){
mas01cr@0 644 p=X;
mas01cr@0 645 *l2ptr=0.0;
mas01cr@0 646 d=dim;
mas01cr@0 647 while(d--){
mas01cr@0 648 *l2ptr+=*p**p;
mas01cr@0 649 p++;
mas01cr@0 650 }
mas01mc@17 651 l2ptr++;
mas01mc@17 652 X+=dim;
mas01cr@0 653 }
mas01cr@0 654 unsigned offset;
mas01cr@84 655 if(append) {
mas01cr@84 656 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 657 // been inserted, and dbH->length has already been updated. We
mas01cr@84 658 // need to subtract off again the number of vectors that we've
mas01cr@84 659 // inserted this time...
mas01cr@84 660 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 661 } else {
mas01cr@0 662 offset=0;
mas01cr@84 663 }
mas01cr@0 664 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 665 if(l2buf)
mas01mc@17 666 delete[] l2buf;
mas01cr@239 667 VERB_LOG(2, " done.");
mas01cr@193 668 }
mas01cr@193 669
mas01mc@308 670 // This entry point is visited once per instance
mas01mc@308 671 // so it is a good place to set any global state variables
mas01cr@0 672 int main(const unsigned argc, char* const argv[]){
mas01mc@308 673 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
mas01mc@324 674 SERVER_ADB_ROOT = 0; // Server-side database root prefix
mas01mc@324 675 SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix
mas01cr@0 676 audioDB(argc, argv);
mas01cr@0 677 }