annotate audioDB.cpp @ 321:da2272e029b3 large_adb

Added the --adb_feature_root=path option, a filename prefix used to locate LARGE_ADB feature files that are stored with relative paths, applied at QUERY time. Also added the convenience option --adb_root=path, a prefix applied to the -d database command option.
author mas01mc
date Thu, 21 Aug 2008 19:16:21 +0000
parents a995e5ad999a
children 634959ef98f2
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01mc@308 3 LSH* SERVER_LSH_INDEX_SINGLETON;
mas01mc@308 4
mas01mc@292 5 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
mas01mc@292 6
mas01mc@292 7 bool operator<(const PointPair& a, const PointPair& b){
mas01mc@320 8 return ( (a.trackID<b.trackID) ||
mas01mc@320 9 ( (a.trackID==b.trackID) &&
mas01mc@320 10 ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) );
mas01mc@292 11 }
mas01mc@292 12
mas01mc@292 13 bool operator>(const PointPair& a, const PointPair& b){
mas01mc@320 14 return ( (a.trackID>b.trackID) ||
mas01mc@320 15 ( (a.trackID==b.trackID) &&
mas01mc@320 16 ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) );
mas01mc@292 17 }
mas01mc@292 18
mas01mc@292 19 bool operator==(const PointPair& a, const PointPair& b){
mas01mc@292 20 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) );
mas01mc@292 21 }
mas01mc@292 22
mas01cr@76 23 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 24 {
mas01cr@0 25 if(processArgs(argc, argv)<0){
mas01cr@0 26 printf("No command found.\n");
mas01cr@0 27 cmdline_parser_print_version ();
mas01cr@0 28 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 29 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 30 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 31 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 32 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 33 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 34 error("No command found");
mas01cr@0 35 }
mas01cr@77 36
mas01mc@321 37 // perform dbName path prefix subbsitution
mas01mc@321 38 if(adb_root)
mas01mc@321 39 prefix_name((char** const)&dbName, adb_root);
mas01mc@321 40
mas01cr@0 41 if(O2_ACTION(COM_SERVER))
mas01cr@0 42 startServer();
mas01cr@0 43
mas01cr@0 44 else if(O2_ACTION(COM_CREATE))
mas01cr@0 45 create(dbName);
mas01cr@0 46
mas01cr@0 47 else if(O2_ACTION(COM_INSERT))
mas01cr@0 48 insert(dbName, inFile);
mas01cr@0 49
mas01cr@0 50 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 51 batchinsert(dbName, inFile);
mas01cr@0 52
mas01cr@0 53 else if(O2_ACTION(COM_QUERY))
mas01mc@307 54 if(isClient){
mas01mc@307 55 if(query_from_key)
mas01mc@307 56 ws_query_by_key(dbName, key, (char*)hostport);
mas01mc@307 57 else
mas01mc@307 58 ws_query(dbName, inFile, (char*)hostport);
mas01mc@307 59 }
mas01cr@0 60 else
mas01cr@76 61 query(dbName, inFile);
mas01cr@0 62
mas01cr@0 63 else if(O2_ACTION(COM_STATUS))
mas01cr@0 64 if(isClient)
mas01cr@0 65 ws_status(dbName,(char*)hostport);
mas01cr@0 66 else
mas01cr@0 67 status(dbName);
mas01cr@280 68
mas01cr@280 69 else if(O2_ACTION(COM_SAMPLE))
mas01cr@280 70 sample(dbName);
mas01cr@0 71
mas01cr@0 72 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 73 l2norm(dbName);
mas01cr@0 74
mas01cr@193 75 else if(O2_ACTION(COM_POWER))
mas01cr@193 76 power_flag(dbName);
mas01cr@193 77
mas01cr@0 78 else if(O2_ACTION(COM_DUMP))
mas01cr@0 79 dump(dbName);
mas01mc@292 80
mas01mc@292 81 else if(O2_ACTION(COM_INDEX))
mas01mc@292 82 index_index_db(dbName);
mas01cr@0 83
mas01cr@0 84 else
mas01cr@0 85 error("Unrecognized command",command);
mas01cr@0 86 }
mas01cr@0 87
mas01cr@133 88 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 89 {
mas01cr@97 90 try {
mas01cr@151 91 isServer = 1; // FIXME: Hack
mas01cr@97 92 processArgs(argc, argv);
mas01cr@97 93 assert(O2_ACTION(COM_QUERY));
mas01cr@133 94 query(dbName, inFile, adbQueryResponse);
mas01cr@97 95 } catch(char *err) {
mas01cr@97 96 cleanup();
mas01cr@97 97 throw(err);
mas01cr@97 98 }
mas01cr@76 99 }
mas01cr@76 100
mas01cr@133 101 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 102 {
mas01cr@97 103 try {
mas01cr@151 104 isServer = 1; // FIXME: Hack
mas01cr@97 105 processArgs(argc, argv);
mas01cr@97 106 assert(O2_ACTION(COM_STATUS));
mas01cr@133 107 status(dbName, adbStatusResponse);
mas01cr@97 108 } catch(char *err) {
mas01cr@97 109 cleanup();
mas01cr@97 110 throw(err);
mas01cr@97 111 }
mas01cr@76 112 }
mas01cr@76 113
mas01cr@97 114 void audioDB::cleanup() {
mas01cr@122 115 cmdline_parser_free(&args_info);
mas01cr@0 116 if(indata)
mas01cr@0 117 munmap(indata,statbuf.st_size);
mas01cr@0 118 if(db)
mas01cr@196 119 munmap(db,getpagesize());
mas01cr@196 120 if(fileTable)
mas01cr@196 121 munmap(fileTable, fileTableLength);
mas01cr@196 122 if(trackTable)
mas01cr@196 123 munmap(trackTable, trackTableLength);
mas01cr@196 124 if(dataBuf)
mas01cr@196 125 munmap(dataBuf, dataBufLength);
mas01cr@196 126 if(timesTable)
mas01cr@196 127 munmap(timesTable, timesTableLength);
mas01mc@314 128 if(powerTable)
mas01mc@314 129 munmap(powerTable, powerTableLength);
mas01cr@196 130 if(l2normTable)
mas01cr@196 131 munmap(l2normTable, l2normTableLength);
mas01mc@318 132 if(featureFileNameTable)
mas01mc@318 133 munmap(featureFileNameTable, fileTableLength);
mas01mc@318 134 if(timesFileNameTable)
mas01mc@318 135 munmap(timesFileNameTable, fileTableLength);
mas01mc@318 136 if(powerFileNameTable)
mas01mc@318 137 munmap(powerFileNameTable, fileTableLength);
mas01mc@292 138 if(trackOffsetTable)
mas01mc@292 139 delete trackOffsetTable;
mas01mc@292 140 if(reporter)
mas01mc@292 141 delete reporter;
mas01mc@292 142 if(exact_evaluation_queue)
mas01mc@292 143 delete exact_evaluation_queue;
mas01cr@284 144 if(rng)
mas01cr@284 145 gsl_rng_free(rng);
mas01mc@292 146 if(vv)
mas01mc@292 147 delete vv;
mas01cr@0 148 if(dbfid>0)
mas01cr@0 149 close(dbfid);
mas01cr@0 150 if(infid>0)
mas01cr@0 151 close(infid);
mas01cr@0 152 if(dbH)
mas01cr@0 153 delete dbH;
mas01mc@308 154 if(lsh!=SERVER_LSH_INDEX_SINGLETON)
mas01mc@308 155 delete lsh;
mas01cr@0 156 }
mas01cr@0 157
mas01cr@97 158 audioDB::~audioDB(){
mas01cr@97 159 cleanup();
mas01cr@97 160 }
mas01cr@97 161
mas01cr@0 162 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 163
mas01cr@0 164 if(argc<2){
mas01cr@0 165 cmdline_parser_print_version ();
mas01cr@0 166 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 167 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 168 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 169 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 170 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 171 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 172 exit(0);
mas01cr@0 173 }
mas01cr@0 174
mas01cr@0 175 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@151 176 error("Error parsing command line");
mas01cr@0 177
mas01cr@0 178 if(args_info.help_given){
mas01cr@0 179 cmdline_parser_print_help();
mas01cr@0 180 exit(0);
mas01cr@0 181 }
mas01cr@0 182
mas01cr@0 183 if(args_info.verbosity_given){
mas01cr@239 184 verbosity = args_info.verbosity_arg;
mas01cr@239 185 if(verbosity < 0 || verbosity > 10){
mas01cr@239 186 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
mas01cr@239 187 verbosity = 1;
mas01cr@0 188 }
mas01cr@0 189 }
mas01cr@0 190
mas01cr@129 191 if(args_info.size_given) {
mas01cr@256 192 if(args_info.datasize_given) {
mas01cr@256 193 error("both --size and --datasize given", "");
mas01cr@256 194 }
mas01cr@256 195 if(args_info.ntracks_given) {
mas01cr@256 196 error("both --size and --ntracks given", "");
mas01cr@256 197 }
mas01cr@256 198 if(args_info.datadim_given) {
mas01cr@256 199 error("both --size and --datadim given", "");
mas01cr@256 200 }
mas01cr@196 201 if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
mas01cr@129 202 error("Size out of range", "");
mas01cr@129 203 }
mas01cr@256 204 double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
mas01cr@256 205 /* FIXME: what's the safe way of doing this? */
mas01cr@256 206 datasize = (unsigned int) ceil(datasize * ratio);
mas01cr@256 207 ntracks = (unsigned int) ceil(ntracks * ratio);
mas01cr@256 208 } else {
mas01cr@256 209 if(args_info.datasize_given) {
mas01cr@256 210 datasize = args_info.datasize_arg;
mas01cr@256 211 }
mas01cr@256 212 if(args_info.ntracks_given) {
mas01cr@256 213 ntracks = args_info.ntracks_arg;
mas01cr@256 214 }
mas01cr@256 215 if(args_info.datadim_given) {
mas01cr@256 216 datadim = args_info.datadim_arg;
mas01cr@256 217 }
mas01cr@129 218 }
mas01cr@129 219
mas01cr@239 220 if(args_info.radius_given) {
mas01cr@239 221 radius = args_info.radius_arg;
mas01mc@307 222 if(radius < 0 || radius > 1000000000) {
mas01cr@77 223 error("radius out of range");
mas01cr@239 224 } else {
mas01cr@239 225 VERB_LOG(3, "Setting radius to %f\n", radius);
mas01mc@17 226 }
mas01mc@17 227 }
mas01mc@17 228
mas01mc@292 229 sequenceLength = args_info.sequencelength_arg;
mas01mc@292 230 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01mc@292 231 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01mc@292 232 }
mas01mc@292 233 sequenceHop = args_info.sequencehop_arg;
mas01mc@292 234 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01mc@292 235 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01mc@292 236 }
mas01mc@292 237
mas01mc@292 238 if (args_info.absolute_threshold_given) {
mas01mc@292 239 if (args_info.absolute_threshold_arg >= 0) {
mas01mc@292 240 error("absolute threshold out of range: should be negative");
mas01mc@292 241 }
mas01mc@292 242 use_absolute_threshold = true;
mas01mc@292 243 absolute_threshold = args_info.absolute_threshold_arg;
mas01mc@292 244 }
mas01mc@292 245 if (args_info.relative_threshold_given) {
mas01mc@292 246 use_relative_threshold = true;
mas01mc@292 247 relative_threshold = args_info.relative_threshold_arg;
mas01mc@292 248 }
mas01mc@292 249
mas01mc@321 250 if (args_info.adb_root_given){
mas01mc@321 251 adb_root = args_info.adb_root_arg;
mas01mc@321 252 }
mas01mc@321 253
mas01mc@321 254 if (args_info.adb_feature_root_given){
mas01mc@321 255 adb_feature_root = args_info.adb_feature_root_arg;
mas01mc@321 256 }
mas01mc@321 257
mas01cr@0 258 if(args_info.SERVER_given){
mas01cr@0 259 command=COM_SERVER;
mas01cr@0 260 port=args_info.SERVER_arg;
mas01cr@0 261 if(port<100 || port > 100000)
mas01cr@0 262 error("port out of range");
mas01cr@151 263 isServer = 1;
mas01cr@105 264 #if defined(O2_DEBUG)
mas01cr@104 265 struct sigaction sa;
mas01cr@104 266 sa.sa_sigaction = sigterm_action;
mas01cr@104 267 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 268 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 269 sa.sa_sigaction = sighup_action;
mas01cr@104 270 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 271 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 272 #endif
mas01mc@308 273 if(args_info.load_index_given){
mas01mc@308 274 if(!args_info.database_given)
mas01mc@308 275 error("load_index requires a --database argument");
mas01mc@308 276 else
mas01mc@308 277 dbName=args_info.database_arg;
mas01mc@308 278 if(!args_info.radius_given)
mas01mc@308 279 error("load_index requires a --radius argument");
mas01mc@308 280 if(!args_info.sequencelength_given)
mas01mc@308 281 error("load_index requires a --sequenceLength argument");
mas01mc@308 282 WS_load_index = true;
mas01mc@308 283 }
mas01cr@0 284 return 0;
mas01cr@0 285 }
mas01cr@0 286
mas01cr@0 287 // No return on client command, find database command
mas01cr@105 288 if(args_info.client_given){
mas01cr@105 289 command=COM_CLIENT;
mas01cr@105 290 hostport=args_info.client_arg;
mas01cr@105 291 isClient=1;
mas01cr@105 292 }
mas01cr@0 293
mas01cr@105 294 if(args_info.NEW_given){
mas01cr@105 295 command=COM_CREATE;
mas01cr@105 296 dbName=args_info.database_arg;
mas01cr@105 297 return 0;
mas01cr@105 298 }
mas01cr@0 299
mas01cr@105 300 if(args_info.STATUS_given){
mas01cr@105 301 command=COM_STATUS;
mas01cr@105 302 dbName=args_info.database_arg;
mas01cr@105 303 return 0;
mas01cr@105 304 }
mas01cr@0 305
mas01cr@280 306 if(args_info.SAMPLE_given) {
mas01cr@280 307 command = COM_SAMPLE;
mas01cr@280 308 dbName = args_info.database_arg;
mas01cr@280 309 sequenceLength = args_info.sequencelength_arg;
mas01cr@280 310 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@280 311 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@280 312 }
mas01cr@280 313 nsamples = args_info.nsamples_arg;
mas01cr@280 314 return 0;
mas01cr@280 315 }
mas01cr@280 316
mas01cr@105 317 if(args_info.DUMP_given){
mas01cr@105 318 command=COM_DUMP;
mas01cr@105 319 dbName=args_info.database_arg;
mas01cr@131 320 output = args_info.output_arg;
mas01cr@105 321 return 0;
mas01cr@105 322 }
mas01cr@0 323
mas01cr@105 324 if(args_info.L2NORM_given){
mas01cr@105 325 command=COM_L2NORM;
mas01cr@105 326 dbName=args_info.database_arg;
mas01cr@105 327 return 0;
mas01cr@105 328 }
mas01cr@0 329
mas01cr@193 330 if(args_info.POWER_given){
mas01cr@193 331 command=COM_POWER;
mas01cr@193 332 dbName=args_info.database_arg;
mas01cr@193 333 return 0;
mas01cr@193 334 }
mas01cr@193 335
mas01cr@105 336 if(args_info.INSERT_given){
mas01cr@105 337 command=COM_INSERT;
mas01cr@105 338 dbName=args_info.database_arg;
mas01cr@105 339 inFile=args_info.features_arg;
mas01cr@105 340 if(args_info.key_given)
mas01mc@292 341 if(!args_info.features_given)
mas01mc@292 342 error("INSERT: '-k key' argument depends on '-f features'");
mas01mc@292 343 else
mas01mc@292 344 key=args_info.key_arg;
mas01cr@105 345 if(args_info.times_given){
mas01cr@105 346 timesFileName=args_info.times_arg;
mas01cr@105 347 if(strlen(timesFileName)>0){
mas01cr@239 348 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 349 error("Could not open times file for reading", timesFileName);
mas01cr@105 350 usingTimes=1;
mas01cr@105 351 }
mas01cr@105 352 }
mas01cr@193 353 if (args_info.power_given) {
mas01cr@193 354 powerFileName = args_info.power_arg;
mas01cr@193 355 if (strlen(powerFileName) > 0) {
mas01cr@193 356 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 357 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 358 }
mas01cr@193 359 usingPower = 1;
mas01cr@193 360 }
mas01cr@193 361 }
mas01cr@105 362 return 0;
mas01cr@105 363 }
mas01cr@105 364
mas01cr@105 365 if(args_info.BATCHINSERT_given){
mas01cr@105 366 command=COM_BATCHINSERT;
mas01cr@105 367 dbName=args_info.database_arg;
mas01cr@105 368 inFile=args_info.featureList_arg;
mas01cr@105 369 if(args_info.keyList_given)
mas01tc@298 370 if(!args_info.featureList_given)
mas01tc@300 371 error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'");
mas01mc@292 372 else
mas01cr@304 373 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 374
mas01cr@105 375 /* TO DO: REPLACE WITH
mas01cr@0 376 if(args_info.keyList_given){
mas01mc@18 377 trackFileName=args_info.keyList_arg;
mas01cr@239 378 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01mc@18 379 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 380 }
mas01cr@0 381 AND UPDATE BATCHINSERT()
mas01cr@105 382 */
mas01cr@105 383
mas01cr@105 384 if(args_info.timesList_given){
mas01cr@105 385 timesFileName=args_info.timesList_arg;
mas01cr@105 386 if(strlen(timesFileName)>0){
mas01cr@239 387 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 388 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 389 usingTimes=1;
mas01cr@105 390 }
mas01cr@105 391 }
mas01cr@193 392 if(args_info.powerList_given){
mas01cr@193 393 powerFileName=args_info.powerList_arg;
mas01cr@193 394 if(strlen(powerFileName)>0){
mas01cr@239 395 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
mas01cr@193 396 error("Could not open powerList file for reading", powerFileName);
mas01cr@193 397 usingPower=1;
mas01cr@193 398 }
mas01cr@193 399 }
mas01cr@105 400 return 0;
mas01cr@105 401 }
mas01mc@292 402
mas01mc@292 403 // Set no_unit_norm flag
mas01mc@292 404 no_unit_norming = args_info.no_unit_norming_flag;
mas01mc@292 405 lsh_use_u_functions = args_info.lsh_use_u_functions_flag;
mas01mc@292 406
mas01mc@292 407 // LSH Index Command
mas01mc@292 408 if(args_info.INDEX_given){
mas01mc@292 409 if(radius <= 0 )
mas01mc@292 410 error("INDEXing requires a Radius argument");
mas01mc@292 411 if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN))
mas01mc@292 412 error("INDEXing requires 1 <= sequenceLength <= 1000");
mas01mc@292 413 command=COM_INDEX;
mas01mc@292 414 dbName=args_info.database_arg;
mas01mc@292 415
mas01mc@292 416 // Whether to store LSH hash tables for query in core (FORMAT2)
mas01mc@297 417 lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested
mas01mc@292 418
mas01mc@292 419 lsh_param_w = args_info.lsh_w_arg;
mas01mc@292 420 if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH))
mas01mc@292 421 error("Indexing parameter w out of range (0.0 < w <= 100.0)");
mas01mc@292 422
mas01mc@292 423 lsh_param_k = args_info.lsh_k_arg;
mas01mc@292 424 if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS))
mas01mc@292 425 error("Indexing parameter k out of range (1 <= k <= 100)");
mas01mc@292 426
mas01mc@292 427 lsh_param_m = args_info.lsh_m_arg;
mas01mc@292 428 if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0))
mas01mc@292 429 error("Indexing parameter m out of range (1 <= m <= 20)");
mas01mc@292 430
mas01mc@292 431 lsh_param_N = args_info.lsh_N_arg;
mas01mc@292 432 if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS))
mas01mc@292 433 error("Indexing parameter N out of range (1 <= N <= 1000000)");
mas01mc@292 434
mas01mc@292 435 lsh_param_b = args_info.lsh_b_arg;
mas01mc@292 436 if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH))
mas01mc@292 437 error("Indexing parameter b out of range (1 <= b <= 10000)");
mas01mc@292 438
mas01mc@296 439 lsh_param_ncols = args_info.lsh_ncols_arg;
mas01mc@296 440 if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing
mas01mc@296 441 lsh_param_ncols = O2_SERIAL_MAX_COLS;
mas01mc@292 442 if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS))
mas01mc@292 443 error("Indexing parameter ncols out of range (1 <= ncols <= 1000");
mas01mc@292 444
mas01mc@292 445 return 0;
mas01mc@292 446 }
mas01mc@292 447
mas01cr@105 448 // Query command and arguments
mas01cr@105 449 if(args_info.QUERY_given){
mas01cr@105 450 command=COM_QUERY;
mas01cr@105 451 dbName=args_info.database_arg;
mas01mc@292 452 // XOR features and key search
mas01mc@292 453 if(!args_info.features_given && !args_info.key_given || (args_info.features_given && args_info.key_given))
mas01mc@292 454 error("QUERY requires exactly one of either -f features or -k key");
mas01mc@292 455 if(args_info.features_given)
mas01mc@292 456 inFile=args_info.features_arg; // query from file
mas01mc@292 457 else{
mas01mc@292 458 query_from_key = true;
mas01mc@292 459 key=args_info.key_arg; // query from key
mas01mc@292 460 }
mas01mc@292 461
mas01cr@105 462 if(args_info.keyList_given){
mas01cr@105 463 trackFileName=args_info.keyList_arg;
mas01cr@239 464 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01cr@105 465 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 466 }
mas01cr@105 467
mas01cr@105 468 if(args_info.times_given){
mas01cr@105 469 timesFileName=args_info.times_arg;
mas01cr@105 470 if(strlen(timesFileName)>0){
mas01cr@239 471 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 472 error("Could not open times file for reading", timesFileName);
mas01cr@105 473 usingTimes=1;
mas01cr@105 474 }
mas01cr@105 475 }
mas01cr@193 476
mas01cr@193 477 if(args_info.power_given){
mas01cr@193 478 powerFileName=args_info.power_arg;
mas01cr@193 479 if(strlen(powerFileName)>0){
mas01cr@193 480 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 481 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 482 }
mas01cr@193 483 usingPower = 1;
mas01cr@193 484 }
mas01cr@193 485 }
mas01cr@105 486
mas01cr@105 487 // query type
mas01cr@105 488 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 489 queryType=O2_TRACK_QUERY;
mas01cr@105 490 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 491 queryType=O2_POINT_QUERY;
mas01cr@105 492 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 493 queryType=O2_SEQUENCE_QUERY;
mas01mc@248 494 else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
mas01mc@248 495 queryType=O2_N_SEQUENCE_QUERY;
mas01mc@263 496 else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
mas01mc@263 497 queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
mas01cr@105 498 else
mas01cr@105 499 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 500
mas01cr@105 501 if(!args_info.exhaustive_flag){
mas01cr@105 502 queryPoint = args_info.qpoint_arg;
mas01cr@105 503 usingQueryPoint=1;
mas01cr@105 504 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 505 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 506 }
mas01mc@292 507
mas01mc@296 508 // Whether to pre-load LSH hash tables for query (default on, if flag set then off)
mas01mc@297 509 lsh_in_core = !args_info.lsh_on_disk_flag;
mas01mc@292 510
mas01mc@292 511 // Whether to perform exact evaluation of points returned by LSH
mas01mc@292 512 lsh_exact = args_info.lsh_exact_flag;
mas01mc@292 513
mas01cr@105 514 pointNN = args_info.pointnn_arg;
mas01mc@263 515 if(pointNN < 1 || pointNN > O2_MAXNN) {
mas01mc@263 516 error("pointNN out of range: 1 <= pointNN <= 1000000");
mas01cr@105 517 }
mas01cr@105 518 trackNN = args_info.resultlength_arg;
mas01mc@263 519 if(trackNN < 1 || trackNN > O2_MAXNN) {
mas01mc@263 520 error("resultlength out of range: 1 <= resultlength <= 1000000");
mas01cr@105 521 }
mas01cr@105 522 return 0;
mas01cr@105 523 }
mas01cr@105 524 return -1; // no command found
mas01cr@0 525 }
mas01cr@0 526
mas01cr@133 527 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 528 if(!dbH)
mas01cr@196 529 initTables(dbName, 0);
mas01cr@0 530
mas01cr@0 531 unsigned dudCount=0;
mas01cr@0 532 unsigned nullCount=0;
mas01cr@0 533 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 534 if(trackTable[k]<sequenceLength){
mas01cr@0 535 dudCount++;
mas01mc@18 536 if(!trackTable[k])
mas01cr@76 537 nullCount++;
mas01cr@0 538 }
mas01cr@0 539 }
mas01cr@76 540
mas01cr@133 541 if(adbStatusResponse == 0) {
mas01cr@76 542
mas01cr@76 543 // Update Header information
mas01cr@239 544 std::cout << "num files:" << dbH->numFiles << std::endl;
mas01cr@239 545 std::cout << "data dim:" << dbH->dim <<std::endl;
mas01cr@76 546 if(dbH->dim>0){
mas01cr@239 547 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
mas01mc@317 548 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@317 549 std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl;
mas01mc@317 550 else
mas01mc@317 551 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
mas01cr@76 552 }
mas01mc@317 553 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@317 554 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@317 555 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01mc@317 556 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@317 557 }
mas01mc@301 558 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
mas01mc@301 559 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
mas01mc@301 560 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
mas01mc@317 561 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES)
mas01mc@317 562 << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB)
mas01mc@318 563 << "]" << endl;
mas01mc@317 564
mas01cr@239 565 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
mas01cr@76 566 } else {
mas01cr@133 567 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 568 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 569 adbStatusResponse->result.length = dbH->length;
mas01cr@133 570 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 571 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 572 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 573 }
mas01cr@0 574 }
mas01cr@0 575
mas01cr@196 576 void audioDB::l2norm(const char* dbName) {
mas01cr@196 577 forWrite = true;
mas01cr@196 578 initTables(dbName, 0);
mas01mc@318 579 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@196 580 /* FIXME: should probably be uint64_t */
mas01cr@0 581 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@196 582 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@0 583 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 584 }
mas01cr@0 585 // Update database flags
mas01cr@0 586 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 587 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 588 }
mas01cr@193 589
mas01cr@193 590 void audioDB::power_flag(const char *dbName) {
mas01cr@196 591 forWrite = true;
mas01mc@318 592 initTables(dbName, 0);
mas01mc@318 593 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@193 594 error("cannot turn on power storage for non-empty database", dbName);
mas01cr@193 595 }
mas01cr@193 596 dbH->flags |= O2_FLAG_POWER;
mas01cr@193 597 memcpy(db, dbH, O2_HEADERSIZE);
mas01cr@193 598 }
mas01cr@193 599
mas01cr@239 600 // Unit norm block of features
mas01cr@0 601
mas01cr@239 602 /* FIXME: in fact this does not unit norm a block of features, it just
mas01cr@239 603 records the L2 norms somewhere. unitNorm() does in fact unit norm
mas01cr@239 604 a block of features. */
mas01cr@0 605 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 606 unsigned d;
mas01cr@59 607 double *p;
mas01cr@0 608 unsigned nn = n;
mas01cr@0 609
mas01cr@0 610 assert(l2normTable);
mas01cr@0 611
mas01mc@319 612 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 613 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 614
mas01cr@239 615 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@0 616
mas01cr@0 617 double* l2buf = new double[n];
mas01cr@0 618 double* l2ptr = l2buf;
mas01cr@0 619 assert(l2buf);
mas01cr@0 620 assert(X);
mas01cr@0 621
mas01cr@0 622 while(nn--){
mas01cr@0 623 p=X;
mas01cr@0 624 *l2ptr=0.0;
mas01cr@0 625 d=dim;
mas01cr@0 626 while(d--){
mas01cr@0 627 *l2ptr+=*p**p;
mas01cr@0 628 p++;
mas01cr@0 629 }
mas01mc@17 630 l2ptr++;
mas01mc@17 631 X+=dim;
mas01cr@0 632 }
mas01cr@0 633 unsigned offset;
mas01cr@84 634 if(append) {
mas01cr@84 635 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 636 // been inserted, and dbH->length has already been updated. We
mas01cr@84 637 // need to subtract off again the number of vectors that we've
mas01cr@84 638 // inserted this time...
mas01cr@84 639 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 640 } else {
mas01cr@0 641 offset=0;
mas01cr@84 642 }
mas01cr@0 643 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 644 if(l2buf)
mas01mc@17 645 delete[] l2buf;
mas01cr@239 646 VERB_LOG(2, " done.");
mas01cr@193 647 }
mas01cr@193 648
mas01mc@308 649 // This entry point is visited once per instance
mas01mc@308 650 // so it is a good place to set any global state variables
mas01cr@0 651 int main(const unsigned argc, char* const argv[]){
mas01mc@308 652 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
mas01cr@0 653 audioDB(argc, argv);
mas01cr@0 654 }