annotate audioDB.cpp @ 322:634959ef98f2 large_adb

Added --adb_root and --adb_feature_root support to server-side Web Services instance. Client does not have to know location of database and features on the server when server performs path substitutions with these arguments.
author mas01mc
date Thu, 21 Aug 2008 19:58:55 +0000
parents da2272e029b3
children
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01mc@308 3 LSH* SERVER_LSH_INDEX_SINGLETON;
mas01mc@322 4 char* SERVER_ADB_ROOT;
mas01mc@322 5 char* SERVER_ADB_FEATURE_ROOT;
mas01mc@308 6
mas01mc@292 7 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
mas01mc@292 8
mas01mc@292 9 bool operator<(const PointPair& a, const PointPair& b){
mas01mc@320 10 return ( (a.trackID<b.trackID) ||
mas01mc@320 11 ( (a.trackID==b.trackID) &&
mas01mc@320 12 ( (a.spos<b.spos) || ( (a.spos==b.spos) && (a.qpos < b.qpos) )) ) );
mas01mc@292 13 }
mas01mc@292 14
mas01mc@292 15 bool operator>(const PointPair& a, const PointPair& b){
mas01mc@320 16 return ( (a.trackID>b.trackID) ||
mas01mc@320 17 ( (a.trackID==b.trackID) &&
mas01mc@320 18 ( (a.spos>b.spos) || ( (a.spos==b.spos) && (a.qpos > b.qpos) )) ) );
mas01mc@292 19 }
mas01mc@292 20
mas01mc@292 21 bool operator==(const PointPair& a, const PointPair& b){
mas01mc@292 22 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) );
mas01mc@292 23 }
mas01mc@292 24
mas01cr@76 25 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 26 {
mas01cr@0 27 if(processArgs(argc, argv)<0){
mas01cr@0 28 printf("No command found.\n");
mas01cr@0 29 cmdline_parser_print_version ();
mas01cr@0 30 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 31 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 32 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 33 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 34 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 35 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 36 error("No command found");
mas01cr@0 37 }
mas01cr@77 38
mas01mc@322 39 // Perform database prefix substitution
mas01mc@321 40 if(adb_root)
mas01mc@321 41 prefix_name((char** const)&dbName, adb_root);
mas01mc@321 42
mas01cr@0 43 if(O2_ACTION(COM_SERVER))
mas01cr@0 44 startServer();
mas01cr@0 45
mas01cr@0 46 else if(O2_ACTION(COM_CREATE))
mas01cr@0 47 create(dbName);
mas01cr@0 48
mas01cr@0 49 else if(O2_ACTION(COM_INSERT))
mas01cr@0 50 insert(dbName, inFile);
mas01cr@0 51
mas01cr@0 52 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 53 batchinsert(dbName, inFile);
mas01cr@0 54
mas01cr@0 55 else if(O2_ACTION(COM_QUERY))
mas01mc@307 56 if(isClient){
mas01mc@307 57 if(query_from_key)
mas01mc@307 58 ws_query_by_key(dbName, key, (char*)hostport);
mas01mc@307 59 else
mas01mc@307 60 ws_query(dbName, inFile, (char*)hostport);
mas01mc@307 61 }
mas01cr@0 62 else
mas01cr@76 63 query(dbName, inFile);
mas01cr@0 64
mas01cr@0 65 else if(O2_ACTION(COM_STATUS))
mas01cr@0 66 if(isClient)
mas01cr@0 67 ws_status(dbName,(char*)hostport);
mas01cr@0 68 else
mas01cr@0 69 status(dbName);
mas01cr@280 70
mas01cr@280 71 else if(O2_ACTION(COM_SAMPLE))
mas01cr@280 72 sample(dbName);
mas01cr@0 73
mas01cr@0 74 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 75 l2norm(dbName);
mas01cr@0 76
mas01cr@193 77 else if(O2_ACTION(COM_POWER))
mas01cr@193 78 power_flag(dbName);
mas01cr@193 79
mas01cr@0 80 else if(O2_ACTION(COM_DUMP))
mas01cr@0 81 dump(dbName);
mas01mc@292 82
mas01mc@292 83 else if(O2_ACTION(COM_INDEX))
mas01mc@292 84 index_index_db(dbName);
mas01cr@0 85
mas01cr@0 86 else
mas01cr@0 87 error("Unrecognized command",command);
mas01cr@0 88 }
mas01cr@0 89
mas01cr@133 90 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 91 {
mas01cr@97 92 try {
mas01cr@151 93 isServer = 1; // FIXME: Hack
mas01cr@97 94 processArgs(argc, argv);
mas01mc@322 95 // Perform database prefix substitution
mas01mc@322 96 if(adb_root)
mas01mc@322 97 prefix_name((char** const)&dbName, adb_root);
mas01cr@97 98 assert(O2_ACTION(COM_QUERY));
mas01cr@133 99 query(dbName, inFile, adbQueryResponse);
mas01cr@97 100 } catch(char *err) {
mas01cr@97 101 cleanup();
mas01cr@97 102 throw(err);
mas01cr@97 103 }
mas01cr@76 104 }
mas01cr@76 105
mas01cr@133 106 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 107 {
mas01cr@97 108 try {
mas01cr@151 109 isServer = 1; // FIXME: Hack
mas01cr@97 110 processArgs(argc, argv);
mas01mc@322 111 // Perform database prefix substitution
mas01mc@322 112 if(adb_root)
mas01mc@322 113 prefix_name((char** const)&dbName, adb_root);
mas01cr@97 114 assert(O2_ACTION(COM_STATUS));
mas01cr@133 115 status(dbName, adbStatusResponse);
mas01cr@97 116 } catch(char *err) {
mas01cr@97 117 cleanup();
mas01cr@97 118 throw(err);
mas01cr@97 119 }
mas01cr@76 120 }
mas01cr@76 121
mas01cr@97 122 void audioDB::cleanup() {
mas01cr@122 123 cmdline_parser_free(&args_info);
mas01cr@0 124 if(indata)
mas01cr@0 125 munmap(indata,statbuf.st_size);
mas01cr@0 126 if(db)
mas01cr@196 127 munmap(db,getpagesize());
mas01cr@196 128 if(fileTable)
mas01cr@196 129 munmap(fileTable, fileTableLength);
mas01cr@196 130 if(trackTable)
mas01cr@196 131 munmap(trackTable, trackTableLength);
mas01cr@196 132 if(dataBuf)
mas01cr@196 133 munmap(dataBuf, dataBufLength);
mas01cr@196 134 if(timesTable)
mas01cr@196 135 munmap(timesTable, timesTableLength);
mas01mc@314 136 if(powerTable)
mas01mc@314 137 munmap(powerTable, powerTableLength);
mas01cr@196 138 if(l2normTable)
mas01cr@196 139 munmap(l2normTable, l2normTableLength);
mas01mc@318 140 if(featureFileNameTable)
mas01mc@318 141 munmap(featureFileNameTable, fileTableLength);
mas01mc@318 142 if(timesFileNameTable)
mas01mc@318 143 munmap(timesFileNameTable, fileTableLength);
mas01mc@318 144 if(powerFileNameTable)
mas01mc@318 145 munmap(powerFileNameTable, fileTableLength);
mas01mc@292 146 if(trackOffsetTable)
mas01mc@292 147 delete trackOffsetTable;
mas01mc@292 148 if(reporter)
mas01mc@292 149 delete reporter;
mas01mc@292 150 if(exact_evaluation_queue)
mas01mc@292 151 delete exact_evaluation_queue;
mas01cr@284 152 if(rng)
mas01cr@284 153 gsl_rng_free(rng);
mas01mc@292 154 if(vv)
mas01mc@292 155 delete vv;
mas01cr@0 156 if(dbfid>0)
mas01cr@0 157 close(dbfid);
mas01cr@0 158 if(infid>0)
mas01cr@0 159 close(infid);
mas01cr@0 160 if(dbH)
mas01cr@0 161 delete dbH;
mas01mc@308 162 if(lsh!=SERVER_LSH_INDEX_SINGLETON)
mas01mc@308 163 delete lsh;
mas01cr@0 164 }
mas01cr@0 165
mas01cr@97 166 audioDB::~audioDB(){
mas01cr@97 167 cleanup();
mas01cr@97 168 }
mas01cr@97 169
mas01cr@0 170 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 171
mas01cr@0 172 if(argc<2){
mas01cr@0 173 cmdline_parser_print_version ();
mas01cr@0 174 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 175 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 176 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 177 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 178 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 179 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 180 exit(0);
mas01cr@0 181 }
mas01cr@0 182
mas01cr@0 183 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@151 184 error("Error parsing command line");
mas01cr@0 185
mas01cr@0 186 if(args_info.help_given){
mas01cr@0 187 cmdline_parser_print_help();
mas01cr@0 188 exit(0);
mas01cr@0 189 }
mas01cr@0 190
mas01cr@0 191 if(args_info.verbosity_given){
mas01cr@239 192 verbosity = args_info.verbosity_arg;
mas01cr@239 193 if(verbosity < 0 || verbosity > 10){
mas01cr@239 194 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
mas01cr@239 195 verbosity = 1;
mas01cr@0 196 }
mas01cr@0 197 }
mas01cr@0 198
mas01cr@129 199 if(args_info.size_given) {
mas01cr@256 200 if(args_info.datasize_given) {
mas01cr@256 201 error("both --size and --datasize given", "");
mas01cr@256 202 }
mas01cr@256 203 if(args_info.ntracks_given) {
mas01cr@256 204 error("both --size and --ntracks given", "");
mas01cr@256 205 }
mas01cr@256 206 if(args_info.datadim_given) {
mas01cr@256 207 error("both --size and --datadim given", "");
mas01cr@256 208 }
mas01cr@196 209 if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
mas01cr@129 210 error("Size out of range", "");
mas01cr@129 211 }
mas01cr@256 212 double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
mas01cr@256 213 /* FIXME: what's the safe way of doing this? */
mas01cr@256 214 datasize = (unsigned int) ceil(datasize * ratio);
mas01cr@256 215 ntracks = (unsigned int) ceil(ntracks * ratio);
mas01cr@256 216 } else {
mas01cr@256 217 if(args_info.datasize_given) {
mas01cr@256 218 datasize = args_info.datasize_arg;
mas01cr@256 219 }
mas01cr@256 220 if(args_info.ntracks_given) {
mas01cr@256 221 ntracks = args_info.ntracks_arg;
mas01cr@256 222 }
mas01cr@256 223 if(args_info.datadim_given) {
mas01cr@256 224 datadim = args_info.datadim_arg;
mas01cr@256 225 }
mas01cr@129 226 }
mas01cr@129 227
mas01cr@239 228 if(args_info.radius_given) {
mas01cr@239 229 radius = args_info.radius_arg;
mas01mc@307 230 if(radius < 0 || radius > 1000000000) {
mas01cr@77 231 error("radius out of range");
mas01cr@239 232 } else {
mas01cr@239 233 VERB_LOG(3, "Setting radius to %f\n", radius);
mas01mc@17 234 }
mas01mc@17 235 }
mas01mc@17 236
mas01mc@292 237 sequenceLength = args_info.sequencelength_arg;
mas01mc@292 238 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01mc@292 239 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01mc@292 240 }
mas01mc@292 241 sequenceHop = args_info.sequencehop_arg;
mas01mc@292 242 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01mc@292 243 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01mc@292 244 }
mas01mc@292 245
mas01mc@292 246 if (args_info.absolute_threshold_given) {
mas01mc@292 247 if (args_info.absolute_threshold_arg >= 0) {
mas01mc@292 248 error("absolute threshold out of range: should be negative");
mas01mc@292 249 }
mas01mc@292 250 use_absolute_threshold = true;
mas01mc@292 251 absolute_threshold = args_info.absolute_threshold_arg;
mas01mc@292 252 }
mas01mc@292 253 if (args_info.relative_threshold_given) {
mas01mc@292 254 use_relative_threshold = true;
mas01mc@292 255 relative_threshold = args_info.relative_threshold_arg;
mas01mc@292 256 }
mas01mc@292 257
mas01mc@321 258 if (args_info.adb_root_given){
mas01mc@321 259 adb_root = args_info.adb_root_arg;
mas01mc@321 260 }
mas01mc@321 261
mas01mc@321 262 if (args_info.adb_feature_root_given){
mas01mc@321 263 adb_feature_root = args_info.adb_feature_root_arg;
mas01mc@321 264 }
mas01mc@322 265
mas01mc@322 266 // perform dbName path prefix SERVER-side subsitution
mas01mc@322 267 if(SERVER_ADB_ROOT && !adb_root)
mas01mc@322 268 adb_root = SERVER_ADB_ROOT;
mas01mc@322 269 if(SERVER_ADB_FEATURE_ROOT && !adb_feature_root)
mas01mc@322 270 adb_feature_root = SERVER_ADB_FEATURE_ROOT;
mas01mc@322 271
mas01cr@0 272 if(args_info.SERVER_given){
mas01cr@0 273 command=COM_SERVER;
mas01cr@0 274 port=args_info.SERVER_arg;
mas01cr@0 275 if(port<100 || port > 100000)
mas01cr@0 276 error("port out of range");
mas01cr@151 277 isServer = 1;
mas01cr@105 278 #if defined(O2_DEBUG)
mas01cr@104 279 struct sigaction sa;
mas01cr@104 280 sa.sa_sigaction = sigterm_action;
mas01cr@104 281 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 282 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 283 sa.sa_sigaction = sighup_action;
mas01cr@104 284 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 285 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 286 #endif
mas01mc@308 287 if(args_info.load_index_given){
mas01mc@308 288 if(!args_info.database_given)
mas01mc@308 289 error("load_index requires a --database argument");
mas01mc@308 290 else
mas01mc@308 291 dbName=args_info.database_arg;
mas01mc@308 292 if(!args_info.radius_given)
mas01mc@308 293 error("load_index requires a --radius argument");
mas01mc@308 294 if(!args_info.sequencelength_given)
mas01mc@308 295 error("load_index requires a --sequenceLength argument");
mas01mc@308 296 WS_load_index = true;
mas01mc@308 297 }
mas01cr@0 298 return 0;
mas01cr@0 299 }
mas01cr@0 300
mas01cr@0 301 // No return on client command, find database command
mas01cr@105 302 if(args_info.client_given){
mas01cr@105 303 command=COM_CLIENT;
mas01cr@105 304 hostport=args_info.client_arg;
mas01cr@105 305 isClient=1;
mas01cr@105 306 }
mas01cr@0 307
mas01cr@105 308 if(args_info.NEW_given){
mas01cr@105 309 command=COM_CREATE;
mas01cr@105 310 dbName=args_info.database_arg;
mas01cr@105 311 return 0;
mas01cr@105 312 }
mas01cr@0 313
mas01cr@105 314 if(args_info.STATUS_given){
mas01cr@105 315 command=COM_STATUS;
mas01cr@105 316 dbName=args_info.database_arg;
mas01cr@105 317 return 0;
mas01cr@105 318 }
mas01cr@0 319
mas01cr@280 320 if(args_info.SAMPLE_given) {
mas01cr@280 321 command = COM_SAMPLE;
mas01cr@280 322 dbName = args_info.database_arg;
mas01cr@280 323 sequenceLength = args_info.sequencelength_arg;
mas01cr@280 324 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@280 325 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@280 326 }
mas01cr@280 327 nsamples = args_info.nsamples_arg;
mas01cr@280 328 return 0;
mas01cr@280 329 }
mas01cr@280 330
mas01cr@105 331 if(args_info.DUMP_given){
mas01cr@105 332 command=COM_DUMP;
mas01cr@105 333 dbName=args_info.database_arg;
mas01cr@131 334 output = args_info.output_arg;
mas01cr@105 335 return 0;
mas01cr@105 336 }
mas01cr@0 337
mas01cr@105 338 if(args_info.L2NORM_given){
mas01cr@105 339 command=COM_L2NORM;
mas01cr@105 340 dbName=args_info.database_arg;
mas01cr@105 341 return 0;
mas01cr@105 342 }
mas01cr@0 343
mas01cr@193 344 if(args_info.POWER_given){
mas01cr@193 345 command=COM_POWER;
mas01cr@193 346 dbName=args_info.database_arg;
mas01cr@193 347 return 0;
mas01cr@193 348 }
mas01cr@193 349
mas01cr@105 350 if(args_info.INSERT_given){
mas01cr@105 351 command=COM_INSERT;
mas01cr@105 352 dbName=args_info.database_arg;
mas01cr@105 353 inFile=args_info.features_arg;
mas01cr@105 354 if(args_info.key_given)
mas01mc@292 355 if(!args_info.features_given)
mas01mc@292 356 error("INSERT: '-k key' argument depends on '-f features'");
mas01mc@292 357 else
mas01mc@292 358 key=args_info.key_arg;
mas01cr@105 359 if(args_info.times_given){
mas01cr@105 360 timesFileName=args_info.times_arg;
mas01cr@105 361 if(strlen(timesFileName)>0){
mas01cr@239 362 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 363 error("Could not open times file for reading", timesFileName);
mas01cr@105 364 usingTimes=1;
mas01cr@105 365 }
mas01cr@105 366 }
mas01cr@193 367 if (args_info.power_given) {
mas01cr@193 368 powerFileName = args_info.power_arg;
mas01cr@193 369 if (strlen(powerFileName) > 0) {
mas01cr@193 370 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 371 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 372 }
mas01cr@193 373 usingPower = 1;
mas01cr@193 374 }
mas01cr@193 375 }
mas01cr@105 376 return 0;
mas01cr@105 377 }
mas01cr@105 378
mas01cr@105 379 if(args_info.BATCHINSERT_given){
mas01cr@105 380 command=COM_BATCHINSERT;
mas01cr@105 381 dbName=args_info.database_arg;
mas01cr@105 382 inFile=args_info.featureList_arg;
mas01cr@105 383 if(args_info.keyList_given)
mas01tc@298 384 if(!args_info.featureList_given)
mas01tc@300 385 error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'");
mas01mc@292 386 else
mas01cr@304 387 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 388
mas01cr@105 389 /* TO DO: REPLACE WITH
mas01cr@0 390 if(args_info.keyList_given){
mas01mc@18 391 trackFileName=args_info.keyList_arg;
mas01cr@239 392 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01mc@18 393 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 394 }
mas01cr@0 395 AND UPDATE BATCHINSERT()
mas01cr@105 396 */
mas01cr@105 397
mas01cr@105 398 if(args_info.timesList_given){
mas01cr@105 399 timesFileName=args_info.timesList_arg;
mas01cr@105 400 if(strlen(timesFileName)>0){
mas01cr@239 401 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 402 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 403 usingTimes=1;
mas01cr@105 404 }
mas01cr@105 405 }
mas01cr@193 406 if(args_info.powerList_given){
mas01cr@193 407 powerFileName=args_info.powerList_arg;
mas01cr@193 408 if(strlen(powerFileName)>0){
mas01cr@239 409 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
mas01cr@193 410 error("Could not open powerList file for reading", powerFileName);
mas01cr@193 411 usingPower=1;
mas01cr@193 412 }
mas01cr@193 413 }
mas01cr@105 414 return 0;
mas01cr@105 415 }
mas01mc@292 416
mas01mc@292 417 // Set no_unit_norm flag
mas01mc@292 418 no_unit_norming = args_info.no_unit_norming_flag;
mas01mc@292 419 lsh_use_u_functions = args_info.lsh_use_u_functions_flag;
mas01mc@292 420
mas01mc@292 421 // LSH Index Command
mas01mc@292 422 if(args_info.INDEX_given){
mas01mc@292 423 if(radius <= 0 )
mas01mc@292 424 error("INDEXing requires a Radius argument");
mas01mc@292 425 if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN))
mas01mc@292 426 error("INDEXing requires 1 <= sequenceLength <= 1000");
mas01mc@292 427 command=COM_INDEX;
mas01mc@292 428 dbName=args_info.database_arg;
mas01mc@292 429
mas01mc@292 430 // Whether to store LSH hash tables for query in core (FORMAT2)
mas01mc@297 431 lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested
mas01mc@292 432
mas01mc@292 433 lsh_param_w = args_info.lsh_w_arg;
mas01mc@292 434 if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH))
mas01mc@292 435 error("Indexing parameter w out of range (0.0 < w <= 100.0)");
mas01mc@292 436
mas01mc@292 437 lsh_param_k = args_info.lsh_k_arg;
mas01mc@292 438 if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS))
mas01mc@292 439 error("Indexing parameter k out of range (1 <= k <= 100)");
mas01mc@292 440
mas01mc@292 441 lsh_param_m = args_info.lsh_m_arg;
mas01mc@292 442 if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0))
mas01mc@292 443 error("Indexing parameter m out of range (1 <= m <= 20)");
mas01mc@292 444
mas01mc@292 445 lsh_param_N = args_info.lsh_N_arg;
mas01mc@292 446 if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS))
mas01mc@292 447 error("Indexing parameter N out of range (1 <= N <= 1000000)");
mas01mc@292 448
mas01mc@292 449 lsh_param_b = args_info.lsh_b_arg;
mas01mc@292 450 if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH))
mas01mc@292 451 error("Indexing parameter b out of range (1 <= b <= 10000)");
mas01mc@292 452
mas01mc@296 453 lsh_param_ncols = args_info.lsh_ncols_arg;
mas01mc@296 454 if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing
mas01mc@296 455 lsh_param_ncols = O2_SERIAL_MAX_COLS;
mas01mc@292 456 if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS))
mas01mc@292 457 error("Indexing parameter ncols out of range (1 <= ncols <= 1000");
mas01mc@292 458
mas01mc@292 459 return 0;
mas01mc@292 460 }
mas01mc@292 461
mas01cr@105 462 // Query command and arguments
mas01cr@105 463 if(args_info.QUERY_given){
mas01cr@105 464 command=COM_QUERY;
mas01cr@105 465 dbName=args_info.database_arg;
mas01mc@292 466 // XOR features and key search
mas01mc@292 467 if(!args_info.features_given && !args_info.key_given || (args_info.features_given && args_info.key_given))
mas01mc@292 468 error("QUERY requires exactly one of either -f features or -k key");
mas01mc@292 469 if(args_info.features_given)
mas01mc@292 470 inFile=args_info.features_arg; // query from file
mas01mc@292 471 else{
mas01mc@292 472 query_from_key = true;
mas01mc@292 473 key=args_info.key_arg; // query from key
mas01mc@292 474 }
mas01mc@292 475
mas01cr@105 476 if(args_info.keyList_given){
mas01cr@105 477 trackFileName=args_info.keyList_arg;
mas01cr@239 478 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01cr@105 479 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 480 }
mas01cr@105 481
mas01cr@105 482 if(args_info.times_given){
mas01cr@105 483 timesFileName=args_info.times_arg;
mas01cr@105 484 if(strlen(timesFileName)>0){
mas01cr@239 485 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 486 error("Could not open times file for reading", timesFileName);
mas01cr@105 487 usingTimes=1;
mas01cr@105 488 }
mas01cr@105 489 }
mas01cr@193 490
mas01cr@193 491 if(args_info.power_given){
mas01cr@193 492 powerFileName=args_info.power_arg;
mas01cr@193 493 if(strlen(powerFileName)>0){
mas01cr@193 494 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 495 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 496 }
mas01cr@193 497 usingPower = 1;
mas01cr@193 498 }
mas01cr@193 499 }
mas01cr@105 500
mas01cr@105 501 // query type
mas01cr@105 502 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 503 queryType=O2_TRACK_QUERY;
mas01cr@105 504 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 505 queryType=O2_POINT_QUERY;
mas01cr@105 506 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 507 queryType=O2_SEQUENCE_QUERY;
mas01mc@248 508 else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
mas01mc@248 509 queryType=O2_N_SEQUENCE_QUERY;
mas01mc@263 510 else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
mas01mc@263 511 queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
mas01cr@105 512 else
mas01cr@105 513 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 514
mas01cr@105 515 if(!args_info.exhaustive_flag){
mas01cr@105 516 queryPoint = args_info.qpoint_arg;
mas01cr@105 517 usingQueryPoint=1;
mas01cr@105 518 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 519 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 520 }
mas01mc@292 521
mas01mc@296 522 // Whether to pre-load LSH hash tables for query (default on, if flag set then off)
mas01mc@297 523 lsh_in_core = !args_info.lsh_on_disk_flag;
mas01mc@292 524
mas01mc@292 525 // Whether to perform exact evaluation of points returned by LSH
mas01mc@292 526 lsh_exact = args_info.lsh_exact_flag;
mas01mc@292 527
mas01cr@105 528 pointNN = args_info.pointnn_arg;
mas01mc@263 529 if(pointNN < 1 || pointNN > O2_MAXNN) {
mas01mc@263 530 error("pointNN out of range: 1 <= pointNN <= 1000000");
mas01cr@105 531 }
mas01cr@105 532 trackNN = args_info.resultlength_arg;
mas01mc@263 533 if(trackNN < 1 || trackNN > O2_MAXNN) {
mas01mc@263 534 error("resultlength out of range: 1 <= resultlength <= 1000000");
mas01cr@105 535 }
mas01cr@105 536 return 0;
mas01cr@105 537 }
mas01cr@105 538 return -1; // no command found
mas01cr@0 539 }
mas01cr@0 540
mas01cr@133 541 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 542 if(!dbH)
mas01cr@196 543 initTables(dbName, 0);
mas01cr@0 544
mas01cr@0 545 unsigned dudCount=0;
mas01cr@0 546 unsigned nullCount=0;
mas01cr@0 547 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 548 if(trackTable[k]<sequenceLength){
mas01cr@0 549 dudCount++;
mas01mc@18 550 if(!trackTable[k])
mas01cr@76 551 nullCount++;
mas01cr@0 552 }
mas01cr@0 553 }
mas01cr@76 554
mas01cr@133 555 if(adbStatusResponse == 0) {
mas01cr@76 556
mas01cr@76 557 // Update Header information
mas01cr@239 558 std::cout << "num files:" << dbH->numFiles << std::endl;
mas01cr@239 559 std::cout << "data dim:" << dbH->dim <<std::endl;
mas01cr@76 560 if(dbH->dim>0){
mas01cr@239 561 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
mas01mc@317 562 if(dbH->flags & O2_FLAG_LARGE_ADB)
mas01mc@317 563 std::cout << "vectors available:" << O2_MAX_VECTORS - (dbH->length / (sizeof(double)*dbH->dim)) << std::endl;
mas01mc@317 564 else
mas01mc@317 565 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
mas01cr@76 566 }
mas01mc@317 567 if( ! (dbH->flags & O2_FLAG_LARGE_ADB) ){
mas01mc@317 568 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@317 569 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01mc@317 570 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@317 571 }
mas01mc@301 572 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
mas01mc@301 573 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
mas01mc@301 574 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
mas01mc@317 575 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES)
mas01mc@317 576 << "] largeADB[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_LARGE_ADB)
mas01mc@318 577 << "]" << endl;
mas01mc@317 578
mas01cr@239 579 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
mas01cr@76 580 } else {
mas01cr@133 581 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 582 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 583 adbStatusResponse->result.length = dbH->length;
mas01cr@133 584 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 585 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 586 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 587 }
mas01cr@0 588 }
mas01cr@0 589
mas01cr@196 590 void audioDB::l2norm(const char* dbName) {
mas01cr@196 591 forWrite = true;
mas01cr@196 592 initTables(dbName, 0);
mas01mc@318 593 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@196 594 /* FIXME: should probably be uint64_t */
mas01cr@0 595 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@196 596 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@0 597 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 598 }
mas01cr@0 599 // Update database flags
mas01cr@0 600 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 601 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 602 }
mas01cr@193 603
mas01cr@193 604 void audioDB::power_flag(const char *dbName) {
mas01cr@196 605 forWrite = true;
mas01mc@318 606 initTables(dbName, 0);
mas01mc@318 607 if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){
mas01cr@193 608 error("cannot turn on power storage for non-empty database", dbName);
mas01cr@193 609 }
mas01cr@193 610 dbH->flags |= O2_FLAG_POWER;
mas01cr@193 611 memcpy(db, dbH, O2_HEADERSIZE);
mas01cr@193 612 }
mas01cr@193 613
mas01cr@239 614 // Unit norm block of features
mas01cr@0 615
mas01cr@239 616 /* FIXME: in fact this does not unit norm a block of features, it just
mas01cr@239 617 records the L2 norms somewhere. unitNorm() does in fact unit norm
mas01cr@239 618 a block of features. */
mas01cr@0 619 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 620 unsigned d;
mas01cr@59 621 double *p;
mas01cr@0 622 unsigned nn = n;
mas01cr@0 623
mas01cr@0 624 assert(l2normTable);
mas01cr@0 625
mas01mc@319 626 if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 627 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 628
mas01cr@239 629 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@0 630
mas01cr@0 631 double* l2buf = new double[n];
mas01cr@0 632 double* l2ptr = l2buf;
mas01cr@0 633 assert(l2buf);
mas01cr@0 634 assert(X);
mas01cr@0 635
mas01cr@0 636 while(nn--){
mas01cr@0 637 p=X;
mas01cr@0 638 *l2ptr=0.0;
mas01cr@0 639 d=dim;
mas01cr@0 640 while(d--){
mas01cr@0 641 *l2ptr+=*p**p;
mas01cr@0 642 p++;
mas01cr@0 643 }
mas01mc@17 644 l2ptr++;
mas01mc@17 645 X+=dim;
mas01cr@0 646 }
mas01cr@0 647 unsigned offset;
mas01cr@84 648 if(append) {
mas01cr@84 649 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 650 // been inserted, and dbH->length has already been updated. We
mas01cr@84 651 // need to subtract off again the number of vectors that we've
mas01cr@84 652 // inserted this time...
mas01cr@84 653 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 654 } else {
mas01cr@0 655 offset=0;
mas01cr@84 656 }
mas01cr@0 657 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 658 if(l2buf)
mas01mc@17 659 delete[] l2buf;
mas01cr@239 660 VERB_LOG(2, " done.");
mas01cr@193 661 }
mas01cr@193 662
mas01mc@308 663 // This entry point is visited once per instance
mas01mc@308 664 // so it is a good place to set any global state variables
mas01cr@0 665 int main(const unsigned argc, char* const argv[]){
mas01mc@308 666 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
mas01mc@322 667 SERVER_ADB_ROOT = 0; // Server-side database root prefix
mas01mc@322 668 SERVER_ADB_FEATURE_ROOT = 0; // Server-side features root prefix
mas01cr@0 669 audioDB(argc, argv);
mas01cr@0 670 }