annotate audioDB.cpp @ 308:896679d8cc39

Added server-side loading of persistent index (LSH hashtables) via --load_index -d dbName -R radius -l sequenceLength. Queries using these parameters will look up the memory-resident hashtable instead of loading one from disk.
author mas01mc
date Thu, 07 Aug 2008 01:53:38 +0000
parents d1b8b2dec37e
children cac5b3465318
rev   line source
mas01cr@0 1 #include "audioDB.h"
mas01cr@0 2
mas01mc@308 3 LSH* SERVER_LSH_INDEX_SINGLETON;
mas01mc@308 4
mas01mc@292 5 PointPair::PointPair(Uns32T a, Uns32T b, Uns32T c):trackID(a),qpos(b),spos(c){};
mas01mc@292 6
mas01mc@292 7 bool operator<(const PointPair& a, const PointPair& b){
mas01mc@292 8 return ( (a.qpos<b.qpos) ||
mas01mc@292 9 ((a.qpos==b.qpos) &&
mas01mc@292 10 ( (a.trackID<b.trackID)) || ((a.trackID==b.trackID)&&(a.spos<b.spos)) ) );
mas01mc@292 11 }
mas01mc@292 12
mas01mc@292 13 bool operator>(const PointPair& a, const PointPair& b){
mas01mc@292 14 return ( (a.qpos>b.qpos) ||
mas01mc@292 15 ((a.qpos==b.qpos) &&
mas01mc@292 16 ( (a.trackID>b.trackID)) || ((a.trackID==b.trackID)&&(a.spos>b.spos)) ) );
mas01mc@292 17 }
mas01mc@292 18
mas01mc@292 19 bool operator==(const PointPair& a, const PointPair& b){
mas01mc@292 20 return ( (a.trackID==b.trackID) && (a.qpos==b.qpos) && (a.spos==b.spos) );
mas01mc@292 21 }
mas01mc@292 22
mas01cr@76 23 audioDB::audioDB(const unsigned argc, char* const argv[]): O2_AUDIODB_INITIALIZERS
mas01cr@76 24 {
mas01cr@0 25 if(processArgs(argc, argv)<0){
mas01cr@0 26 printf("No command found.\n");
mas01cr@0 27 cmdline_parser_print_version ();
mas01cr@0 28 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 29 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 30 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 31 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 32 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 33 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@151 34 error("No command found");
mas01cr@0 35 }
mas01cr@77 36
mas01cr@0 37 if(O2_ACTION(COM_SERVER))
mas01cr@0 38 startServer();
mas01cr@0 39
mas01cr@0 40 else if(O2_ACTION(COM_CREATE))
mas01cr@0 41 create(dbName);
mas01cr@0 42
mas01cr@0 43 else if(O2_ACTION(COM_INSERT))
mas01cr@0 44 insert(dbName, inFile);
mas01cr@0 45
mas01cr@0 46 else if(O2_ACTION(COM_BATCHINSERT))
mas01cr@0 47 batchinsert(dbName, inFile);
mas01cr@0 48
mas01cr@0 49 else if(O2_ACTION(COM_QUERY))
mas01mc@307 50 if(isClient){
mas01mc@307 51 if(query_from_key)
mas01mc@307 52 ws_query_by_key(dbName, key, (char*)hostport);
mas01mc@307 53 else
mas01mc@307 54 ws_query(dbName, inFile, (char*)hostport);
mas01mc@307 55 }
mas01cr@0 56 else
mas01cr@76 57 query(dbName, inFile);
mas01cr@0 58
mas01cr@0 59 else if(O2_ACTION(COM_STATUS))
mas01cr@0 60 if(isClient)
mas01cr@0 61 ws_status(dbName,(char*)hostport);
mas01cr@0 62 else
mas01cr@0 63 status(dbName);
mas01cr@280 64
mas01cr@280 65 else if(O2_ACTION(COM_SAMPLE))
mas01cr@280 66 sample(dbName);
mas01cr@0 67
mas01cr@0 68 else if(O2_ACTION(COM_L2NORM))
mas01cr@0 69 l2norm(dbName);
mas01cr@0 70
mas01cr@193 71 else if(O2_ACTION(COM_POWER))
mas01cr@193 72 power_flag(dbName);
mas01cr@193 73
mas01cr@0 74 else if(O2_ACTION(COM_DUMP))
mas01cr@0 75 dump(dbName);
mas01mc@292 76
mas01mc@292 77 else if(O2_ACTION(COM_INDEX))
mas01mc@292 78 index_index_db(dbName);
mas01cr@0 79
mas01cr@0 80 else
mas01cr@0 81 error("Unrecognized command",command);
mas01cr@0 82 }
mas01cr@0 83
mas01cr@133 84 audioDB::audioDB(const unsigned argc, char* const argv[], adb__queryResponse *adbQueryResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 85 {
mas01cr@97 86 try {
mas01cr@151 87 isServer = 1; // FIXME: Hack
mas01cr@97 88 processArgs(argc, argv);
mas01cr@97 89 assert(O2_ACTION(COM_QUERY));
mas01cr@133 90 query(dbName, inFile, adbQueryResponse);
mas01cr@97 91 } catch(char *err) {
mas01cr@97 92 cleanup();
mas01cr@97 93 throw(err);
mas01cr@97 94 }
mas01cr@76 95 }
mas01cr@76 96
mas01cr@133 97 audioDB::audioDB(const unsigned argc, char* const argv[], adb__statusResponse *adbStatusResponse): O2_AUDIODB_INITIALIZERS
mas01cr@76 98 {
mas01cr@97 99 try {
mas01cr@151 100 isServer = 1; // FIXME: Hack
mas01cr@97 101 processArgs(argc, argv);
mas01cr@97 102 assert(O2_ACTION(COM_STATUS));
mas01cr@133 103 status(dbName, adbStatusResponse);
mas01cr@97 104 } catch(char *err) {
mas01cr@97 105 cleanup();
mas01cr@97 106 throw(err);
mas01cr@97 107 }
mas01cr@76 108 }
mas01cr@76 109
mas01cr@97 110 void audioDB::cleanup() {
mas01cr@122 111 cmdline_parser_free(&args_info);
mas01cr@0 112 if(indata)
mas01cr@0 113 munmap(indata,statbuf.st_size);
mas01cr@0 114 if(db)
mas01cr@196 115 munmap(db,getpagesize());
mas01cr@196 116 if(fileTable)
mas01cr@196 117 munmap(fileTable, fileTableLength);
mas01cr@196 118 if(trackTable)
mas01cr@196 119 munmap(trackTable, trackTableLength);
mas01cr@196 120 if(dataBuf)
mas01cr@196 121 munmap(dataBuf, dataBufLength);
mas01cr@196 122 if(timesTable)
mas01cr@196 123 munmap(timesTable, timesTableLength);
mas01cr@196 124 if(l2normTable)
mas01cr@196 125 munmap(l2normTable, l2normTableLength);
mas01mc@292 126 if(trackOffsetTable)
mas01mc@292 127 delete trackOffsetTable;
mas01mc@292 128 if(reporter)
mas01mc@292 129 delete reporter;
mas01mc@292 130 if(exact_evaluation_queue)
mas01mc@292 131 delete exact_evaluation_queue;
mas01cr@284 132 if(rng)
mas01cr@284 133 gsl_rng_free(rng);
mas01mc@292 134 if(vv)
mas01mc@292 135 delete vv;
mas01cr@0 136 if(dbfid>0)
mas01cr@0 137 close(dbfid);
mas01cr@0 138 if(infid>0)
mas01cr@0 139 close(infid);
mas01cr@0 140 if(dbH)
mas01cr@0 141 delete dbH;
mas01mc@308 142 if(lsh!=SERVER_LSH_INDEX_SINGLETON)
mas01mc@308 143 delete lsh;
mas01cr@0 144 }
mas01cr@0 145
mas01cr@97 146 audioDB::~audioDB(){
mas01cr@97 147 cleanup();
mas01cr@97 148 }
mas01cr@97 149
mas01cr@0 150 int audioDB::processArgs(const unsigned argc, char* const argv[]){
mas01cr@0 151
mas01cr@0 152 if(argc<2){
mas01cr@0 153 cmdline_parser_print_version ();
mas01cr@0 154 if (strlen(gengetopt_args_info_purpose) > 0)
mas01cr@0 155 printf("%s\n", gengetopt_args_info_purpose);
mas01cr@0 156 printf("%s\n", gengetopt_args_info_usage);
mas01cr@0 157 printf("%s\n", gengetopt_args_info_help[1]);
mas01cr@0 158 printf("%s\n", gengetopt_args_info_help[2]);
mas01cr@0 159 printf("%s\n", gengetopt_args_info_help[0]);
mas01cr@0 160 exit(0);
mas01cr@0 161 }
mas01cr@0 162
mas01cr@0 163 if (cmdline_parser (argc, argv, &args_info) != 0)
mas01cr@151 164 error("Error parsing command line");
mas01cr@0 165
mas01cr@0 166 if(args_info.help_given){
mas01cr@0 167 cmdline_parser_print_help();
mas01cr@0 168 exit(0);
mas01cr@0 169 }
mas01cr@0 170
mas01cr@0 171 if(args_info.verbosity_given){
mas01cr@239 172 verbosity = args_info.verbosity_arg;
mas01cr@239 173 if(verbosity < 0 || verbosity > 10){
mas01cr@239 174 std::cerr << "Warning: verbosity out of range, setting to 1" << std::endl;
mas01cr@239 175 verbosity = 1;
mas01cr@0 176 }
mas01cr@0 177 }
mas01cr@0 178
mas01cr@129 179 if(args_info.size_given) {
mas01cr@256 180 if(args_info.datasize_given) {
mas01cr@256 181 error("both --size and --datasize given", "");
mas01cr@256 182 }
mas01cr@256 183 if(args_info.ntracks_given) {
mas01cr@256 184 error("both --size and --ntracks given", "");
mas01cr@256 185 }
mas01cr@256 186 if(args_info.datadim_given) {
mas01cr@256 187 error("both --size and --datadim given", "");
mas01cr@256 188 }
mas01cr@196 189 if (args_info.size_arg < 50 || args_info.size_arg > 32000) {
mas01cr@129 190 error("Size out of range", "");
mas01cr@129 191 }
mas01cr@256 192 double ratio = (double) args_info.size_arg * 1000000 / ((double) O2_DEFAULTDBSIZE);
mas01cr@256 193 /* FIXME: what's the safe way of doing this? */
mas01cr@256 194 datasize = (unsigned int) ceil(datasize * ratio);
mas01cr@256 195 ntracks = (unsigned int) ceil(ntracks * ratio);
mas01cr@256 196 } else {
mas01cr@256 197 if(args_info.datasize_given) {
mas01cr@256 198 datasize = args_info.datasize_arg;
mas01cr@256 199 }
mas01cr@256 200 if(args_info.ntracks_given) {
mas01cr@256 201 ntracks = args_info.ntracks_arg;
mas01cr@256 202 }
mas01cr@256 203 if(args_info.datadim_given) {
mas01cr@256 204 datadim = args_info.datadim_arg;
mas01cr@256 205 }
mas01cr@129 206 }
mas01cr@129 207
mas01cr@239 208 if(args_info.radius_given) {
mas01cr@239 209 radius = args_info.radius_arg;
mas01mc@307 210 if(radius < 0 || radius > 1000000000) {
mas01cr@77 211 error("radius out of range");
mas01cr@239 212 } else {
mas01cr@239 213 VERB_LOG(3, "Setting radius to %f\n", radius);
mas01mc@17 214 }
mas01mc@17 215 }
mas01mc@17 216
mas01mc@292 217 sequenceLength = args_info.sequencelength_arg;
mas01mc@292 218 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01mc@292 219 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01mc@292 220 }
mas01mc@292 221 sequenceHop = args_info.sequencehop_arg;
mas01mc@292 222 if(sequenceHop < 1 || sequenceHop > 1000) {
mas01mc@292 223 error("seqhop out of range: 1 <= seqhop <= 1000");
mas01mc@292 224 }
mas01mc@292 225
mas01mc@292 226 if (args_info.absolute_threshold_given) {
mas01mc@292 227 if (args_info.absolute_threshold_arg >= 0) {
mas01mc@292 228 error("absolute threshold out of range: should be negative");
mas01mc@292 229 }
mas01mc@292 230 use_absolute_threshold = true;
mas01mc@292 231 absolute_threshold = args_info.absolute_threshold_arg;
mas01mc@292 232 }
mas01mc@292 233 if (args_info.relative_threshold_given) {
mas01mc@292 234 use_relative_threshold = true;
mas01mc@292 235 relative_threshold = args_info.relative_threshold_arg;
mas01mc@292 236 }
mas01mc@292 237
mas01cr@0 238 if(args_info.SERVER_given){
mas01cr@0 239 command=COM_SERVER;
mas01cr@0 240 port=args_info.SERVER_arg;
mas01cr@0 241 if(port<100 || port > 100000)
mas01cr@0 242 error("port out of range");
mas01cr@151 243 isServer = 1;
mas01cr@105 244 #if defined(O2_DEBUG)
mas01cr@104 245 struct sigaction sa;
mas01cr@104 246 sa.sa_sigaction = sigterm_action;
mas01cr@104 247 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 248 sigaction(SIGTERM, &sa, NULL);
mas01cr@104 249 sa.sa_sigaction = sighup_action;
mas01cr@104 250 sa.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
mas01cr@104 251 sigaction(SIGHUP, &sa, NULL);
mas01cr@105 252 #endif
mas01mc@308 253 if(args_info.load_index_given){
mas01mc@308 254 if(!args_info.database_given)
mas01mc@308 255 error("load_index requires a --database argument");
mas01mc@308 256 else
mas01mc@308 257 dbName=args_info.database_arg;
mas01mc@308 258 if(!args_info.radius_given)
mas01mc@308 259 error("load_index requires a --radius argument");
mas01mc@308 260 if(!args_info.sequencelength_given)
mas01mc@308 261 error("load_index requires a --sequenceLength argument");
mas01mc@308 262 WS_load_index = true;
mas01mc@308 263 }
mas01cr@0 264 return 0;
mas01cr@0 265 }
mas01cr@0 266
mas01cr@0 267 // No return on client command, find database command
mas01cr@105 268 if(args_info.client_given){
mas01cr@105 269 command=COM_CLIENT;
mas01cr@105 270 hostport=args_info.client_arg;
mas01cr@105 271 isClient=1;
mas01cr@105 272 }
mas01cr@0 273
mas01cr@105 274 if(args_info.NEW_given){
mas01cr@105 275 command=COM_CREATE;
mas01cr@105 276 dbName=args_info.database_arg;
mas01cr@105 277 return 0;
mas01cr@105 278 }
mas01cr@0 279
mas01cr@105 280 if(args_info.STATUS_given){
mas01cr@105 281 command=COM_STATUS;
mas01cr@105 282 dbName=args_info.database_arg;
mas01cr@105 283 return 0;
mas01cr@105 284 }
mas01cr@0 285
mas01cr@280 286 if(args_info.SAMPLE_given) {
mas01cr@280 287 command = COM_SAMPLE;
mas01cr@280 288 dbName = args_info.database_arg;
mas01cr@280 289 sequenceLength = args_info.sequencelength_arg;
mas01cr@280 290 if(sequenceLength < 1 || sequenceLength > 1000) {
mas01cr@280 291 error("seqlen out of range: 1 <= seqlen <= 1000");
mas01cr@280 292 }
mas01cr@280 293 nsamples = args_info.nsamples_arg;
mas01cr@280 294 return 0;
mas01cr@280 295 }
mas01cr@280 296
mas01cr@105 297 if(args_info.DUMP_given){
mas01cr@105 298 command=COM_DUMP;
mas01cr@105 299 dbName=args_info.database_arg;
mas01cr@131 300 output = args_info.output_arg;
mas01cr@105 301 return 0;
mas01cr@105 302 }
mas01cr@0 303
mas01cr@105 304 if(args_info.L2NORM_given){
mas01cr@105 305 command=COM_L2NORM;
mas01cr@105 306 dbName=args_info.database_arg;
mas01cr@105 307 return 0;
mas01cr@105 308 }
mas01cr@0 309
mas01cr@193 310 if(args_info.POWER_given){
mas01cr@193 311 command=COM_POWER;
mas01cr@193 312 dbName=args_info.database_arg;
mas01cr@193 313 return 0;
mas01cr@193 314 }
mas01cr@193 315
mas01cr@105 316 if(args_info.INSERT_given){
mas01cr@105 317 command=COM_INSERT;
mas01cr@105 318 dbName=args_info.database_arg;
mas01cr@105 319 inFile=args_info.features_arg;
mas01cr@105 320 if(args_info.key_given)
mas01mc@292 321 if(!args_info.features_given)
mas01mc@292 322 error("INSERT: '-k key' argument depends on '-f features'");
mas01mc@292 323 else
mas01mc@292 324 key=args_info.key_arg;
mas01cr@105 325 if(args_info.times_given){
mas01cr@105 326 timesFileName=args_info.times_arg;
mas01cr@105 327 if(strlen(timesFileName)>0){
mas01cr@239 328 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 329 error("Could not open times file for reading", timesFileName);
mas01cr@105 330 usingTimes=1;
mas01cr@105 331 }
mas01cr@105 332 }
mas01cr@193 333 if (args_info.power_given) {
mas01cr@193 334 powerFileName = args_info.power_arg;
mas01cr@193 335 if (strlen(powerFileName) > 0) {
mas01cr@193 336 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 337 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 338 }
mas01cr@193 339 usingPower = 1;
mas01cr@193 340 }
mas01cr@193 341 }
mas01cr@105 342 return 0;
mas01cr@105 343 }
mas01cr@105 344
mas01cr@105 345 if(args_info.BATCHINSERT_given){
mas01cr@105 346 command=COM_BATCHINSERT;
mas01cr@105 347 dbName=args_info.database_arg;
mas01cr@105 348 inFile=args_info.featureList_arg;
mas01cr@105 349 if(args_info.keyList_given)
mas01tc@298 350 if(!args_info.featureList_given)
mas01tc@300 351 error("BATCHINSERT: '-K keyList' argument depends on '-F featureList'");
mas01mc@292 352 else
mas01cr@304 353 key=args_info.keyList_arg; // INCONSISTENT NO CHECK
mas01cr@0 354
mas01cr@105 355 /* TO DO: REPLACE WITH
mas01cr@0 356 if(args_info.keyList_given){
mas01mc@18 357 trackFileName=args_info.keyList_arg;
mas01cr@239 358 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01mc@18 359 error("Could not open keyList file for reading",trackFileName);
mas01cr@0 360 }
mas01cr@0 361 AND UPDATE BATCHINSERT()
mas01cr@105 362 */
mas01cr@105 363
mas01cr@105 364 if(args_info.timesList_given){
mas01cr@105 365 timesFileName=args_info.timesList_arg;
mas01cr@105 366 if(strlen(timesFileName)>0){
mas01cr@239 367 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 368 error("Could not open timesList file for reading", timesFileName);
mas01cr@105 369 usingTimes=1;
mas01cr@105 370 }
mas01cr@105 371 }
mas01cr@193 372 if(args_info.powerList_given){
mas01cr@193 373 powerFileName=args_info.powerList_arg;
mas01cr@193 374 if(strlen(powerFileName)>0){
mas01cr@239 375 if(!(powerFile = new std::ifstream(powerFileName,std::ios::in)))
mas01cr@193 376 error("Could not open powerList file for reading", powerFileName);
mas01cr@193 377 usingPower=1;
mas01cr@193 378 }
mas01cr@193 379 }
mas01cr@105 380 return 0;
mas01cr@105 381 }
mas01mc@292 382
mas01mc@292 383 // Set no_unit_norm flag
mas01mc@292 384 no_unit_norming = args_info.no_unit_norming_flag;
mas01mc@292 385 lsh_use_u_functions = args_info.lsh_use_u_functions_flag;
mas01mc@292 386
mas01mc@292 387 // LSH Index Command
mas01mc@292 388 if(args_info.INDEX_given){
mas01mc@292 389 if(radius <= 0 )
mas01mc@292 390 error("INDEXing requires a Radius argument");
mas01mc@292 391 if(!(sequenceLength>0 && sequenceLength <= O2_MAXSEQLEN))
mas01mc@292 392 error("INDEXing requires 1 <= sequenceLength <= 1000");
mas01mc@292 393 command=COM_INDEX;
mas01mc@292 394 dbName=args_info.database_arg;
mas01mc@292 395
mas01mc@292 396 // Whether to store LSH hash tables for query in core (FORMAT2)
mas01mc@297 397 lsh_in_core = !args_info.lsh_on_disk_flag; // This flag is set to 0 if on_disk requested
mas01mc@292 398
mas01mc@292 399 lsh_param_w = args_info.lsh_w_arg;
mas01mc@292 400 if(!(lsh_param_w>0 && lsh_param_w<=O2_SERIAL_MAX_BINWIDTH))
mas01mc@292 401 error("Indexing parameter w out of range (0.0 < w <= 100.0)");
mas01mc@292 402
mas01mc@292 403 lsh_param_k = args_info.lsh_k_arg;
mas01mc@292 404 if(!(lsh_param_k>0 && lsh_param_k<=O2_SERIAL_MAX_FUNS))
mas01mc@292 405 error("Indexing parameter k out of range (1 <= k <= 100)");
mas01mc@292 406
mas01mc@292 407 lsh_param_m = args_info.lsh_m_arg;
mas01mc@292 408 if(!(lsh_param_m>0 && lsh_param_m<= (1 + (sqrt(1 + O2_SERIAL_MAX_TABLES*8.0)))/2.0))
mas01mc@292 409 error("Indexing parameter m out of range (1 <= m <= 20)");
mas01mc@292 410
mas01mc@292 411 lsh_param_N = args_info.lsh_N_arg;
mas01mc@292 412 if(!(lsh_param_N>0 && lsh_param_N<=O2_SERIAL_MAX_ROWS))
mas01mc@292 413 error("Indexing parameter N out of range (1 <= N <= 1000000)");
mas01mc@292 414
mas01mc@292 415 lsh_param_b = args_info.lsh_b_arg;
mas01mc@292 416 if(!(lsh_param_b>0 && lsh_param_b<=O2_SERIAL_MAX_TRACKBATCH))
mas01mc@292 417 error("Indexing parameter b out of range (1 <= b <= 10000)");
mas01mc@292 418
mas01mc@296 419 lsh_param_ncols = args_info.lsh_ncols_arg;
mas01mc@296 420 if(lsh_in_core) // We don't want to block rows with FORMAT2 indexing
mas01mc@296 421 lsh_param_ncols = O2_SERIAL_MAX_COLS;
mas01mc@292 422 if( !(lsh_param_ncols>0 && lsh_param_ncols<=O2_SERIAL_MAX_COLS))
mas01mc@292 423 error("Indexing parameter ncols out of range (1 <= ncols <= 1000");
mas01mc@292 424
mas01mc@292 425 return 0;
mas01mc@292 426 }
mas01mc@292 427
mas01cr@105 428 // Query command and arguments
mas01cr@105 429 if(args_info.QUERY_given){
mas01cr@105 430 command=COM_QUERY;
mas01cr@105 431 dbName=args_info.database_arg;
mas01mc@292 432 // XOR features and key search
mas01mc@292 433 if(!args_info.features_given && !args_info.key_given || (args_info.features_given && args_info.key_given))
mas01mc@292 434 error("QUERY requires exactly one of either -f features or -k key");
mas01mc@292 435 if(args_info.features_given)
mas01mc@292 436 inFile=args_info.features_arg; // query from file
mas01mc@292 437 else{
mas01mc@292 438 query_from_key = true;
mas01mc@292 439 key=args_info.key_arg; // query from key
mas01mc@292 440 }
mas01mc@292 441
mas01cr@105 442 if(args_info.keyList_given){
mas01cr@105 443 trackFileName=args_info.keyList_arg;
mas01cr@239 444 if(strlen(trackFileName)>0 && !(trackFile = new std::ifstream(trackFileName,std::ios::in)))
mas01cr@105 445 error("Could not open keyList file for reading",trackFileName);
mas01cr@105 446 }
mas01cr@105 447
mas01cr@105 448 if(args_info.times_given){
mas01cr@105 449 timesFileName=args_info.times_arg;
mas01cr@105 450 if(strlen(timesFileName)>0){
mas01cr@239 451 if(!(timesFile = new std::ifstream(timesFileName,std::ios::in)))
mas01cr@105 452 error("Could not open times file for reading", timesFileName);
mas01cr@105 453 usingTimes=1;
mas01cr@105 454 }
mas01cr@105 455 }
mas01cr@193 456
mas01cr@193 457 if(args_info.power_given){
mas01cr@193 458 powerFileName=args_info.power_arg;
mas01cr@193 459 if(strlen(powerFileName)>0){
mas01cr@193 460 if (!(powerfd = open(powerFileName, O_RDONLY))) {
mas01cr@193 461 error("Could not open power file for reading", powerFileName, "open");
mas01cr@193 462 }
mas01cr@193 463 usingPower = 1;
mas01cr@193 464 }
mas01cr@193 465 }
mas01cr@105 466
mas01cr@105 467 // query type
mas01cr@105 468 if(strncmp(args_info.QUERY_arg, "track", MAXSTR)==0)
mas01cr@105 469 queryType=O2_TRACK_QUERY;
mas01cr@105 470 else if(strncmp(args_info.QUERY_arg, "point", MAXSTR)==0)
mas01cr@105 471 queryType=O2_POINT_QUERY;
mas01cr@105 472 else if(strncmp(args_info.QUERY_arg, "sequence", MAXSTR)==0)
mas01cr@105 473 queryType=O2_SEQUENCE_QUERY;
mas01mc@248 474 else if(strncmp(args_info.QUERY_arg, "nsequence", MAXSTR)==0)
mas01mc@248 475 queryType=O2_N_SEQUENCE_QUERY;
mas01mc@263 476 else if(strncmp(args_info.QUERY_arg, "onetoonensequence", MAXSTR)==0)
mas01mc@263 477 queryType=O2_ONE_TO_ONE_N_SEQUENCE_QUERY;
mas01cr@105 478 else
mas01cr@105 479 error("unsupported query type",args_info.QUERY_arg);
mas01cr@105 480
mas01cr@105 481 if(!args_info.exhaustive_flag){
mas01cr@105 482 queryPoint = args_info.qpoint_arg;
mas01cr@105 483 usingQueryPoint=1;
mas01cr@105 484 if(queryPoint<0 || queryPoint >10000)
mas01cr@105 485 error("queryPoint out of range: 0 <= queryPoint <= 10000");
mas01cr@105 486 }
mas01mc@292 487
mas01mc@296 488 // Whether to pre-load LSH hash tables for query (default on, if flag set then off)
mas01mc@297 489 lsh_in_core = !args_info.lsh_on_disk_flag;
mas01mc@292 490
mas01mc@292 491 // Whether to perform exact evaluation of points returned by LSH
mas01mc@292 492 lsh_exact = args_info.lsh_exact_flag;
mas01mc@292 493
mas01cr@105 494 pointNN = args_info.pointnn_arg;
mas01mc@263 495 if(pointNN < 1 || pointNN > O2_MAXNN) {
mas01mc@263 496 error("pointNN out of range: 1 <= pointNN <= 1000000");
mas01cr@105 497 }
mas01cr@105 498 trackNN = args_info.resultlength_arg;
mas01mc@263 499 if(trackNN < 1 || trackNN > O2_MAXNN) {
mas01mc@263 500 error("resultlength out of range: 1 <= resultlength <= 1000000");
mas01cr@105 501 }
mas01cr@105 502 return 0;
mas01cr@105 503 }
mas01cr@105 504 return -1; // no command found
mas01cr@0 505 }
mas01cr@0 506
mas01cr@133 507 void audioDB::status(const char* dbName, adb__statusResponse *adbStatusResponse){
mas01cr@0 508 if(!dbH)
mas01cr@196 509 initTables(dbName, 0);
mas01cr@0 510
mas01cr@0 511 unsigned dudCount=0;
mas01cr@0 512 unsigned nullCount=0;
mas01cr@0 513 for(unsigned k=0; k<dbH->numFiles; k++){
mas01mc@18 514 if(trackTable[k]<sequenceLength){
mas01cr@0 515 dudCount++;
mas01mc@18 516 if(!trackTable[k])
mas01cr@76 517 nullCount++;
mas01cr@0 518 }
mas01cr@0 519 }
mas01cr@76 520
mas01cr@133 521 if(adbStatusResponse == 0) {
mas01cr@76 522
mas01cr@76 523 // Update Header information
mas01cr@239 524 std::cout << "num files:" << dbH->numFiles << std::endl;
mas01cr@239 525 std::cout << "data dim:" << dbH->dim <<std::endl;
mas01cr@76 526 if(dbH->dim>0){
mas01cr@239 527 std::cout << "total vectors:" << dbH->length/(sizeof(double)*dbH->dim)<<std::endl;
mas01cr@239 528 std::cout << "vectors available:" << (dbH->timesTableOffset-(dbH->dataOffset+dbH->length))/(sizeof(double)*dbH->dim) << std::endl;
mas01cr@76 529 }
mas01cr@239 530 std::cout << "total bytes:" << dbH->length << " (" << (100.0*dbH->length)/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01cr@239 531 std::cout << "bytes available:" << dbH->timesTableOffset-(dbH->dataOffset+dbH->length) << " (" <<
mas01cr@239 532 (100.0*(dbH->timesTableOffset-(dbH->dataOffset+dbH->length)))/(dbH->timesTableOffset-dbH->dataOffset) << "%)" << std::endl;
mas01mc@301 533 std::cout << "flags:" << " l2norm[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_L2NORM)
mas01mc@301 534 << "] minmax[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_MINMAX)
mas01mc@301 535 << "] power[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_POWER)
mas01mc@301 536 << "] times[" << DISPLAY_FLAG(dbH->flags&O2_FLAG_TIMES) << "]" << endl;
mas01cr@239 537 std::cout << "null count: " << nullCount << " small sequence count " << dudCount-nullCount << std::endl;
mas01cr@76 538 } else {
mas01cr@133 539 adbStatusResponse->result.numFiles = dbH->numFiles;
mas01cr@133 540 adbStatusResponse->result.dim = dbH->dim;
mas01cr@133 541 adbStatusResponse->result.length = dbH->length;
mas01cr@133 542 adbStatusResponse->result.dudCount = dudCount;
mas01cr@133 543 adbStatusResponse->result.nullCount = nullCount;
mas01cr@133 544 adbStatusResponse->result.flags = dbH->flags;
mas01cr@76 545 }
mas01cr@0 546 }
mas01cr@0 547
mas01cr@196 548 void audioDB::l2norm(const char* dbName) {
mas01cr@196 549 forWrite = true;
mas01cr@196 550 initTables(dbName, 0);
mas01cr@0 551 if(dbH->length>0){
mas01cr@196 552 /* FIXME: should probably be uint64_t */
mas01cr@0 553 unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim);
mas01cr@196 554 CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength);
mas01cr@0 555 unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append
mas01cr@0 556 }
mas01cr@0 557 // Update database flags
mas01cr@0 558 dbH->flags = dbH->flags|O2_FLAG_L2NORM;
mas01cr@0 559 memcpy (db, dbH, O2_HEADERSIZE);
mas01cr@0 560 }
mas01cr@193 561
mas01cr@193 562 void audioDB::power_flag(const char *dbName) {
mas01cr@196 563 forWrite = true;
mas01cr@196 564 initTables(dbName, 0);
mas01cr@193 565 if (dbH->length > 0) {
mas01cr@193 566 error("cannot turn on power storage for non-empty database", dbName);
mas01cr@193 567 }
mas01cr@193 568 dbH->flags |= O2_FLAG_POWER;
mas01cr@193 569 memcpy(db, dbH, O2_HEADERSIZE);
mas01cr@193 570 }
mas01cr@193 571
mas01cr@239 572 // Unit norm block of features
mas01cr@0 573
mas01cr@239 574 /* FIXME: in fact this does not unit norm a block of features, it just
mas01cr@239 575 records the L2 norms somewhere. unitNorm() does in fact unit norm
mas01cr@239 576 a block of features. */
mas01cr@0 577 void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){
mas01cr@0 578 unsigned d;
mas01cr@59 579 double *p;
mas01cr@0 580 unsigned nn = n;
mas01cr@0 581
mas01cr@0 582 assert(l2normTable);
mas01cr@0 583
mas01cr@0 584 if( !append && (dbH->flags & O2_FLAG_L2NORM) )
mas01cr@0 585 error("Database is already L2 normed", "automatic norm on insert is enabled");
mas01cr@0 586
mas01cr@239 587 VERB_LOG(2, "norming %u vectors...", n);
mas01cr@0 588
mas01cr@0 589 double* l2buf = new double[n];
mas01cr@0 590 double* l2ptr = l2buf;
mas01cr@0 591 assert(l2buf);
mas01cr@0 592 assert(X);
mas01cr@0 593
mas01cr@0 594 while(nn--){
mas01cr@0 595 p=X;
mas01cr@0 596 *l2ptr=0.0;
mas01cr@0 597 d=dim;
mas01cr@0 598 while(d--){
mas01cr@0 599 *l2ptr+=*p**p;
mas01cr@0 600 p++;
mas01cr@0 601 }
mas01mc@17 602 l2ptr++;
mas01mc@17 603 X+=dim;
mas01cr@0 604 }
mas01cr@0 605 unsigned offset;
mas01cr@84 606 if(append) {
mas01cr@84 607 // FIXME: a hack, a very palpable hack: the vectors have already
mas01cr@84 608 // been inserted, and dbH->length has already been updated. We
mas01cr@84 609 // need to subtract off again the number of vectors that we've
mas01cr@84 610 // inserted this time...
mas01cr@84 611 offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors
mas01cr@84 612 } else {
mas01cr@0 613 offset=0;
mas01cr@84 614 }
mas01cr@0 615 memcpy(l2normTable+offset, l2buf, n*sizeof(double));
mas01cr@0 616 if(l2buf)
mas01mc@17 617 delete[] l2buf;
mas01cr@239 618 VERB_LOG(2, " done.");
mas01cr@193 619 }
mas01cr@193 620
mas01mc@308 621 // This entry point is visited once per instance
mas01mc@308 622 // so it is a good place to set any global state variables
mas01cr@0 623 int main(const unsigned argc, char* const argv[]){
mas01mc@308 624 SERVER_LSH_INDEX_SINGLETON = 0; // Initialize global variables
mas01cr@0 625 audioDB(argc, argv);
mas01cr@0 626 }