view audioDB.h @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents b9dbe4611dde
children
line wrap: on
line source
#ifndef __AUDIODB_H_
#define __AUDIODB_H_

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#if !defined(WIN32)
#include <sys/mman.h>
#endif
#include <fcntl.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <set>
#include <map>
#include <string>
#include <vector>
#include <math.h>
#include <time.h>
#include <assert.h>
#include <float.h>
#include <signal.h>

// includes for LSH indexing
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"
#include "ReporterBase.h"
#include "accumulator.h"
#include "lshlib.h"

// includes for web services
#include "soapH.h"
#include "cmdline.h"

// Alias for the library constant ADB_MAXSTR (presumably a maximum string
// length -- confirm against audioDB_API.h).
#define MAXSTR ADB_MAXSTR

// Database PRIMARY commands
// Each macro is the literal option string for one top-level action.
#define COM_CREATE "--NEW"
#define COM_INSERT "--INSERT"
#define COM_BATCHINSERT "--BATCHINSERT"
#define COM_QUERY "--QUERY"
#define COM_STATUS "--STATUS"
#define COM_L2NORM "--L2NORM"
#define COM_POWER "--POWER"
#define COM_DUMP "--DUMP"
#define COM_SERVER "--SERVER"
#define COM_INDEX "--INDEX"
#define COM_SAMPLE "--SAMPLE"
#define COM_LISZT "--LISZT"

// parameters
// Literal option strings for the arguments that qualify a primary command.
// Note that COM_TRACKNN is spelled "--resultlength" and COM_QUERYKEY "--key".
#define COM_CLIENT "--client"
#define COM_DATABASE "--database"
#define COM_QTYPE "--qtype"
#define COM_SEQLEN "--sequencelength"
#define COM_SEQHOP "--sequencehop"
#define COM_POINTNN "--pointnn"
#define COM_RADIUS "--radius"
#define COM_TRACKNN "--resultlength"
#define COM_QPOINT "--qpoint"
#define COM_FEATURES "--features"
#define COM_QUERYKEY "--key"
#define COM_KEYLIST "--keyList"
#define COM_TIMES "--times"
#define COM_QUERYPOWER "--power"
#define COM_RELATIVE_THRESH "--relative-threshold"
#define COM_ABSOLUTE_THRESH "--absolute-threshold"
#define COM_EXHAUSTIVE "--exhaustive"
#define COM_LSH_EXACT "--lsh_exact"
#define COM_NO_UNIT_NORMING "--no_unit_norming"
#define COM_DISTANCE_KULLBACK "--distance_kullback"

// Default result counts; O2_AUDIODB_INITIALIZERS uses them for the pointNN and
// trackNN members.
#define O2_DEFAULT_POINTNN (10U)
#define O2_DEFAULT_TRACKNN  (10U)

//#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size

// Defaults for the datasize, ntracks and datadim members (see
// O2_AUDIODB_INITIALIZERS); they also feed the O2_LARGE_ADB_* thresholds.
#define O2_DEFAULT_DATASIZE (1355U) /* in MB */
#define O2_DEFAULT_NTRACKS (20000U)
#define O2_DEFAULT_DATADIM (9U)

// LIMIT PARAMETERS
// O2_REALTYPE expands to a parenthesised type, so it is a cast prefix
// (`O2_REALTYPE x`), not a bare type name: `sizeof(O2_REALTYPE)` does not compile.
#define O2_REALTYPE (double)
#define O2_MAXFILES (1000000U)
#define O2_MAXFILESTR ADB_FILETABLE_ENTRY_SIZE
#define O2_FILETABLE_ENTRY_SIZE ADB_FILETABLE_ENTRY_SIZE
#define O2_TRACKTABLE_ENTRY_SIZE ADB_TRACKTABLE_ENTRY_SIZE
#define O2_HEADERSIZE (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS (1000U)
#define O2_MAXDIM (20000U)
#define O2_MAXNN (1000000U)
#define O2_MAXSEQLEN (8000U)            // maximum feature vectors in a sequence
#define O2_MAXTRACKS (1000000U)           // maximum number of tracks

// The size is taken from a cast expression (`(double) 0`) because O2_REALTYPE
// carries its own parentheses; the product is computed in size_t.
#define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE 0)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512,000,000 bytes when O2_REALTYPE is an 8-byte double
#define O2_SERIAL_MAX_TRACKBATCH (1000000)
#define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
#define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally
#define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS )

// Flags
// Most values are aliases for the ADB_HEADER_FLAG_* constants of the library
// API; O2_FLAG_MINMAX is defined locally as bit 0x2.
#define O2_FLAG_L2NORM ADB_HEADER_FLAG_L2NORM
#define O2_FLAG_MINMAX (0x2U)
#define O2_FLAG_POWER ADB_HEADER_FLAG_POWER
#define O2_FLAG_TIMES ADB_HEADER_FLAG_TIMES
#define O2_FLAG_LARGE_ADB ADB_HEADER_FLAG_REFERENCES
// Yields the literal "on" when x is non-zero, otherwise "off".  The argument is
// parenthesised so any expression can be passed safely.
#define DISPLAY_FLAG(x) ((x)?"on":"off")

// Query types
// Distinct single-bit values; O2_POINT_QUERY is the initial value of the
// queryType member (see O2_AUDIODB_INITIALIZERS).
#define O2_POINT_QUERY (0x4U)
#define O2_SEQUENCE_QUERY (0x8U)
#define O2_TRACK_QUERY (0x10U)
#define O2_N_SEQUENCE_QUERY (0x20U)
#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)

// Error Codes
// NOTE(review): presumably the sentinel returned by key lookups that find no
// match -- confirm against the callers, which are not in this header.
#define O2_ERR_KEYNOTFOUND (0xFFFFFF00)

// Macros
// True when the C string `command` (taken from the expansion site) is equal to
// the string `a`.
#define O2_ACTION(a) (strcmp(command,a)==0)

// Round x up / down to a multiple of 2^w.  Both arguments are parenthesised so
// that compound expressions (e.g. `cond ? 2 : 3`) can be passed as w.
// The mask is built from the int constant 1, so w must be smaller than the
// bit width of int.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))

// Round x up / down to a multiple of the value returned by getpagesize().
#define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1))
#define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))

// Yields x when it is non-null, otherwise the empty string literal.
// x is evaluated twice when it is non-null.
#define ENSURE_STRING(x) ((x) ? (x) : "")

// mmap() `length` bytes of the file open on `dbfid`, starting at file offset
// `start`, as a MAP_SHARED mapping and store the result, cast to `type`, in `var`.
// Names taken from the expansion site: `dbfid`, `forWrite` (PROT_WRITE is added
// to PROT_READ when it is true) and `error()`, which is called with the
// stringised name of `var` when mmap() returns (void *) -1.
// The expansion is a brace-enclosed block, not a single expression.
#define CHECKED_MMAP(type, var, start, length) \
  { void *tmp = mmap(0, length, (PROT_READ | (forWrite ? PROT_WRITE : 0)), MAP_SHARED, dbfid, (start)); \
    if(tmp == (void *) -1) { \
      error("mmap error for db table", #var, "mmap"); \
    } \
    var = (type) tmp; \
  }

// read() `count` bytes from `fd` into `buf` with a single call.
// Calls `error()` (taken from the expansion site) when read() returns -1, and
// also when it returns fewer bytes than requested: a short read is treated as
// an error, it is not retried.  `count` is evaluated once.
#define CHECKED_READ(fd, buf, count) \
  { size_t tmpcount = count; \
    ssize_t tmp = read(fd, buf, tmpcount); \
    if(tmp == -1) { \
      error("read error", "", "read"); \
    } else if((size_t) tmp != tmpcount) {	\
      error("short read", ""); \
    } \
  }

// write() `count` bytes from `buf` to `fd` with a single call.
// Calls `error()` (taken from the expansion site) when write() returns -1, and
// also when fewer bytes than requested were written: a short write is treated
// as an error, it is not retried.  `count` is evaluated once.
#define CHECKED_WRITE(fd, buf, count) \
  { size_t tmpcount = count; \
    ssize_t tmp = write(fd, buf, tmpcount); \
    if(tmp == -1) { \
      error("write error", "", "write"); \
    } else if((size_t) tmp != tmpcount) {	\
      error("short write", ""); \
    } \
  }

// printf-style logging to stderr, emitted (and flushed) only when the
// `verbosity` variable in scope at the expansion site is greater than `vv`.
// The format arguments are not evaluated when the message is suppressed.
//
// The test is written as `if(!...) {} else {...}` so that the expansion already
// owns its `else`: an `else` that follows the macro in the caller can no longer
// attach to the macro's own `if`.  `vv` is parenthesised so that compound
// expressions compare as a whole.  The macro still works with or without a
// trailing semicolon at the call site.
#define VERB_LOG(vv, ...) \
  if(!(verbosity > (vv))) { \
  } else { \
    fprintf(stderr, __VA_ARGS__); \
    fflush(stderr); \
  }

// Copy the C string STR into slot number dbH->numFiles of TABLE, where TABLE is
// a char array of fixed-size slots of O2_FILETABLE_ENTRY_SIZE bytes each.
// `dbH` is taken from the expansion site.
//
// The copy is bounded by the slot size: at most O2_FILETABLE_ENTRY_SIZE-1
// characters are stored, the rest of the slot is zero-filled and the last byte
// is always NUL, so a long name is truncated instead of spilling into the next
// slot.  STR is evaluated once.
//
// NOTE(review): the comment previously attached here spoke of mapping an off_t
// down to 32 bits in a 32-bit address space; nothing in this macro does that.
#define INSERT_FILETABLE_STRING(TABLE, STR) \
    { char *o2_slot_ = (TABLE) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE; \
      strncpy(o2_slot_, (STR), O2_FILETABLE_ENTRY_SIZE - 1); \
      o2_slot_[O2_FILETABLE_ENTRY_SIZE - 1] = '\0'; \
    }

// delete / delete[] the object PTR points to, then set PTR to 0.
// The two statements are wrapped in one brace block so that the macro behaves
// as a single statement: in `if(c) SAFE_DELETE(p);` the pointer is only reset
// when it was actually deleted.  Works with or without a trailing semicolon.
#define SAFE_DELETE(PTR) { delete (PTR); (PTR) = 0; }
#define SAFE_DELETE_ARRAY(PTR) { delete[] (PTR); (PTR) = 0; }

// Globals defined in another translation unit.
// NOTE(review): presumably the root directories applied to database and feature
// paths when running as a server -- confirm against the definitions.
extern char* SERVER_ADB_ROOT;
extern char* SERVER_ADB_FEATURE_ROOT;

// Front-end object for the audioDB program: holds the parsed command-line
// state, the handles and table pointers of the open database, and declares one
// method per primary command plus the LSH-indexing and web-service entry points.
//
// Only declarations live here; the behaviour of every method is defined in the
// implementation files.  Members are initialised through the
// O2_AUDIODB_INITIALIZERS list, which follows the declaration order below, so
// do not reorder members without updating that list.
class audioDB{
 private:
  // Parsed command line (gengetopt) and the option values taken from it.
  gengetopt_args_info args_info;
  unsigned dim;
  const char *dbName;
  const char *inFile;
  const char *hostport;
  const char *key;
  const char* trackFileName;
  std::ifstream *trackFile;
  const char *command;        // compared against COM_* strings by O2_ACTION
  const char *output;
  const char *timesFileName;
  std::ifstream *timesFile;
  const char *powerFileName;
  std::ifstream *powerFile;
  const char* adb_root;
  const char* adb_feature_root;

  // File descriptors and handles; `dbfid` and `forWrite` are the names the
  // CHECKED_MMAP macro picks up at its expansion site.
  int powerfd;
  int dbfid;
  int lshfid;
  bool forWrite;
  int infid;
  struct stat statbuf;
  struct adb_header *dbH;     // also read by INSERT_FILETABLE_STRING (numFiles)
  struct adb *adb;

  // Pointers to the database tables.
  char* fileTable;
  unsigned* trackTable;
  double* l2normTable;
  double* timesTable;
  double* powerTable;

  char* featureFileNameTable;
  char* timesFileNameTable;
  char* powerFileNameTable;

  // Sizes associated with the tables above.
  size_t fileTableLength;
  size_t trackTableLength;
  size_t timesTableLength;
  size_t powerTableLength;
  size_t l2normTableLength;

  // Flags and parameters
  unsigned verbosity;   // how much do we want to know? (read by VERB_LOG)

  unsigned nsamples;

  //off_t size; // given size (for creation)
  unsigned datasize; // size in MB
  unsigned ntracks;
  unsigned datadim;

  unsigned queryType; // point queries default
  unsigned pointNN;   // how many point NNs ?
  unsigned trackNN;   // how many track NNs ?
  unsigned sequenceLength;
  unsigned sequenceHop;
  bool normalizedDistance;
  bool no_unit_norming;
  bool distance_kullback;
  unsigned queryPoint;
  unsigned usingQueryPoint;
  unsigned usingTimes;
  unsigned usingPower;
  unsigned isClient;
  unsigned isServer;
  unsigned port;
  double timesTol;
  double radius;
  bool query_from_key;
  bool use_absolute_threshold;
  double absolute_threshold;
  bool use_relative_threshold;
  double relative_threshold;

  ReporterBase* reporter;  // track/point reporter

  // LISZT parameters
  unsigned lisztOffset;
  unsigned lisztLength;

  // private methods

  // Error reporting hook used by the CHECKED_* macros; declared noreturn, so
  // control never comes back to the caller.
  void error(const char* a, const char* b = "", const char *sysFunc = 0) __attribute__ ((noreturn));

  void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
  void initDBHeader(const char *dbName);
  void initInputFile(const char *inFile);
  void initTables(const char* dbName, const char* inFile = 0);
  void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
  void prefix_name(char** const name, const char* prefix);

 public:
  // Constructors: one for plain command-line use and one per web-service
  // response type that is to be filled in.
  audioDB(const unsigned argc, const char *argv[]);
  audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse);
  audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
  audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse);

  void cleanup();
  ~audioDB();
  int processArgs(const unsigned argc, const char* argv[]);

  // One method per primary command (see the COM_* strings).  The optional
  // soap/response pointers default to 0 for non-web-service use.
  void create(const char* dbName);
  void insert(const char* dbName, const char* inFile);
  void batchinsert(const char* dbName, const char* inFile);
  void datumFromFiles(adb_datum_t *datum);
  void query(const char* dbName, const char* inFile, struct soap *soap=0, adb__queryResponse *adbQueryResponse=0);
  void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);

  unsigned random_track(unsigned *propTable, unsigned total);
  void sample(const char *dbName);
  void l2norm(const char* dbName);
  void power_flag(const char *dbName);
  void dump(const char* dbName);
  void liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap=0, adb__lisztResponse* adbLisztResponse=0);

  // LSH indexing parameters and data structures
  LSH* lsh;
  bool lsh_in_core;     // load LSH tables for query into core (true) or keep on disk (false)
  bool lsh_use_u_functions;
  bool lsh_exact;      // flag to indicate use exact evaluation of points returned by LSH
  bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident
  double lsh_param_w; // Width of LSH hash-function bins
  Uns32T lsh_param_k; // Number of independent hash functions
  Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables
  Uns32T lsh_param_N; // Number of rows per hash table
  Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
  Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row

  // LSH indexing and retrieval methods
  void index_index_db(const char* dbName);
  void index_initialize(double**,double**,double**,double**,unsigned int*);
  void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
  int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
  // std::vector is spelled out so this header does not depend on a
  // using-directive leaking in from another header.
  Uns32T index_insert_shingles(std::vector<std::vector<float> >*, Uns32T trackID, double* spp);
  void insertPowerData(unsigned n, int powerfd, double *powerdata);
  void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);

  // Web Services
  void startServer();

  void ws_status(const char*dbName, char* hostport);
  void ws_query(const char*dbName, const char *featureFileName, const char* hostport);
  void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport);
  void ws_liszt(const char* dbName, char* hostport);

};

// Member-initialiser list for class audioDB, meant to follow the ':' of a
// constructor.  Entries are listed in the order the members are declared in
// the class.  args_info and statbuf are deliberately not listed.
//
// hostport and lsh_param_w are initialised here as well (to a null pointer and
// 0.0 respectively) so that no scalar member starts out with an indeterminate
// value.
//
// No comments may be placed inside the list: every line ends in a
// continuation backslash.
#define O2_AUDIODB_INITIALIZERS                 \
  dim(0),                                       \
    dbName(0),                                  \
    inFile(0),                                  \
    hostport(0),                                \
    key(0),                                     \
    trackFileName(0),                           \
    trackFile(0),                               \
    command(0),                                 \
    output(0),                                  \
    timesFileName(0),                           \
    timesFile(0),                               \
    powerFileName(0),                           \
    powerFile(0),                               \
    adb_root(0),                                \
    adb_feature_root(0),                        \
    powerfd(0),                                 \
    dbfid(0),                                   \
    lshfid(0),                                  \
    forWrite(false),                            \
    infid(0),                                   \
    dbH(0),                                     \
    adb(0),                                     \
    fileTable(0),                               \
    trackTable(0),                              \
    l2normTable(0),                             \
    timesTable(0),                              \
    powerTable(0),                              \
    featureFileNameTable(0),                    \
    timesFileNameTable(0),                      \
    powerFileNameTable(0),                      \
    fileTableLength(0),                         \
    trackTableLength(0),                        \
    timesTableLength(0),                        \
    powerTableLength(0),                        \
    l2normTableLength(0),                       \
    verbosity(1),                               \
    nsamples(2000),                             \
    datasize(O2_DEFAULT_DATASIZE),              \
    ntracks(O2_DEFAULT_NTRACKS),                \
    datadim(O2_DEFAULT_DATADIM),                \
    queryType(O2_POINT_QUERY),                  \
    pointNN(O2_DEFAULT_POINTNN),                \
    trackNN(O2_DEFAULT_TRACKNN),                \
    sequenceLength(16),                         \
    sequenceHop(1),                             \
    normalizedDistance(true),                   \
    no_unit_norming(false),                     \
    distance_kullback(false),                   \
    queryPoint(0),                              \
    usingQueryPoint(0),                         \
    usingTimes(0),                              \
    usingPower(0),                              \
    isClient(0),                                \
    isServer(0),                                \
    port(0),                                    \
    timesTol(0.1),                              \
    radius(0),                                  \
    query_from_key(false),                      \
    use_absolute_threshold(false),              \
    absolute_threshold(0.0),                    \
    use_relative_threshold(false),              \
    relative_threshold(0.0),                    \
    reporter(0),                                \
    lisztOffset(0),                             \
    lisztLength(0),                             \
    lsh(0),                                     \
    lsh_in_core(false),                         \
    lsh_use_u_functions(false),                 \
    lsh_exact(false),                           \
    WS_load_index(false),                       \
    lsh_param_w(0.0),                           \
    lsh_param_k(0),                             \
    lsh_param_m(0),                             \
    lsh_param_N(0),                             \
    lsh_param_b(0),                             \
    lsh_param_ncols(0)
#endif