Mercurial > hg > audiodb
changeset 405:ef4792df8f93 api-inversion
invert audioDB::insert / audiodb_insert().
Start off by removing audioDB::insertDatum, and essentially reusing it
as audiodb_insert. We now ignore the fact that the command-line parsing
code has "helpfully" opened a std::ifstream for the times file and an fd
for the power file, and simply go ahead and do our own dirty work.
We can delete audioDB::insertDatum entirely, but unfortunately we can't
delete audioDB::insertPowerData and audioDB::insertTimestamps, because
the index and query code respectively use them. Instead, move the two
methods closer to their single uses.
audiodb_insert() is perhaps not as short and simple as might have
been hoped, given the existence of audiodb_insert_datum(); some of that
is C and its terrible way of making you pay every time you use dynamic
memory; some of it is the fact that the three different files (feature,
times, power) each require slightly different treatment. Hey ho.
We can implement audiodb_batchinsert() in terms of audiodb_insert(); the
function is pleasingly small. We can't quite use it for
audioDB::batchinsert yet, as we have to deal with the O2_FLAG_LARGE_ADB
case (which codepath is untested in libtests/).
This means that we can delete whole swathes of hideous code from
audioDB.cpp, including not just the versions of audiodb_insert() and
audiodb_batchinsert() but also an entire audioDB constructor. Yay.
(audioDB::unitNormAndInsertL2 has also died a deserved death).
author | mas01cr |
---|---|
date | Fri, 05 Dec 2008 22:32:49 +0000 |
parents | 1fb8bee777e5 |
children | c279adeb47f4 |
files | audioDB.cpp audioDB.h audioDB_API.h index.cpp insert.cpp query.cpp |
diffstat | 6 files changed, 184 insertions(+), 384 deletions(-) [+] |
line wrap: on
line diff
--- a/audioDB.cpp Fri Dec 05 22:32:43 2008 +0000 +++ b/audioDB.cpp Fri Dec 05 22:32:49 2008 +0000 @@ -165,73 +165,6 @@ } } - -//for the lib / API -audioDB::audioDB(const unsigned argc, const char *argv[], int * apierror, adb_t *a): O2_AUDIODB_INITIALIZERS -{ - - try { - UseApiError=1; - adb = a; - - if(processArgs(argc, argv)<0){ - printf("No command found.\n"); - cmdline_parser_print_version (); - if (strlen(gengetopt_args_info_purpose) > 0) - printf("%s\n", gengetopt_args_info_purpose); - printf("%s\n", gengetopt_args_info_usage); - printf("%s\n", gengetopt_args_info_help[1]); - printf("%s\n", gengetopt_args_info_help[2]); - printf("%s\n", gengetopt_args_info_help[0]); - error("No command found"); - } - - adb__queryResponse adbq; - - if(O2_ACTION(COM_CREATE)) - create(dbName); - - else if(O2_ACTION(COM_INSERT)) - insert(dbName, inFile); - - else if(O2_ACTION(COM_BATCHINSERT)) - batchinsert(dbName, inFile); - - else if(O2_ACTION(COM_QUERY)) - if(isClient) - ;//ws_query(dbName, inFile, (char*)hostport); - else - query(dbName, inFile, &adbq); - //query(dbName, inFile); - - else if(O2_ACTION(COM_STATUS)) - if(isClient) - ;//ws_status(dbName,(char*)hostport); - else - status(dbName); - - else if(O2_ACTION(COM_L2NORM)) - l2norm(dbName); - - else if(O2_ACTION(COM_POWER)) - power_flag(dbName); - - else if(O2_ACTION(COM_DUMP)) - dump(dbName); - - else - error("Unrecognized command",command); - - } catch(int a) { - *apierror=a; - return; - - } - *apierror=apierrortemp; - return; - -} - //for API query audioDB::audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror, adb_t *a): O2_AUDIODB_INITIALIZERS { @@ -799,48 +732,6 @@ } status(dbName); } -// Unit norm block of features - -/* FIXME: in fact this does not unit norm a block of features, it just - records the L2 norms somewhere. unitNorm() does in fact unit norm - a block of features. 
*/ -void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n){ - unsigned d; - double *p; - unsigned nn = n; - - assert(l2normTable); - - VERB_LOG(2, "norming %u vectors...", n); - - double* l2buf = new double[n]; - double* l2ptr = l2buf; - assert(l2buf); - assert(X); - - while(nn--){ - p=X; - *l2ptr=0.0; - d=dim; - while(d--){ - *l2ptr+=*p**p; - p++; - } - l2ptr++; - X+=dim; - } - unsigned offset; - - // FIXME: a hack, a very palpable hack: the vectors have already - // been inserted, and dbH->length has already been updated. We - // need to subtract off again the number of vectors that we've - // inserted this time... - offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors - memcpy(l2normTable+offset, l2buf, n*sizeof(double)); - if(l2buf) - delete[] l2buf; - VERB_LOG(2, " done."); -} // This entry point is visited once per instance // so it is a good place to set any global state variables @@ -858,172 +749,6 @@ * Christophe Rhodes c.rhodes@gold.ac.uk * Ian Knopke mas01ik@gold.ac.uk, ian.knopke@gmail.com */ - int audiodb_insert(adb_ptr mydb, adb_insert_ptr ins) { - const char *argv[15]; - int argvctr=0; - int apierror=0; - - argv[argvctr++]="audioDB"; - argv[argvctr++]="-I"; - argv[argvctr++]="-d"; - argv[argvctr++]=mydb->path; - argv[argvctr++]="-f"; - argv[argvctr++]=ins->features; - - if (ins->times){ - argv[argvctr++]="--times"; - argv[argvctr++]=ins->times; - } - - if (ins->power){ - argv[argvctr++]="-w"; - argv[argvctr++]=ins->power; - } - - if (ins->key){ - argv[argvctr++]="--key"; - argv[argvctr++]=ins->key; - } - argv[argvctr]='\0'; - - audioDB::audioDB(argvctr,argv,&apierror,mydb); - return apierror; - } - - - int audiodb_batchinsert(adb_ptr mydb, adb_insert_ptr ins, unsigned int size) { - - const char *argv[22]; - int argvctr=0; - unsigned int i=0; - char tempfeaturename[]="tempfeatureXXXXXX"; - char temppowername[]="temppowerXXXXXX"; - char tempkeyname[]="tempkeyXXXXXX"; - char temptimesname[]="temptimesXXXXXX"; - int 
tempfeaturefd = -1; - int temppowerfd = -1; - int tempkeyfd = -1; - int temptimesfd = -1; - - int flags[4]={0}; - int apierror=0; - - /* So the final API should take an array of structs. However, the current - * version requires four separate text files. So temporarily, we need to - * unpack the struct array, make four separate text files, and then reinsert - * them into the command-line call. This should change as soon as possible */ - - - argv[argvctr++]="audioDB"; - argv[argvctr++]="-B"; - argv[argvctr++]="-d"; - argv[argvctr++]=mydb->path; - - - /* assume struct is well formed for all entries */ - if (ins[0].features){ flags[0]++;} else { - /* short circuit the case where there are no features in the structs */ - return -1; - } ; - if (ins[0].power){ flags[1]++;}; - if (ins[0].key){ flags[2]++;}; - if (ins[0].times){ flags[3]++;}; - - - /* make four temp files */ - if ((tempfeaturefd = mkstemp(tempfeaturename)) == -1) - goto error; - if ((temppowerfd = mkstemp(temppowername)) == -1) - goto error; - if ((tempkeyfd=mkstemp(tempkeyname)) == -1) - goto error; - if ((temptimesfd=mkstemp(temptimesname)) == -1) - goto error; - - /* Ok, so we should have a working set of files to write to */ - /* I'm going to assume that the same format is kept for all structs in the array */ - /* That is, each struct should be correctly formed, and contain at least a features file, because I'm just going to pass the terms along to the text files */ - for (i = 0; i < size; i++) { - if (write(tempfeaturefd,ins[i].features,strlen(ins[i].features)) != (ssize_t) strlen(ins[i].features)) - goto error; - if (write(tempfeaturefd,"\n",1) != 1) - goto error; - - if (flags[1]) { - if (write(temppowerfd,ins[i].power,strlen(ins[i].power)) != (ssize_t) strlen(ins[i].power)) - goto error; - if (write(temppowerfd,"\n",1) != 1) - goto error; - } - if (flags[2]) { - if (write(tempkeyfd,ins[i].key,strlen(ins[i].key)) != (ssize_t) strlen(ins[i].key)) - goto error; - if (write(tempkeyfd,"\n",1) != 1) - 
goto error; - } - if (flags[3]) { - if (write(temptimesfd,ins[i].times,strlen(ins[i].times)) != (ssize_t) strlen(ins[i].times)) - goto error; - if (write(temptimesfd,"\n",1) != 1) - goto error; - } - } - - argv[argvctr++]="-F"; - argv[argvctr++]=tempfeaturename; - close(tempfeaturefd); - close(temppowerfd); - close(tempkeyfd); - close(temptimesfd); - - if (flags[1]){ - argv[argvctr++]="--powerList"; - argv[argvctr++]=temppowername; - } - - if (flags[2]){ - argv[argvctr++]="--keyList"; - argv[argvctr++]=tempkeyname; - } - - if (flags[3]){ - argv[argvctr++]="--timesList"; - argv[argvctr++]=temptimesname; - } - - argv[argvctr]='\0'; - - audioDB::audioDB(argvctr,argv,&apierror,mydb); - - remove(tempfeaturename); - remove(temppowername); - remove(tempkeyname); - remove(temptimesname); - - - return apierror; - - error: - if(tempfeaturefd != -1) { - close(tempfeaturefd); - remove(tempfeaturename); - } - if(temppowerfd != -1) { - close(temppowerfd); - remove(temppowername); - } - if(tempkeyfd != -1) { - close(tempkeyfd); - remove(tempkeyname); - } - if(temptimesfd != -1) { - close(temptimesfd); - remove(temptimesname); - } - return -1; - } - - int audiodb_query(adb_ptr mydb, adb_query_ptr adbq, adb_queryresult_ptr adbqr){ const char *argv[32];
--- a/audioDB.h Fri Dec 05 22:32:43 2008 +0000 +++ b/audioDB.h Fri Dec 05 22:32:49 2008 +0000 @@ -324,6 +324,7 @@ void initialize_arrays(int track, unsigned int numVectors, double *query, double *data_buffer, double **D, double **DD); void delete_arrays(int track, unsigned int numVectors, double **D, double **DD); void read_data(int trkfid, int track, double **data_buffer_p, size_t *data_buffer_size_p); + void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void set_up_query(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned int *nvp); void set_up_query_from_key(double **qp, double **vqp, double **qnp, double **vqnp, double **qpp, double **vqpp, double *mqdp, unsigned *nvp, Uns32T queryIndex); void set_up_db(double **snp, double **vsnp, double **spp, double **vspp, double **mddp, unsigned int *dvp); @@ -337,8 +338,6 @@ void initTablesFromKey(const char* dbName, const Uns32T queryIndex); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); void unitNormAndInsertL2(double* X, unsigned dim, unsigned n); - void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); - void insertPowerData(unsigned n, int powerfd, double *powerdata); unsigned getKeyPos(char* key); void prefix_name(char** const name, const char* prefix); @@ -347,7 +346,6 @@ audioDB(const unsigned argc, const char *argv[], adb__queryResponse *adbQueryResponse); audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse); audioDB(const unsigned argc, const char *argv[], adb__lisztResponse *adbLisztResponse); - audioDB(const unsigned argc, const char *argv[], int * apierror, struct adb *a); audioDB(const unsigned argc, const char *argv[],adb__queryResponse *adbQueryResponse, int * apierror, struct adb *a); @@ -359,7 +357,6 @@ void create(const char* dbName); bool enough_per_file_space_free(); void insert(const char* dbName, const char* inFile); - void 
insertDatum(const char *inFile, std::ifstream *timesFile, int powerfd, const char *key); void batchinsert(const char* dbName, const char* inFile); void batchinsert_large_adb(const char* dbName, const char* inFile); void query(const char* dbName, const char* inFile, adb__queryResponse *adbQueryResponse=0); @@ -413,6 +410,7 @@ void initialize_exact_evalutation_queue(); void index_insert_exact_evaluation_queue(Uns32T trackID, Uns32T qpos, Uns32T spos); LSH* index_allocate(char* indexName, bool load_hashTables); + void insertPowerData(unsigned n, int powerfd, double *powerdata); void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp); // Web Services
--- a/audioDB_API.h Fri Dec 05 22:32:43 2008 +0000 +++ b/audioDB_API.h Fri Dec 05 22:32:49 2008 +0000 @@ -33,12 +33,10 @@ //used for both insert and batchinsert struct adbinsert { - - char * features; - char * power; - char * key; - char * times; - + const char *features; + const char *power; + const char *key; + const char *times; }; typedef struct adbinsert adb_insert_t, *adb_insert_ptr;
--- a/index.cpp Fri Dec 05 22:32:43 2008 +0000 +++ b/index.cpp Fri Dec 05 22:32:49 2008 +0000 @@ -239,6 +239,28 @@ } +void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { + if(usingPower){ + int one; + unsigned int count; + + count = read(powerfd, &one, sizeof(unsigned int)); + if (count != sizeof(unsigned int)) { + error("powerfd read failed", "int", "read"); + } + if (one != 1) { + error("dimensionality of power file not 1", powerFileName); + } + + // FIXME: should check that the powerfile is the right size for + // this. -- CSR, 2007-10-30 + count = read(powerfd, powerdata, numVectors * sizeof(double)); + if (count != numVectors * sizeof(double)) { + error("powerfd read failed", "double", "read"); + } + } +} + // initialize auxillary track data from filesystem // pre-conditions: // dbH->flags & O2_FLAG_LARGE_ADB
--- a/insert.cpp Fri Dec 05 22:32:43 2008 +0000 +++ b/insert.cpp Fri Dec 05 22:32:49 2008 +0000 @@ -168,50 +168,125 @@ return(dbH->numFiles < maxfiles); } -void audioDB::insertDatum(const char *inFile, std::ifstream *timesFile, int powerfd, const char *key) { +int audiodb_insert(adb_t *adb, adb_insert_t *insert) { adb_datum_t datum; - int fd; - struct stat statbuf; + int fd = 0; + FILE *file = NULL; + struct stat st; off_t size; int err; - - datum.times = 0; - datum.power = 0; - - if((fd = open(inFile, O_RDONLY)) == -1) { - error("failed to open input file", inFile); + + datum.data = NULL; + datum.power = NULL; + datum.times = NULL; + if((fd = open(insert->features, O_RDONLY)) == -1) { + goto error; } - if(fstat(fd, &statbuf)) { - error("failed to stat input file", inFile); + if(fstat(fd, &st)) { + goto error; } read(fd, &(datum.dim), sizeof(uint32_t)); - size = statbuf.st_size - sizeof(uint32_t); + size = st.st_size - sizeof(uint32_t); datum.nvectors = size / (sizeof(double) * datum.dim); datum.data = (double *) malloc(size); if(!datum.data) { - error("failed to allocate memory"); + goto error; } read(fd, datum.data, size); close(fd); - if(timesFile) { - datum.times = (double *) malloc(sizeof(double) * datum.nvectors * 2); + fd = 0; + if(insert->power) { + int dim; + if((fd = open(insert->power, O_RDONLY)) == -1) { + goto error; + } + if(fstat(fd, &st)) { + goto error; + } + if((st.st_size - sizeof(uint32_t)) != (size / datum.dim)) { + goto error; + } + read(fd, &dim, sizeof(uint32_t)); + if(dim != 1) { + goto error; + } + datum.power = (double *) malloc(size / datum.dim); + if(!datum.power) { + goto error; + } + read(fd, datum.power, size / datum.dim); + close(fd); + } + if(insert->times) { + double t, *tp; + if(!(file = fopen(insert->times, "r"))) { + goto error; + } + datum.times = (double *) malloc(2 * size / datum.dim); if(!datum.times) { - error("failed to allocate memory"); + goto error; } - insertTimeStamps(datum.nvectors, timesFile, datum.times); + 
if(fscanf(file, " %lf", &t) != 1) { + goto error; + } + tp = datum.times; + *tp++ = t; + for(unsigned int n = 0; n < datum.nvectors - 1; n++) { + if(fscanf(file, " %lf", &t) != 1) { + goto error; + } + *tp++ = t; + *tp++ = t; + } + if(fscanf(file, " %lf", &t) != 1) { + goto error; + } + *tp = t; + fclose(file); } - if(powerfd) { - datum.power = (double *) malloc(sizeof(double) * datum.nvectors); - if(!datum.power) { - error("failed to allocate memory"); + datum.key = insert->key ? insert->key : insert->features; + err = audiodb_insert_datum(adb, &datum); + free(datum.data); + if(datum.power) { + free(datum.power); + } + if(datum.times) { + free(datum.times); + } + if(err == 2) { + return 0; + } + else { + return err; + } + + error: + if(fd > 0) { + close(fd); + } + if(file) { + fclose(file); + } + if(datum.data) { + free(datum.data); + } + if(datum.power) { + free(datum.power); + } + if(datum.times) { + free(datum.times); + } + return 1; +} + +int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) { + int err; + for(unsigned int n = 0; n < size; n++) { + if((err = audiodb_insert(adb, &(insert[n])))) { + return err; } - insertPowerData(datum.nvectors, powerfd, datum.power); } - datum.key = key ? key : inFile; - err = audiodb_insert_datum(adb, &datum); - if(err && (err != 2)) { - error("failed to insert data for file", inFile); - } + return 0; } void audioDB::insert(const char* dbName, const char* inFile) { @@ -224,70 +299,20 @@ } else { /* at this point, we have powerfd (an fd), timesFile (a - * std::ifstream *) and inFile (a char *). Wacky, huh? */ - insertDatum(inFile, timesFile, powerfd, key); + * std::ifstream *) and inFile (a char *). Wacky, huh? Ignore + * the wackiness and just use the names. 
*/ + adb_insert_t insert; + insert.features = inFile; + insert.times = timesFileName; + insert.power = powerFileName; + insert.key = key; + if(audiodb_insert(adb, &insert)) { + error("insertion failure", inFile); + } } status(dbName); } -void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) { - assert(usingTimes); - - unsigned numtimes = 0; - - if(!timesFile->is_open()) { - error("problem opening times file on timestamped database", timesFileName); - } - - double timepoint, next; - *timesFile >> timepoint; - if (timesFile->eof()) { - error("no entries in times file", timesFileName); - } - numtimes++; - do { - *timesFile >> next; - if (timesFile->eof()) { - break; - } - numtimes++; - timesdata[0] = timepoint; - timepoint = (timesdata[1] = next); - timesdata += 2; - } while (numtimes < numVectors + 1); - - if (numtimes < numVectors + 1) { - error("too few timepoints in times file", timesFileName); - } - - *timesFile >> next; - if (!timesFile->eof()) { - error("too many timepoints in times file", timesFileName); - } -} - -void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) { - if(usingPower){ - int one; - unsigned int count; - - count = read(powerfd, &one, sizeof(unsigned int)); - if (count != sizeof(unsigned int)) { - error("powerfd read failed", "int", "read"); - } - if (one != 1) { - error("dimensionality of power file not 1", powerFileName); - } - - // FIXME: should check that the powerfile is the right size for - // this. 
-- CSR, 2007-10-30 - count = read(powerfd, powerdata, numVectors * sizeof(double)); - if (count != numVectors * sizeof(double)) { - error("powerfd read failed", "double", "read"); - } - } -} - void audioDB::batchinsert(const char* dbName, const char* inFile) { forWrite = true; initDBHeader(dbName); @@ -302,8 +327,6 @@ key=inFile; std::ifstream *filesIn = 0; std::ifstream *keysIn = 0; - std::ifstream* thisTimesFile = 0; - int thispowerfd = 0; if(!(filesIn = new std::ifstream(inFile))) error("Could not open batch in file", inFile); @@ -341,22 +364,20 @@ if(timesFile->eof()) { error("not enough timestamp files in timesList", timesFileName); } - thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in); } if (usingPower) { if(powerFile->eof()) { error("not enough power files in powerList", powerFileName); } - thispowerfd = open(thisPowerFileName, O_RDONLY); - if (thispowerfd < 0) { - error("failed to open power file", thisPowerFileName); - } } - insertDatum(thisFile, thisTimesFile, thispowerfd, thisKey); - if(thisTimesFile) - delete thisTimesFile; - if(thispowerfd) - close(thispowerfd); + adb_insert_t insert; + insert.features = thisFile; + insert.times = usingTimes ? thisTimesFileName : NULL; + insert.power = usingPower ? thisPowerFileName : NULL; + insert.key = thisKey; + if(audiodb_insert(adb, &insert)) { + error("insertion failure", thisFile); + } } while(!filesIn->eof()); VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
--- a/query.cpp Fri Dec 05 22:32:43 2008 +0000 +++ b/query.cpp Fri Dec 05 22:32:49 2008 +0000 @@ -242,6 +242,42 @@ CHECKED_READ(trkfid, *data_buffer_p, trackTable[track] * sizeof(double) * dbH->dim); } +void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) { + assert(usingTimes); + + unsigned numtimes = 0; + + if(!timesFile->is_open()) { + error("problem opening times file on timestamped database", timesFileName); + } + + double timepoint, next; + *timesFile >> timepoint; + if (timesFile->eof()) { + error("no entries in times file", timesFileName); + } + numtimes++; + do { + *timesFile >> next; + if (timesFile->eof()) { + break; + } + numtimes++; + timesdata[0] = timepoint; + timepoint = (timesdata[1] = next); + timesdata += 2; + } while (numtimes < numVectors + 1); + + if (numtimes < numVectors + 1) { + error("too few timepoints in times file", timesFileName); + } + + *timesFile >> next; + if (!timesFile->eof()) { + error("too many timepoints in times file", timesFileName); + } +} + // These names deserve some unpicking. The names starting with a "q" // are pointers to the query, norm and power vectors; the names // starting with "v" are things that will end up pointing to the