Mercurial > hg > audiodb
changeset 401:a8a5f2ca5380 api-inversion
Invert audioDB::l2norm / audiodb_l2norm()
We now have some functions that shouldn't be exported to the user of the
library, but are used in more than one source file; case in point:
audiodb_sync_header(). Make a new audioDB-internals.h file for them,
and while we're at it put the scary mmap()-related macros in there too.
We can't delete audioDB::unitNormAndInsertL2() quite yet, because it's
used in insertion too, but we can delete the non-append branches.
That's not very much code to lose, but every little helps.
author | mas01cr |
---|---|
date | Wed, 03 Dec 2008 14:53:20 +0000 |
parents | 8c7453fb5bd9 |
children | 58b88ab69424 |
files | Makefile audioDB-internals.h audioDB.cpp audioDB.h audioDB_API.h dump.cpp insert.cpp l2norm.cpp power.cpp |
diffstat | 9 files changed, 133 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Thu Nov 27 16:22:52 2008 +0000 +++ b/Makefile Wed Dec 03 14:53:20 2008 +0000 @@ -8,7 +8,7 @@ SHARED_LIB_FLAGS=-shared -Wl,-soname, -LIBOBJS=insert.o create.o common.o open.o close.o status.o dump.o power.o liszt.o query.o sample.o index.o lshlib.o cmdline.o +LIBOBJS=insert.o create.o common.o open.o close.o status.o dump.o power.o l2norm.o liszt.o query.o sample.o index.o lshlib.o cmdline.o OBJS=$(LIBOBJS) soap.o audioDB.o
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/audioDB-internals.h Wed Dec 03 14:53:20 2008 +0000 @@ -0,0 +1,40 @@ +/* We could go gcc-specific here and use typeof() instead of passing + * in an explicit type. Answers on a postcard as to whether that's a + * good plan or not. */ +#define mmap_or_goto_error(type, var, start, length) \ + { void *tmp = mmap(0, length, PROT_READ, MAP_SHARED, adb->fd, (start)); \ + if(tmp == (void *) -1) { \ + goto error; \ + } \ + var = (type) tmp; \ + } + +#define maybe_munmap(table, length) \ + { if(table) { \ + munmap(table, length); \ + } \ + } + +static inline int audiodb_sync_header(adb_t *adb) { + off_t pos; + pos = lseek(adb->fd, (off_t) 0, SEEK_CUR); + if(pos == (off_t) -1) { + goto error; + } + if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) { + goto error; + } + if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) { + goto error; + } + + /* can be fsync() if fdatasync() is racily exciting and new */ + fdatasync(adb->fd); + if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) { + goto error; + } + return 0; + + error: + return 1; +}
--- a/audioDB.cpp Thu Nov 27 16:22:52 2008 +0000 +++ b/audioDB.cpp Wed Dec 03 14:53:20 2008 +0000 @@ -757,17 +757,14 @@ } void audioDB::l2norm(const char* dbName) { - forWrite = true; - initTables(dbName, 0); - if( !(dbH->flags & O2_FLAG_LARGE_ADB ) && (dbH->length>0) ){ - /* FIXME: should probably be uint64_t */ - unsigned numVectors = dbH->length/(sizeof(double)*dbH->dim); - CHECKED_MMAP(double *, dataBuf, dbH->dataOffset, dataBufLength); - unitNormAndInsertL2(dataBuf, dbH->dim, numVectors, 0); // No append + if(!adb) { + if(!(adb = audiodb_open(dbName, O_RDWR))) { + error("Failed to open database file", dbName); + } } - // Update database flags - dbH->flags = dbH->flags|O2_FLAG_L2NORM; - memcpy (db, dbH, O2_HEADERSIZE); + if(audiodb_l2norm(adb)) { + error("failed to turn on l2norm flag for database", dbName); + } } void audioDB::power_flag(const char *dbName) { @@ -806,16 +803,13 @@ /* FIXME: in fact this does not unit norm a block of features, it just records the L2 norms somewhere. unitNorm() does in fact unit norm a block of features. */ -void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append=0){ +void audioDB::unitNormAndInsertL2(double* X, unsigned dim, unsigned n){ unsigned d; double *p; unsigned nn = n; assert(l2normTable); - if( !(dbH->flags & O2_FLAG_LARGE_ADB) && !append && (dbH->flags & O2_FLAG_L2NORM) ) - error("Database is already L2 normed", "automatic norm on insert is enabled"); - VERB_LOG(2, "norming %u vectors...", n); double* l2buf = new double[n]; @@ -835,15 +829,12 @@ X+=dim; } unsigned offset; - if(append) { - // FIXME: a hack, a very palpable hack: the vectors have already - // been inserted, and dbH->length has already been updated. We - // need to subtract off again the number of vectors that we've - // inserted this time... - offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors - } else { - offset=0; - } + + // FIXME: a hack, a very palpable hack: the vectors have already + // been inserted, and dbH->length has already been updated. We + // need to subtract off again the number of vectors that we've + // inserted this time... + offset=(dbH->length/(dbH->dim*sizeof(double)))-n; // number of vectors memcpy(l2normTable+offset, l2buf, n*sizeof(double)); if(l2buf) delete[] l2buf; @@ -1140,20 +1131,5 @@ return apierror; } - - int audiodb_l2norm(adb_ptr mydb){ - - const char *argv[5]; - int apierror=0; - - argv[0]="audioDB"; - argv[1]="--L2NORM"; - argv[2]="-d"; - argv[3]=mydb->path; - argv[4]='\0'; - - audioDB::audioDB(4,argv,&apierror,mydb); - return apierror; - } }
--- a/audioDB.h Thu Nov 27 16:22:52 2008 +0000 +++ b/audioDB.h Wed Dec 03 14:53:20 2008 +0000 @@ -336,7 +336,7 @@ void initTables(const char* dbName, const char* inFile = 0); void initTablesFromKey(const char* dbName, const Uns32T queryIndex); void unitNorm(double* X, unsigned d, unsigned n, double* qNorm); - void unitNormAndInsertL2(double* X, unsigned dim, unsigned n, unsigned append); + void unitNormAndInsertL2(double* X, unsigned dim, unsigned n); void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata); void insertPowerData(unsigned n, int powerfd, double *powerdata); unsigned getKeyPos(char* key);
--- a/audioDB_API.h Thu Nov 27 16:22:52 2008 +0000 +++ b/audioDB_API.h Wed Dec 03 14:53:20 2008 +0000 @@ -5,33 +5,13 @@ * Christophe Rhodes c.rhodes@gold.ac.uk * Ian Knopke mas01ik@gold.ac.uk, ian.knopke@gmail.com */ - -/*******************************************************************/ -/* Data types for API */ - /* Temporary workarounds */ typedef struct dbTableHeader adb_header_t; int acquire_lock(int, bool); -/* Internal Macros: make sure they end up in the private version of - this header. */ -/* We could go gcc-specific here and use typeof() instead of passing - * in an explicit type. Answers on a postcard as to whether that's a - * good plan or not. */ -#define mmap_or_goto_error(type, var, start, length) \ - { void *tmp = mmap(0, length, PROT_READ, MAP_SHARED, adb->fd, (start)); \ - if(tmp == (void *) -1) { \ - goto error; \ - } \ - var = (type) tmp; \ - } - -#define maybe_munmap(table, length) \ - { if(table) { \ - munmap(table, length); \ - } \ - } +/*******************************************************************/ +/* Data types for API */ /* The main struct that stores the name of the database, and in future will hold all * kinds of other interesting information */ @@ -139,5 +119,3 @@ /* varoius dump formats */ int audiodb_dump(adb_ptr mydb, const char *outputdir); - -
--- a/dump.cpp Thu Nov 27 16:22:52 2008 +0000 +++ b/dump.cpp Wed Dec 03 14:53:20 2008 +0000 @@ -1,6 +1,7 @@ #include "audioDB.h" extern "C" { #include "audioDB_API.h" +#include "audioDB-internals.h" } int audiodb_dump(adb_t *adb, const char *output) {
--- a/insert.cpp Thu Nov 27 16:22:52 2008 +0000 +++ b/insert.cpp Wed Dec 03 14:53:20 2008 +0000 @@ -102,7 +102,7 @@ // Norm the vectors on input if the database is already L2 normed if(dbH->flags & O2_FLAG_L2NORM) - unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append + unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors); // Report status status(dbName); @@ -319,7 +319,7 @@ // Norm the vectors on input if the database is already L2 normed if(dbH->flags & O2_FLAG_L2NORM) - unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append + unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors); totalVectors+=numVectors;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/l2norm.cpp Wed Dec 03 14:53:20 2008 +0000 @@ -0,0 +1,71 @@ +#include "audioDB.h" +extern "C" { +#include "audioDB_API.h" +#include "audioDB-internals.h" +} + +static int audiodb_l2norm_existing(adb_t *adb) { + double *data_buffer, *l2norm_buffer; + double *dp, *lp; + adb_header_t *header = adb->header; + size_t data_buffer_size = ALIGN_PAGE_UP(header->length); + size_t nvectors = header->length / (sizeof(double) * header->dim); + /* FIXME: this map of the vector data will lose if we ever turn the + * l2norm flag on when we have already inserted a large number of + * vectors, as the mmap() will fail. "Don't do that, then" is one + * possible answer. */ + mmap_or_goto_error(double *, data_buffer, header->dataOffset, data_buffer_size); + l2norm_buffer = (double *) malloc(nvectors * sizeof(double)); + if(!l2norm_buffer) { + goto error; + } + + dp = data_buffer; + lp = l2norm_buffer; + for(size_t i = 0; i < nvectors; i++) { + *lp = 0; + for(unsigned int k = 0; k < header->dim; k++) { + *lp += (*dp)*(*dp); + dp++; + } + lp++; + } + + if(lseek(adb->fd, adb->header->l2normTableOffset, SEEK_SET) == (off_t) -1) { + goto error; + } + if(write(adb->fd, l2norm_buffer, nvectors * sizeof(double)) != (ssize_t) (nvectors * sizeof(double))) { + goto error; + } + + munmap(data_buffer, data_buffer_size); + free(l2norm_buffer); + + return 0; + + error: + maybe_munmap(data_buffer, data_buffer_size); + if(l2norm_buffer) { + free(l2norm_buffer); + } + return 1; +} + +int audiodb_l2norm(adb_t *adb) { + adb_header_t *header = adb->header; + if(header->flags & O2_FLAG_L2NORM) { + /* non-error code for forthcoming backwards-compatibility + * reasons */ + return 0; + } + if((!(header->flags & O2_FLAG_LARGE_ADB)) && (header->length > 0)) { + if(audiodb_l2norm_existing(adb)) { + goto error; + } + } + adb->header->flags |= O2_FLAG_L2NORM; + return audiodb_sync_header(adb); + + error: + return 1; +}
--- a/power.cpp Thu Nov 27 16:22:52 2008 +0000 +++ b/power.cpp Wed Dec 03 14:53:20 2008 +0000 @@ -1,31 +1,7 @@ #include "audioDB.h" extern "C" { #include "audioDB_API.h" -} - -/* FIXME: we should not export this symbol to users of the library. */ -int audiodb_sync_header(adb_t *adb) { - off_t pos; - pos = lseek(adb->fd, (off_t) 0, SEEK_CUR); - if(pos == (off_t) -1) { - goto error; - } - if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) { - goto error; - } - if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) { - goto error; - } - - /* can be fsync() if fdatasync() is racily exciting and new */ - fdatasync(adb->fd); - if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) { - goto error; - } - return 0; - - error: - return 1; +#include "audioDB-internals.h" } int audiodb_power(adb_t *adb) {