Mercurial > hg > audiodb
view create.cpp @ 405:ef4792df8f93 api-inversion
invert audioDB::insert / audiodb_insert().
Start off by removing audioDB::insertDatum, and essentially reusing it
as audiodb_insert. We now ignore the fact that the command-line parsing
code has "helpfully" opened a std::ifstream for the times file and an fd
for the power file, and simply go ahead and do our own dirty work.
We can delete audioDB::insertDatum entirely, but unfortunately we can't
delete audioDB::insertPowerData and audioDB::insertTimestamps, because
the index and query code respectively use them. Instead, move the two
methods closer to their single uses.
audiodb_insert() is perhaps not as short and simple as might have
been hoped given the existence of audiodb_insert_datum(); some of that
is C and its terrible way of making you pay every time you use dynamic
memory; some of it is the fact that the three different files (feature,
times, power) each requires slightly different treatment. Hey ho.
We can implement audiodb_batchinsert() in terms of audiodb_insert(); the
function is pleasingly small. We can't quite use it for
audioDB::batchinsert yet, as we have to deal with the O2_FLAG_LARGE_ADB
case (which codepath is untested in libtests/).
This means that we can delete whole swathes of hideous code from
audioDB.cpp, including not just the versions of audiodb_insert() and
audiodb_batchinsert() but also an entire audioDB constructor. Yay.
(audioDB::unitNormAndInsertL2 has also died a deserved death).
author | mas01cr |
---|---|
date | Fri, 05 Dec 2008 22:32:49 +0000 |
parents | 78fed0d4c108 |
children | a82a2d9b2451 |
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}

/* Make a new database.

   IF size(featuredata) < O2_LARGE_ADB_SIZE
   The database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks;
   * trackTable: Maps implicit feature index to a feature vector
     matrix (sizes of tracks)
   * featureTable: Lots of doubles;
   * timesTable: (start,end) time points for each feature vector;
   * powerTable: associated power for each feature vector;
   * l2normTable: squared l2norms for each feature vector.

   ELSE the database consists of:

   * a header (see dbTableHeader struct definition);
   * keyTable: list of keys of tracks
   * trackTable: sizes of tracks
   * featureTable: list of feature file names
   * timesTable: list of times file names
   * powerTable: list of power file names

*/

extern "C" {

/* Create a new, empty database file at `path` and open it.

   path:     file to create; creation fails if it already exists
             (O_CREAT|O_EXCL).
   datasize: size of the feature data region in megabytes
             (0 selects O2_DEFAULT_DATASIZE).
   ntracks:  number of track slots to reserve
             (0 selects O2_DEFAULT_NTRACKS).
   datadim:  feature dimensionality used to size the auxiliary tables
             (0 selects O2_DEFAULT_DATADIM).

   Returns the result of audiodb_open(path, O_RDWR) on success, or NULL
   if the file could not be created, locked, or written.  On failure a
   partially written file may be left behind at `path`; it is not
   removed here.  The descriptor used to create the file is always
   closed before returning. */
adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
  /* All locals are declared (and fd initialised) before the first goto,
     so that no jump to `error` crosses an initialisation. */
  int fd = -1;
  adb_header_t *header = 0;
  off_t databytes, auxbytes;

  if(datasize == 0) {
    datasize = O2_DEFAULT_DATASIZE;
  }
  if(ntracks == 0) {
    ntracks = O2_DEFAULT_NTRACKS;
  }
  if(datadim == 0) {
    datadim = O2_DEFAULT_DATADIM;
  }

  if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
    goto error;
  }

  if (acquire_lock(fd, true)) {
    goto error;
  }

  /* Zero-filled allocation: the header is written to disk below, so any
     bytes not explicitly assigned here (padding, fields not set in this
     function) must not carry uninitialised heap contents into the file. */
  header = (adb_header_t *) calloc(1, sizeof(adb_header_t));
  if(!header) {
    goto error;
  }

  // Initialize header
  header->magic = O2_MAGIC;
  header->version = O2_FORMAT_VERSION;
  header->numFiles = 0;
  header->dim = 0;
  header->flags = 0;
  header->headerSize = O2_HEADERSIZE;
  header->length = 0;
  header->fileTableOffset = ALIGN_PAGE_UP(O2_HEADERSIZE);
  header->trackTableOffset = ALIGN_PAGE_UP(header->fileTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
  header->dataOffset = ALIGN_PAGE_UP(header->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);

  databytes = ((off_t) datasize) * 1024 * 1024;
  auxbytes = databytes / datadim;

  /* FIXME: what's going on here?  There are two distinct preprocessor
     constants (O2_LSH_N_POINT_BITS, LSH_N_POINT_BITS); a third is
     presumably some default (O2_DEFAULT_LSH_N_POINT_BITS), and then
     there's this magic 28 bits.  Should this really be part of the
     flags structure at all?  Putting it elsewhere will of course
     break backwards compatibility, unless 14 is the only value that's
     been used anywhere... */

  // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
  // If this value is 0 then it will be set to 14

#if O2_LSH_N_POINT_BITS > 15
#error "AudioDB Compile ERROR: consistency check of O2_LSH_POINT_BITS failed (>15)"
#endif

  /* Shift in unsigned arithmetic: a value >= 8 shifted left by 28 would
     overflow a signed int.  NOTE(review): assumes LSH_N_POINT_BITS is a
     small non-negative integer constant -- confirm against its
     definition. */
  header->flags |= ((unsigned) (LSH_N_POINT_BITS)) << 28;

  // If database will fit in a single file the vectors are copied into the AudioDB instance
  // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
  // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
  if(ntracks<O2_LARGE_ADB_NTRACKS && datasize<O2_LARGE_ADB_SIZE){
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + databytes);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + 2*auxbytes);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + auxbytes);
    header->dbSize = ALIGN_PAGE_UP(header->l2normTableOffset + auxbytes);
  } else { // Create LARGE_ADB, features and powers kept on filesystem
    header->flags |= O2_FLAG_LARGE_ADB;
    header->timesTableOffset = ALIGN_PAGE_UP(header->dataOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->powerTableOffset = ALIGN_PAGE_UP(header->timesTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->l2normTableOffset = ALIGN_PAGE_UP(header->powerTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
    header->dbSize = header->l2normTableOffset;
  }

  /* NOTE(review): this writes O2_HEADERSIZE bytes from a buffer of
     sizeof(adb_header_t) bytes; the two are presumably equal -- confirm. */
  if (write(fd, header, O2_HEADERSIZE) != O2_HEADERSIZE) {
    goto error;
  }

  // go to the location corresponding to the last byte
  if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
    goto error;
  }

  // write a dummy byte at the last location
  if (write (fd, "", 1) != 1) {
    goto error;
  }

  free(header);
  /* Release the creation descriptor BEFORE reopening.  Leaving it open
     leaks a descriptor per call, and closing it after audiodb_open()
     could drop a lock taken there if the locking is POSIX record
     locking (which is released when any descriptor for the file is
     closed).  The close result is deliberately ignored so the return
     value stays that of audiodb_open(). */
  close(fd);
  return audiodb_open(path, O_RDWR);

 error:
  free(header);
  if(fd >= 0) {
    close(fd);
  }
  return NULL;
}

}