annotate create.cpp @ 524:469b50a3dd84 multiprobeLSH

Fixed a bug in LSH hashtable writing to disk that doesn't always sort the t2 entries into strict weak ordering. Now it does. Lots of debugging informational code inserted.
author mas01mc
date Wed, 28 Jan 2009 16:02:17 +0000
parents cc2b97d020b1
children 06409b6e268f
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@239 6 /* Make a new database.
mas01cr@239 7
mas01cr@509 8 (FIXME: this text, in particular the conditional, will not be true
mas01cr@509 9 once we implement create flags rather than defaulting on format based
mas01cr@509 10 on the requested size arguments)
mas01cr@509 11
mas01cr@509 12 IF size(featuredata) < ADB_FIXME_LARGE_ADB_SIZE
mas01cr@239 13 The database consists of:
mas01cr@239 14
mas01cr@509 15 * a header (see adb_header_t definition);
mas01cr@239 16 * keyTable: list of keys of tracks;
mas01cr@239 17 * trackTable: Maps implicit feature index to a feature vector
mas01cr@239 18 matrix (sizes of tracks)
mas01cr@239 19 * featureTable: Lots of doubles;
mas01cr@239 20 * timesTable: (start,end) time points for each feature vector;
mas01cr@239 21 * powerTable: associated power for each feature vector;
mas01cr@239 22 * l2normTable: squared l2norms for each feature vector.
mas01cr@498 23
mas01mc@324 24 ELSE the database consists of:
mas01cr@498 25
mas01cr@509 26 * a header (see adb_header_t definition);
mas01mc@324 27 * keyTable: list of keys of tracks
mas01mc@324 28 * trackTable: sizes of tracks
mas01mc@324 29 * featureTable: list of feature file names
mas01mc@324 30 * timesTable: list of times file names
mas01mc@324 31 * powerTable: list of power file names
mas01mc@324 32
mas01cr@239 33 */
mas01cr@239 34
mas01cr@498 35 adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
mas01cr@498 36 int fd;
mas01cr@498 37 adb_header_t *header = 0;
mas01cr@498 38 off_t databytes, auxbytes;
mas01cr@498 39 if(datasize == 0) {
mas01cr@509 40 datasize = ADB_DEFAULT_DATASIZE;
mas01cr@498 41 }
mas01cr@498 42 if(ntracks == 0) {
mas01cr@509 43 ntracks = ADB_DEFAULT_NTRACKS;
mas01cr@498 44 }
mas01cr@498 45 if(datadim == 0) {
mas01cr@509 46 datadim = ADB_DEFAULT_DATADIM;
mas01cr@498 47 }
mas01cr@239 48
mas01cr@498 49 if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 50 goto error;
mas01cr@498 51 }
mas01cr@498 52 if (acquire_lock(fd, true)) {
mas01cr@498 53 goto error;
mas01cr@498 54 }
mas01cr@239 55
mas01cr@498 56 header = (adb_header_t *) malloc(sizeof(adb_header_t));
mas01cr@498 57 if(!header) {
mas01cr@498 58 goto error;
mas01cr@498 59 }
mas01cr@239 60
mas01cr@239 61 // Initialize header
mas01cr@509 62 header->magic = ADB_MAGIC;
mas01cr@509 63 header->version = ADB_FORMAT_VERSION;
mas01cr@498 64 header->numFiles = 0;
mas01cr@498 65 header->dim = 0;
mas01cr@498 66 header->flags = 0;
mas01cr@509 67 header->headerSize = ADB_HEADER_SIZE;
mas01cr@498 68 header->length = 0;
mas01cr@509 69 header->fileTableOffset = align_page_up(ADB_HEADER_SIZE);
mas01cr@509 70 header->trackTableOffset = align_page_up(header->fileTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks); //
mas01cr@509 71 header->dataOffset = align_page_up(header->trackTableOffset + ADB_TRACKTABLE_ENTRY_SIZE*ntracks);
mas01cr@256 72
mas01cr@498 73 databytes = ((off_t) datasize) * 1024 * 1024;
mas01cr@498 74 auxbytes = databytes / datadim;
mas01cr@498 75
mas01mc@324 76 // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
mas01mc@324 77 // If this value is 0 then it will be set to 14
mas01mc@324 78
mas01cr@509 79 #if ADB_FIXME_LSH_N_POINT_BITS > 15
mas01cr@509 80 #error "consistency check of ADB_FIXME_LSH_N_POINT_BITS failed (>31)"
mas01cr@509 81 #endif
mas01cr@498 82
mas01cr@509 83 header->flags |= ADB_FIXME_LSH_N_POINT_BITS << 28;
mas01mc@324 84
mas01mc@324 85 // If database will fit in a single file the vectors are copied into the AudioDB instance
mas01mc@324 86 // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
mas01mc@324 87 // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
mas01cr@509 88 if(ntracks < ADB_FIXME_LARGE_ADB_NTRACKS && datasize < ADB_FIXME_LARGE_ADB_SIZE) {
mas01cr@509 89 header->timesTableOffset = align_page_up(header->dataOffset + databytes);
mas01cr@509 90 header->powerTableOffset = align_page_up(header->timesTableOffset + 2*auxbytes);
mas01cr@509 91 header->l2normTableOffset = align_page_up(header->powerTableOffset + auxbytes);
mas01cr@509 92 header->dbSize = align_page_up(header->l2normTableOffset + auxbytes);
mas01cr@509 93 } else { // Create REFERENCES ADB, features and powers kept on filesystem
mas01cr@509 94 header->flags |= ADB_HEADER_FLAG_REFERENCES;
mas01cr@509 95 header->timesTableOffset = align_page_up(header->dataOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@509 96 header->powerTableOffset = align_page_up(header->timesTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@509 97 header->l2normTableOffset = align_page_up(header->powerTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@498 98 header->dbSize = header->l2normTableOffset;
mas01mc@324 99 }
mas01cr@239 100
mas01cr@509 101 write_or_goto_error(fd, header, ADB_HEADER_SIZE);
mas01cr@239 102
mas01cr@239 103 // go to the location corresponding to the last byte
mas01cr@498 104 if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
mas01cr@498 105 goto error;
mas01cr@498 106 }
mas01cr@239 107
mas01cr@239 108 // write a dummy byte at the last location
mas01cr@498 109 write_or_goto_error(fd, "", 1);
mas01cr@239 110
mas01cr@498 111 free(header);
mas01cr@498 112 return audiodb_open(path, O_RDWR);
mas01cr@498 113
mas01cr@498 114 error:
mas01cr@498 115 if(header) {
mas01cr@498 116 free(header);
mas01cr@498 117 }
mas01cr@498 118 return NULL;
mas01cr@239 119 }