annotate create.cpp @ 755:37c2b9cce23a multiprobeLSH

Adding mkc_lsh_update branch, trunk candidate with improved LSH: merged trunk 1095 and branch multiprobe_lsh
author mas01mc
date Thu, 25 Nov 2010 13:42:40 +0000
parents 06409b6e268f
children 4eedc18634f5
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@239 6 /* Make a new database.
mas01cr@239 7
mas01cr@509 8 (FIXME: this text, in particular the conditional, will not be true
mas01cr@509 9 once we implement create flags rather than defaulting on format based
mas01cr@509 10 on the requested size arguments)
mas01cr@509 11
mas01cr@509 12 IF ntracks < ADB_FIXME_LARGE_ADB_NTRACKS AND size(featuredata) < ADB_FIXME_LARGE_ADB_SIZE
mas01cr@239 13 The database consists of:
mas01cr@239 14
mas01cr@509 15 * a header (see adb_header_t definition);
mas01cr@239 16 * keyTable: list of keys of tracks;
mas01cr@239 17 * trackTable: Maps implicit feature index to a feature vector
mas01cr@239 18 matrix (sizes of tracks)
mas01cr@239 19 * featureTable: Lots of doubles;
mas01cr@239 20 * timesTable: (start,end) time points for each feature vector;
mas01cr@239 21 * powerTable: associated power for each feature vector;
mas01cr@239 22 * l2normTable: squared l2norms for each feature vector.
mas01cr@498 23
mas01mc@324 24 ELSE the database consists of:
mas01cr@498 25
mas01cr@509 26 * a header (see adb_header_t definition);
mas01mc@324 27 * keyTable: list of keys of tracks
mas01mc@324 28 * trackTable: sizes of tracks
mas01mc@324 29 * featureTable: list of feature file names
mas01mc@324 30 * timesTable: list of times file names
mas01mc@324 31 * powerTable: list of power file names
mas01mc@324 32
mas01cr@239 33 */
mas01cr@239 34
mas01cr@498 35 adb_t *audiodb_create(const char *path, unsigned datasize, unsigned ntracks, unsigned datadim) {
mas01cr@498 36 int fd;
mas01cr@498 37 adb_header_t *header = 0;
mas01cr@498 38 off_t databytes, auxbytes;
mas01cr@498 39 if(datasize == 0) {
mas01cr@509 40 datasize = ADB_DEFAULT_DATASIZE;
mas01cr@498 41 }
mas01cr@498 42 if(ntracks == 0) {
mas01cr@509 43 ntracks = ADB_DEFAULT_NTRACKS;
mas01cr@498 44 }
mas01cr@498 45 if(datadim == 0) {
mas01cr@509 46 datadim = ADB_DEFAULT_DATADIM;
mas01cr@498 47 }
mas01cr@239 48
mas01cr@498 49 if ((fd = open(path, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 50 goto error;
mas01cr@498 51 }
mas01cr@498 52 if (acquire_lock(fd, true)) {
mas01cr@498 53 goto error;
mas01cr@498 54 }
mas01cr@239 55
mas01cr@498 56 header = (adb_header_t *) malloc(sizeof(adb_header_t));
mas01cr@498 57 if(!header) {
mas01cr@498 58 goto error;
mas01cr@498 59 }
mas01cr@239 60
mas01cr@239 61 // Initialize header
mas01cr@509 62 header->magic = ADB_MAGIC;
mas01cr@509 63 header->version = ADB_FORMAT_VERSION;
mas01cr@498 64 header->numFiles = 0;
mas01cr@498 65 header->dim = 0;
mas01cr@498 66 header->flags = 0;
mas01cr@509 67 header->headerSize = ADB_HEADER_SIZE;
mas01cr@498 68 header->length = 0;
mas01cr@509 69 header->fileTableOffset = align_page_up(ADB_HEADER_SIZE);
mas01cr@509 70 header->trackTableOffset = align_page_up(header->fileTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks); //
mas01cr@509 71 header->dataOffset = align_page_up(header->trackTableOffset + ADB_TRACKTABLE_ENTRY_SIZE*ntracks);
mas01cr@256 72
mas01cr@498 73 databytes = ((off_t) datasize) * 1024 * 1024;
mas01cr@498 74 auxbytes = databytes / datadim;
mas01cr@498 75
mas01mc@324 76 // If database will fit in a single file the vectors are copied into the AudioDB instance
mas01mc@324 77 // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
mas01mc@324 78 // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
mas01cr@509 79 if(ntracks < ADB_FIXME_LARGE_ADB_NTRACKS && datasize < ADB_FIXME_LARGE_ADB_SIZE) {
mas01cr@509 80 header->timesTableOffset = align_page_up(header->dataOffset + databytes);
mas01cr@509 81 header->powerTableOffset = align_page_up(header->timesTableOffset + 2*auxbytes);
mas01cr@509 82 header->l2normTableOffset = align_page_up(header->powerTableOffset + auxbytes);
mas01cr@509 83 header->dbSize = align_page_up(header->l2normTableOffset + auxbytes);
mas01cr@509 84 } else { // Create REFERENCES ADB, features and powers kept on filesystem
mas01cr@509 85 header->flags |= ADB_HEADER_FLAG_REFERENCES;
mas01cr@509 86 header->timesTableOffset = align_page_up(header->dataOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@509 87 header->powerTableOffset = align_page_up(header->timesTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@509 88 header->l2normTableOffset = align_page_up(header->powerTableOffset + ADB_FILETABLE_ENTRY_SIZE*ntracks);
mas01cr@498 89 header->dbSize = header->l2normTableOffset;
mas01mc@324 90 }
mas01cr@239 91
mas01cr@509 92 write_or_goto_error(fd, header, ADB_HEADER_SIZE);
mas01cr@239 93
mas01cr@239 94 // go to the location corresponding to the last byte
mas01cr@498 95 if (lseek (fd, header->dbSize - 1, SEEK_SET) == -1) {
mas01cr@498 96 goto error;
mas01cr@498 97 }
mas01cr@239 98
mas01cr@239 99 // write a dummy byte at the last location
mas01cr@498 100 write_or_goto_error(fd, "", 1);
mas01cr@239 101
mas01cr@498 102 free(header);
mas01cr@498 103 return audiodb_open(path, O_RDWR);
mas01cr@498 104
mas01cr@498 105 error:
mas01cr@498 106 if(header) {
mas01cr@498 107 free(header);
mas01cr@498 108 }
mas01cr@498 109 return NULL;
mas01cr@239 110 }