annotate insert.cpp @ 755:37c2b9cce23a multiprobeLSH

Adding mkc_lsh_update branch, trunk candidate with improved LSH: merged trunk 1095 and branch multiprobe_lsh
author mas01mc
date Thu, 25 Nov 2010 13:42:40 +0000
parents e8193805ce42
children
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@498 6 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
mas01cr@498 7 adb_header_t *header = adb->header;
mas01cr@509 8 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 9 return true;
mas01cr@498 10 } else {
mas01cr@498 11 /* FIXME: timesTableOffset isn't necessarily the next biggest
mas01cr@498 12 * offset after dataOffset. Maybe make the offsets into an array
mas01cr@498 13 * that we can iterate over... */
mas01cr@498 14 return (header->timesTableOffset >
mas01cr@498 15 (header->dataOffset + header->length + size));
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18
mas01cr@498 19 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
mas01cr@498 20 /* FIXME: the comment above about the ordering of the tables applies
mas01cr@498 21 here too. */
mas01cr@498 22 adb_header_t *header = adb->header;
mas01cr@498 23 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
mas01cr@498 24 off_t track_table_length = header->dataOffset - header->trackTableOffset;
mas01cr@509 25 int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 26 int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE;
mas01cr@498 27 /* maxfiles is the _minimum_ of the two. Do not be confused... */
mas01cr@498 28 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@509 29 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 30 /* by default, these tables are created with the same size as the
mas01cr@498 31 * fileTable (which should be called key_table); relying on that
mas01cr@498 32 * always being the case, though, smacks of optimism, so instead
mas01cr@498 33 * we code defensively... */
mas01cr@498 34 off_t data_table_length = header->timesTableOffset - header->dataOffset;
mas01cr@498 35 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
mas01cr@498 36 off_t power_table_length = header->dbSize - header->powerTableOffset;
mas01cr@509 37 int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 38 int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 39 int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 40 /* ... even though it means a certain amount of tedium. */
mas01cr@498 41 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
mas01cr@498 42 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
mas01cr@498 43 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
mas01cr@498 44 }
mas01cr@498 45 return (header->numFiles < (unsigned int) maxfiles);
mas01cr@498 46 }
mas01cr@498 47
mas01cr@498 48 /*
mas01cr@498 49 * Hey, look, a comment. Normally I wouldn't bother, as the code
mas01cr@498 50 * should be self-documenting, but a lot of logic is concentrated in
mas01cr@498 51 * this one place, so let's give an overview beforehand. To insert a
mas01cr@498 52 * datum into the database, we:
mas01cr@498 53 *
mas01cr@498 54 * 1. check write permission;
mas01cr@498 55 * 2. check for enough space;
mas01cr@498 56 * 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 57 * header dimension is zero, in which case write datum->dim to
mas01cr@498 58 * adb->header->dim).
mas01cr@498 59 * 4. check for presence of datum->key in adb->keymap;
mas01cr@509 60 * 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 61 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 62 * 6. write in data, power, times as appropriate; add to track
mas01cr@498 63 * and key tables too;
mas01cr@509 64 * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 65 * compute norms and fill in table;
mas01cr@498 66 * 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 67 * adb->track_offsets and adb->header;
mas01cr@498 68 * 9. sync adb->header with disk.
mas01cr@498 69 *
mas01cr@498 70 * Step 9 essentially commits the transaction; until we update
mas01cr@498 71 * header->length, nothing will recognize the newly-written data. In
mas01cr@498 72 * principle, if it fails, we should roll back, which we can in fact
mas01cr@498 73 * do on the assumption that nothing in step 8 can ever fail; on the
mas01cr@498 74 * other hand, if it's failed, then it's unlikely that rolling back by
mas01cr@498 75 * syncing the original header back to disk is going to work
mas01cr@498 76 * desperately well. We should perhaps take an operating-system lock
mas01cr@498 77 * around step 9, so that we can't be interrupted part-way through
mas01cr@498 78 * (except of course for SIGKILL, but if we're hit with that we will
mas01cr@498 79 * always lose).
mas01cr@498 80 */
mas01cr@498 81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
mas01cr@498 82
mas01cr@498 83 off_t size, offset, nfiles;
mas01cr@498 84 double *l2norm_buffer = NULL;
mas01cr@498 85
mas01cr@498 86 /* 1. check write permission; */
mas01cr@498 87 if(!(adb->flags & O_RDWR)) {
mas01cr@498 88 return 1;
mas01cr@498 89 }
mas01cr@498 90 /* 2. check for enough space; */
mas01cr@498 91 size = sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 92 if(!audiodb_enough_data_space_free(adb, size)) {
mas01cr@498 93 return 1;
mas01cr@498 94 }
mas01cr@498 95 if(!audiodb_enough_per_file_space_free(adb)) {
mas01cr@498 96 return 1;
mas01cr@498 97 }
mas01cr@498 98 /* 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 99 * header dimension is zero, in which case write datum->dim to
mas01cr@498 100 * adb->header->dim).
mas01cr@498 101 */
mas01cr@498 102 if(adb->header->dim == 0) {
mas01cr@498 103 adb->header->dim = datum->dim;
mas01cr@498 104 } else if (adb->header->dim != datum->dim) {
mas01cr@498 105 return 1;
mas01cr@498 106 }
mas01cr@498 107 /* 4. check for presence of datum->key in adb->keymap; */
mas01cr@498 108 if(adb->keymap->count(datum->key)) {
mas01cr@498 109 /* not part of an explicit API/ABI, but we need a distinguished
mas01cr@498 110 value in this circumstance to preserve somewhat wonky behaviour
mas01cr@498 111 of audioDB::batchinsert. */
mas01cr@498 112 return 2;
mas01cr@498 113 }
mas01cr@509 114 /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 115 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 116 */
mas01cr@509 117 if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) ||
mas01cr@509 118 ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) {
mas01cr@498 119 return 1;
mas01cr@498 120 }
mas01cr@509 121 if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 122 if(adb->header->numFiles == 0) {
mas01cr@509 123 adb->header->flags |= ADB_HEADER_FLAG_TIMES;
mas01cr@498 124 } else {
mas01cr@498 125 return 1;
mas01cr@239 126 }
mas01cr@509 127 } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) {
mas01cr@498 128 return 1;
mas01cr@498 129 }
mas01cr@498 130 /* 6. write in data, power, times as appropriate; add to track
mas01cr@498 131 * and key tables too;
mas01cr@498 132 */
mas01cr@498 133 offset = adb->header->length;
mas01cr@498 134 nfiles = adb->header->numFiles;
mas01cr@498 135
mas01cr@498 136 /* FIXME: checking for all these lseek()s */
mas01cr@509 137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
mas01cr@509 139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 140 write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE);
mas01cr@509 141 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 142 char cwd[PATH_MAX];
mas01cr@498 143 char slash = '/';
mas01cr@498 144
mas01cr@498 145 if(!getcwd(cwd, PATH_MAX)) {
mas01cr@498 146 goto error;
mas01cr@498 147 }
mas01cr@509 148 lseek(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 149 if(*((char *) datum->data) != '/') {
mas01cr@498 150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 151 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 152 }
mas01cr@498 153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
mas01cr@498 154 if(datum->power) {
mas01cr@509 155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 156 if(*((char *) datum->power) != '/') {
mas01cr@498 157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 158 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 159 }
mas01cr@498 160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
mas01cr@498 161 }
mas01cr@498 162 if(datum->times) {
mas01cr@509 163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 164 if(*((char *) datum->times) != '/') {
mas01cr@498 165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 166 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 167 }
mas01cr@498 168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
mas01cr@498 169 }
mas01cr@498 170 } else {
mas01cr@498 171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
mas01cr@498 172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
mas01cr@498 173 if(datum->power) {
mas01cr@498 174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
mas01cr@498 176 }
mas01cr@498 177 if(datum->times) {
mas01cr@498 178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
mas01cr@498 179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
mas01cr@498 180 }
mas01cr@498 181 }
mas01cr@498 182
mas01cr@509 183 /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 184 * compute norms and fill in table;
mas01cr@498 185 */
mas01cr@509 186 if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) &&
mas01cr@509 187 !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
mas01mc@324 189
mas01cr@498 190 audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer);
mas01cr@498 191 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 192 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
mas01cr@498 193 free(l2norm_buffer);
mas01cr@498 194 l2norm_buffer = NULL;
mas01cr@498 195 }
mas01cr@498 196
mas01cr@498 197 /* 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 198 * adb->track_offsets and adb->header;
mas01cr@498 199 */
mas01cr@498 200 adb->keys->push_back(datum->key);
mas01cr@498 201 (*adb->keymap)[datum->key] = adb->header->numFiles;
mas01cr@498 202 adb->track_lengths->push_back(datum->nvectors);
mas01cr@498 203 adb->track_offsets->push_back(offset);
mas01cr@498 204 adb->header->numFiles += 1;
mas01cr@498 205 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 206
mas01cr@498 207 /* 9. sync adb->header with disk. */
mas01cr@498 208 return audiodb_sync_header(adb);
mas01cr@498 209
mas01cr@498 210 error:
mas01cr@498 211 if(l2norm_buffer) {
mas01cr@498 212 free(l2norm_buffer);
mas01cr@498 213 }
mas01cr@498 214 return 1;
mas01cr@498 215 }
mas01cr@498 216
mas01cr@498 217 int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) {
mas01cr@509 218 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 219 return 1;
mas01cr@498 220 } else {
mas01cr@498 221 adb_datum_internal_t d;
mas01cr@498 222 d.nvectors = datum->nvectors;
mas01cr@498 223 d.dim = datum->dim;
mas01cr@498 224 d.key = datum->key;
mas01cr@498 225 d.data = datum->data;
mas01cr@498 226 d.times = datum->times;
mas01cr@498 227 d.power = datum->power;
mas01cr@498 228 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 229 }
mas01cr@498 230 }
mas01cr@498 231
mas01cr@498 232 int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) {
mas01cr@509 233 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 234 return 1;
mas01cr@498 235 } else {
mas01cr@498 236 adb_datum_internal_t d;
mas01cr@498 237 struct stat st;
mas01cr@498 238 int fd;
mas01cr@498 239 off_t size;
mas01mc@324 240
mas01cr@498 241 if((fd = open(reference->features, O_RDONLY)) == -1) {
mas01cr@498 242 return 1;
mas01cr@239 243 }
mas01cr@498 244 if(fstat(fd, &st)) {
mas01cr@498 245 goto error;
mas01cr@239 246 }
mas01cr@498 247 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
mas01cr@498 248 close(fd);
mas01cr@498 249 fd = 0;
mas01cr@498 250 size = st.st_size - sizeof(uint32_t);
mas01cr@498 251 d.nvectors = size / (sizeof(double) * d.dim);
mas01cr@498 252 d.data = (void *) reference->features;
mas01cr@498 253 if(reference->power) {
mas01cr@498 254 if(stat(reference->power, &st)) {
mas01cr@498 255 return 1;
mas01cr@498 256 }
mas01cr@498 257 }
mas01cr@498 258 d.power = (void *) reference->power;
mas01cr@498 259 if(reference->times) {
mas01cr@498 260 if(stat(reference->times, &st)) {
mas01cr@498 261 return 1;
mas01cr@498 262 }
mas01cr@498 263 }
mas01cr@498 264 d.times = (void *) reference->times;
mas01cr@498 265 d.key = reference->key ? reference->key : reference->features;
mas01cr@498 266 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 267 error:
mas01cr@498 268 if(fd) {
mas01cr@498 269 close(fd);
mas01cr@498 270 }
mas01cr@498 271 return 1;
mas01cr@498 272 }
mas01cr@498 273 }
mas01cr@498 274
mas01cr@498 275 int audiodb_free_datum(adb_datum_t *datum) {
mas01cr@498 276 if(datum->data) {
mas01cr@498 277 free(datum->data);
mas01cr@498 278 datum->data = NULL;
mas01cr@498 279 }
mas01cr@498 280 if(datum->power) {
mas01cr@498 281 free(datum->power);
mas01cr@498 282 datum->power = NULL;
mas01cr@498 283 }
mas01cr@498 284 if(datum->times) {
mas01cr@498 285 free(datum->times);
mas01cr@498 286 datum->times = NULL;
mas01cr@498 287 }
mas01cr@498 288 return 0;
mas01cr@498 289 }
mas01cr@498 290
mas01mc@541 291 int audiodb_free_datum_cache(adb_fd_cache_t *cache){
mas01mc@541 292 if(cache){
mas01mc@541 293 if(cache->fname){
mas01mc@541 294 free(cache->fname);
mas01mc@541 295 cache->fname = NULL;
mas01mc@541 296 }
mas01mc@541 297 if(cache->data_fd){
mas01mc@541 298 close(cache->data_fd);
mas01mc@541 299 cache->data_fd = 0;
mas01mc@541 300 }
mas01mc@541 301 if(cache->power_fd){
mas01mc@541 302 close(cache->power_fd);
mas01mc@541 303 cache->power_fd = 0;
mas01mc@541 304 }
mas01mc@541 305 if(cache->times_file){
mas01mc@541 306 fclose(cache->times_file);
mas01mc@541 307 cache->times_file = NULL;
mas01mc@541 308 }
mas01mc@541 309 if(cache->reference){
mas01mc@541 310 audiodb_free_datum_reference(cache->reference);
mas01mc@542 311 free(cache->reference);
mas01mc@541 312 cache->reference = NULL;
mas01mc@541 313 }
mas01mc@541 314 }
mas01mc@541 315 return 0;
mas01mc@541 316 }
mas01mc@541 317
mas01mc@541 318 int audiodb_free_datum_reference(adb_reference_t * reference){
mas01mc@541 319 if(reference){
mas01mc@541 320 if(reference->features){
mas01mc@541 321 free((char *)reference->features);
mas01mc@541 322 reference->features = 0;
mas01mc@541 323 }
mas01mc@541 324 if(reference->power){
mas01mc@541 325 free((char *)reference->power);
mas01mc@541 326 reference->power = 0;
mas01mc@541 327 }
mas01mc@541 328 if(reference->times){
mas01mc@541 329 free((char *)reference->times);
mas01mc@541 330 reference->times = 0;
mas01mc@541 331 }
mas01mc@541 332 }
mas01mc@541 333 return 0;
mas01mc@541 334 }
mas01mc@541 335
mas01mc@545 336 int audiodb_insert_create_datum(adb_insert_t * insert, adb_datum_t *datum){
mas01mc@545 337 return audiodb_insert_create_datum_offset(insert, datum, 0, 0, 0);
mas01mc@545 338 }
mas01mc@545 339
mas01mc@546 340 int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t vector_offset, size_t num_vectors, adb_fd_cache_t *cache) {
mas01cr@498 341 int fd = 0;
mas01cr@498 342 FILE *file = NULL;
mas01cr@498 343 struct stat st;
mas01cr@498 344 off_t size;
mas01mc@541 345 bool clear_cache = false;
mas01cr@498 346
mas01mc@541 347 if(!cache){
mas01mc@541 348 datum->data = NULL;
mas01mc@541 349 datum->power = NULL;
mas01mc@541 350 datum->times = NULL;
mas01cr@498 351 }
mas01mc@541 352
mas01mc@541 353 // STEP 1 check if we need to clear the cache
mas01mc@546 354 if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0)){
mas01mc@541 355 clear_cache = true;
mas01mc@546 356 }
mas01mc@541 357
mas01mc@541 358 // STEP 2. Clear the cache if necessary
mas01mc@541 359 if(cache && clear_cache){
mas01mc@541 360 close(cache->data_fd);
mas01mc@541 361 cache->data_fd = 0;
mas01mc@541 362 free(cache->fname);
mas01mc@541 363 cache->fname = 0;
mas01mc@541 364 }
mas01mc@541 365
mas01mc@541 366 // STEP 3. Use the cached file descriptor or open a new file descriptor
mas01mc@541 367 if (cache && cache->data_fd ){
mas01mc@541 368 fd = cache->data_fd;
mas01mc@541 369 }
mas01mc@541 370 else{
mas01mc@541 371 if ((fd = open(insert->features, O_RDONLY)) == -1) {
mas01mc@541 372 goto error;
mas01mc@541 373 }
mas01mc@541 374 if(cache){
mas01mc@541 375 cache->fname = (char*) malloc(strlen(insert->features));
mas01mc@541 376 strncpy(cache->fname, insert->features, strlen(insert->features));
mas01mc@541 377 }
mas01mc@541 378 }
mas01mc@541 379
mas01cr@498 380 if(fstat(fd, &st)) {
mas01cr@498 381 goto error;
mas01cr@498 382 }
mas01mc@541 383
mas01mc@541 384 // STEP 4. If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 385 if( !( cache && cache->data_fd ) ){
mas01mc@541 386 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
mas01mc@541 387 if(cache)
mas01mc@541 388 cache->data_fd = fd;
mas01mc@541 389 }
mas01mc@541 390
mas01mc@541 391 // STEP 5. Allocate data memory if necessary, read the requested amount of data
mas01mc@546 392 if(num_vectors){
mas01mc@546 393 size = num_vectors*datum->dim*sizeof(double);
mas01mc@546 394 }
mas01mc@546 395 else{
mas01mc@539 396 size = st.st_size - sizeof(uint32_t);
mas01mc@546 397 }
mas01mc@541 398
mas01cr@498 399 datum->nvectors = size / (sizeof(double) * datum->dim);
mas01mc@541 400
mas01mc@541 401 if(!datum->data){
mas01mc@541 402 datum->data = (double *) malloc(size);
mas01mc@541 403 }
mas01mc@541 404
mas01cr@498 405 if(!datum->data) {
mas01cr@498 406 goto error;
mas01cr@498 407 }
mas01mc@541 408
mas01mc@546 409 if(vector_offset){
mas01mc@546 410 lseek(fd, sizeof(uint32_t) + vector_offset*datum->dim*sizeof(double), SEEK_SET);
mas01mc@546 411 }
mas01cr@498 412 read_or_goto_error(fd, datum->data, size);
mas01mc@541 413
mas01mc@541 414 // STEP 6. Close the file descriptor, unless we are caching it
mas01mc@546 415 if(!cache){
mas01mc@541 416 close(fd);
mas01mc@546 417 }
mas01mc@541 418 fd = 0; // we're done with the data
mas01mc@541 419
mas01cr@498 420 if(insert->power) {
mas01cr@498 421 int dim;
mas01mc@541 422
mas01mc@541 423 // Clear the cache if necessary
mas01mc@541 424 if(clear_cache){
mas01mc@541 425 close(cache->power_fd);
mas01mc@541 426 cache->power_fd = 0;
mas01mc@541 427 }
mas01mc@541 428
mas01mc@541 429 // Use the cached file descriptor or open a new file descriptor
mas01mc@546 430 if (cache && cache->power_fd){
mas01mc@541 431 fd = cache->power_fd;
mas01mc@546 432 }
mas01mc@541 433 else if((fd = open(insert->power, O_RDONLY)) == -1) {
mas01cr@498 434 goto error;
mas01cr@498 435 }
mas01mc@541 436
mas01cr@498 437 if(fstat(fd, &st)) {
mas01cr@498 438 goto error;
mas01cr@498 439 }
mas01mc@541 440
mas01cr@498 441 /* This cast is so non-trivial that it deserves a comment.
mas01cr@498 442 *
mas01cr@498 443 * The data types in this expression, left to right, are: off_t,
mas01cr@498 444 * size_t, off_t, uint32_t. The rules for conversions in
mas01cr@498 445 * arithmetic expressions with mixtures of integral types are
mas01cr@498 446 * essentially that the widest type wins, with unsigned types
mas01cr@498 447 * winning on a tie-break.
mas01cr@498 448 *
mas01cr@498 449 * Because we are enforcing (through the use of sufficient
mas01cr@498 450 * compiler flags, if necessary) that off_t be a (signed) 64-bit
mas01cr@498 451 * type, the only variability in this set of types is in fact the
mas01cr@498 452 * size_t. On 32-bit machines, size_t is uint32_t and so the
mas01cr@498 453 * coercions on both sides of the equality end up promoting
mas01cr@498 454 * everything to int64_t, which is fine. On 64-bit machines,
mas01cr@498 455 * however, the left hand side is promoted to a uint64_t, while
mas01cr@498 456 * the right hand side remains int64_t.
mas01cr@498 457 *
mas01cr@498 458 * The mixture of signed and unsigned types in comparisons is Evil
mas01cr@498 459 * Bad and Wrong, and gcc complains about it. (It's right to do
mas01cr@498 460 * so, actually). Of course in this case it will never matter
mas01cr@498 461 * because of the particular relationships between all of these
mas01cr@498 462 * numbers, so we just cast the left hand side to off_t, which
mas01cr@498 463 * will do the right thing for us on all platforms.
mas01cr@498 464 *
mas01cr@498 465 * I hate C.
mas01cr@498 466 */
mas01mc@541 467
mas01mc@546 468 if( (!num_vectors) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
mas01cr@498 469 goto error;
mas01cr@498 470 }
mas01mc@541 471
mas01mc@541 472 // If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 473 if( !( cache && cache->power_fd ) ){
mas01mc@541 474 read_or_goto_error(fd, &dim, sizeof(uint32_t));
mas01mc@541 475 if(dim != 1) {
mas01mc@541 476 goto error;
mas01mc@541 477 }
mas01mc@546 478 if(cache){
mas01mc@541 479 cache->power_fd = fd;
mas01mc@546 480 }
mas01cr@498 481 }
mas01mc@541 482
mas01mc@541 483 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 484 if(!datum->power)
mas01mc@541 485 datum->power = (double *) malloc(size / datum->dim);
mas01cr@498 486 if(!datum->power) {
mas01cr@498 487 goto error;
mas01cr@498 488 }
mas01mc@541 489
mas01mc@546 490 if(vector_offset){
mas01mc@546 491 lseek(fd, sizeof(uint32_t) + vector_offset*sizeof(double), SEEK_SET);
mas01mc@546 492 }
mas01mc@541 493
mas01cr@498 494 read_or_goto_error(fd, datum->power, size / datum->dim);
mas01mc@541 495
mas01mc@546 496 if(!cache){
mas01mc@541 497 close(fd);
mas01mc@546 498 }
mas01mc@541 499 fd = 0;
mas01cr@498 500 }
mas01mc@541 501
mas01cr@498 502 if(insert->times) {
mas01cr@498 503 double t, *tp;
mas01mc@541 504
mas01mc@541 505 // Clear the cache if necessary
mas01mc@541 506 if(clear_cache){
mas01mc@541 507 fclose(cache->times_file);
mas01mc@541 508 cache->times_file = 0;
mas01cr@498 509 }
mas01mc@541 510
mas01mc@541 511 // Use the cached file descriptor or open a new file descriptor and maybe cache
mas01mc@546 512 if (cache && cache->times_file){
mas01mc@541 513 file = cache->times_file;
mas01mc@546 514 }
mas01mc@541 515 else{
mas01mc@541 516 if(!(file = fopen(insert->times, "r"))) {
mas01mc@541 517 goto error;
mas01mc@541 518 }
mas01mc@546 519 if(cache){
mas01mc@541 520 cache->times_file = file;
mas01mc@546 521 }
mas01mc@541 522 }
mas01mc@541 523
mas01mc@541 524 // Allocate data memory if necessary, read the requested amount of data
mas01mc@546 525 if(!datum->times){
mas01mc@541 526 datum->times = (double *) malloc(2 * size / datum->dim);
mas01mc@546 527 }
mas01cr@498 528 if(!datum->times) {
mas01cr@498 529 goto error;
mas01cr@498 530 }
mas01mc@541 531
mas01mc@541 532 rewind(file);
mas01mc@546 533
mas01cr@498 534 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 535 goto error;
mas01cr@498 536 }
mas01mc@546 537 if(vector_offset){
mas01mc@546 538 while(vector_offset-- != 1 ){
mas01mc@546 539 if(fscanf(file, " %lf", &t) != 1){
mas01mc@539 540 goto error;
mas01mc@546 541 }
mas01mc@546 542 }
mas01mc@546 543 }
mas01cr@498 544 tp = datum->times;
mas01cr@498 545 *tp++ = t;
mas01cr@498 546 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
mas01cr@498 547 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 548 goto error;
mas01cr@498 549 }
mas01cr@498 550 *tp++ = t;
mas01cr@498 551 *tp++ = t;
mas01cr@498 552 }
mas01cr@498 553 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 554 goto error;
mas01cr@498 555 }
mas01cr@498 556 *tp = t;
mas01mc@541 557 if(!cache){
mas01mc@541 558 fclose(file);
mas01mc@541 559 file=0;
mas01mc@541 560 }
mas01cr@498 561 }
mas01cr@498 562 datum->key = insert->key ? insert->key : insert->features;
mas01cr@498 563 return 0;
mas01cr@498 564
mas01cr@498 565 error:
mas01cr@498 566 if(fd > 0) {
mas01cr@498 567 close(fd);
mas01cr@498 568 }
mas01cr@498 569 if(file) {
mas01cr@498 570 fclose(file);
mas01cr@498 571 }
mas01cr@498 572 audiodb_free_datum(datum);
mas01mc@541 573 if(cache)
mas01mc@541 574 audiodb_free_datum_cache(cache);
mas01cr@498 575 return 1;
mas01mc@541 576 }
mas01cr@498 577
mas01cr@498 578 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
mas01cr@509 579 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 580 adb_reference_t *reference = insert;
mas01cr@498 581 int err;
mas01cr@498 582 err = audiodb_insert_reference(adb, reference);
mas01cr@498 583
mas01cr@498 584 if(err == 2) {
mas01cr@498 585 return 0;
mas01cr@498 586 } else {
mas01cr@498 587 return err;
mas01cr@498 588 }
mas01cr@498 589 } else {
mas01cr@498 590 adb_datum_t datum;
mas01cr@498 591 int err;
mas01cr@498 592
mas01cr@498 593 if(audiodb_insert_create_datum(insert, &datum)) {
mas01cr@498 594 return 1;
mas01cr@498 595 }
mas01cr@498 596 err = audiodb_insert_datum(adb, &datum);
mas01cr@498 597 audiodb_free_datum(&datum);
mas01cr@498 598
mas01cr@498 599 if(err == 2) {
mas01cr@498 600 return 0;
mas01cr@498 601 } else {
mas01cr@498 602 return err;
mas01cr@239 603 }
mas01cr@239 604 }
mas01cr@239 605 }
mas01cr@239 606
mas01cr@498 607 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
mas01cr@498 608 int err;
mas01cr@498 609 for(unsigned int n = 0; n < size; n++) {
mas01cr@498 610 if((err = audiodb_insert(adb, &(insert[n])))) {
mas01cr@498 611 return err;
mas01cr@498 612 }
mas01mc@324 613 }
mas01cr@498 614 return 0;
mas01cr@239 615 }