annotate insert.cpp @ 542:79ffab663ace multiprobeLSH

This fix plugs a minor memory leak introduced in caching file references.
author mas01mc
date Sat, 07 Feb 2009 18:01:18 +0000
parents 52d82badc544
children bf89c80ec4cc
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@498 6 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
mas01cr@498 7 adb_header_t *header = adb->header;
mas01cr@509 8 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 9 return true;
mas01cr@498 10 } else {
mas01cr@498 11 /* FIXME: timesTableOffset isn't necessarily the next biggest
mas01cr@498 12 * offset after dataOffset. Maybe make the offsets into an array
mas01cr@498 13 * that we can iterate over... */
mas01cr@498 14 return (header->timesTableOffset >
mas01cr@498 15 (header->dataOffset + header->length + size));
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18
mas01cr@498 19 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
mas01cr@498 20 /* FIXME: the comment above about the ordering of the tables applies
mas01cr@498 21 here too. */
mas01cr@498 22 adb_header_t *header = adb->header;
mas01cr@498 23 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
mas01cr@498 24 off_t track_table_length = header->dataOffset - header->trackTableOffset;
mas01cr@509 25 int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 26 int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE;
mas01cr@498 27 /* maxfiles is the _minimum_ of the two. Do not be confused... */
mas01cr@498 28 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@509 29 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 30 /* by default, these tables are created with the same size as the
mas01cr@498 31 * fileTable (which should be called key_table); relying on that
mas01cr@498 32 * always being the case, though, smacks of optimism, so instead
mas01cr@498 33 * we code defensively... */
mas01cr@498 34 off_t data_table_length = header->timesTableOffset - header->dataOffset;
mas01cr@498 35 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
mas01cr@498 36 off_t power_table_length = header->dbSize - header->powerTableOffset;
mas01cr@509 37 int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 38 int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 39 int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 40 /* ... even though it means a certain amount of tedium. */
mas01cr@498 41 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
mas01cr@498 42 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
mas01cr@498 43 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
mas01cr@498 44 }
mas01cr@498 45 return (header->numFiles < (unsigned int) maxfiles);
mas01cr@498 46 }
mas01cr@498 47
mas01cr@498 48 /*
mas01cr@498 49 * Hey, look, a comment. Normally I wouldn't bother, as the code
mas01cr@498 50 * should be self-documenting, but a lot of logic is concentrated in
mas01cr@498 51 * this one place, so let's give an overview beforehand. To insert a
mas01cr@498 52 * datum into the database, we:
mas01cr@498 53 *
mas01cr@498 54 * 1. check write permission;
mas01cr@498 55 * 2. check for enough space;
mas01cr@498 56 * 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 57 * header dimension is zero, in which case write datum->dim to
mas01cr@498 58 * adb->header->dim).
mas01cr@498 59 * 4. check for presence of datum->key in adb->keymap;
mas01cr@509 60 * 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 61 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 62 * 6. write in data, power, times as appropriate; add to track
mas01cr@498 63 * and key tables too;
mas01cr@509 64 * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 65 * compute norms and fill in table;
mas01cr@498 66 * 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 67 * adb->track_offsets and adb->header;
mas01cr@498 68 * 9. sync adb->header with disk.
mas01cr@498 69 *
mas01cr@498 70 * Step 9 essentially commits the transaction; until we update
mas01cr@498 71 * header->length, nothing will recognize the newly-written data. In
mas01cr@498 72 * principle, if it fails, we should roll back, which we can in fact
mas01cr@498 73 * do on the assumption that nothing in step 8 can ever fail; on the
mas01cr@498 74 * other hand, if it's failed, then it's unlikely that rolling back by
mas01cr@498 75 * syncing the original header back to disk is going to work
mas01cr@498 76 * desperately well. We should perhaps take an operating-system lock
mas01cr@498 77 * around step 9, so that we can't be interrupted part-way through
mas01cr@498 78 * (except of course for SIGKILL, but if we're hit with that we will
mas01cr@498 79 * always lose).
mas01cr@498 80 */
mas01cr@498 81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
mas01cr@498 82
mas01cr@498 83 off_t size, offset, nfiles;
mas01cr@498 84 double *l2norm_buffer = NULL;
mas01cr@498 85
mas01cr@498 86 /* 1. check write permission; */
mas01cr@498 87 if(!(adb->flags & O_RDWR)) {
mas01cr@498 88 return 1;
mas01cr@498 89 }
mas01cr@498 90 /* 2. check for enough space; */
mas01cr@498 91 size = sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 92 if(!audiodb_enough_data_space_free(adb, size)) {
mas01cr@498 93 return 1;
mas01cr@498 94 }
mas01cr@498 95 if(!audiodb_enough_per_file_space_free(adb)) {
mas01cr@498 96 return 1;
mas01cr@498 97 }
mas01cr@498 98 /* 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 99 * header dimension is zero, in which case write datum->dim to
mas01cr@498 100 * adb->header->dim).
mas01cr@498 101 */
mas01cr@498 102 if(adb->header->dim == 0) {
mas01cr@498 103 adb->header->dim = datum->dim;
mas01cr@498 104 } else if (adb->header->dim != datum->dim) {
mas01cr@498 105 return 1;
mas01cr@498 106 }
mas01cr@498 107 /* 4. check for presence of datum->key in adb->keymap; */
mas01cr@498 108 if(adb->keymap->count(datum->key)) {
mas01cr@498 109 /* not part of an explicit API/ABI, but we need a distinguished
mas01cr@498 110 value in this circumstance to preserve somewhat wonky behaviour
mas01cr@498 111 of audioDB::batchinsert. */
mas01cr@498 112 return 2;
mas01cr@498 113 }
mas01cr@509 114 /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 115 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 116 */
mas01cr@509 117 if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) ||
mas01cr@509 118 ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) {
mas01cr@498 119 return 1;
mas01cr@498 120 }
mas01cr@509 121 if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 122 if(adb->header->numFiles == 0) {
mas01cr@509 123 adb->header->flags |= ADB_HEADER_FLAG_TIMES;
mas01cr@498 124 } else {
mas01cr@498 125 return 1;
mas01cr@239 126 }
mas01cr@509 127 } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) {
mas01cr@498 128 return 1;
mas01cr@498 129 }
mas01cr@498 130 /* 6. write in data, power, times as appropriate; add to track
mas01cr@498 131 * and key tables too;
mas01cr@498 132 */
mas01cr@498 133 offset = adb->header->length;
mas01cr@498 134 nfiles = adb->header->numFiles;
mas01cr@498 135
mas01cr@498 136 /* FIXME: checking for all these lseek()s */
mas01cr@509 137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
mas01cr@509 139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 140 write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE);
mas01cr@509 141 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 142 char cwd[PATH_MAX];
mas01cr@498 143 char slash = '/';
mas01cr@498 144
mas01cr@498 145 if(!getcwd(cwd, PATH_MAX)) {
mas01cr@498 146 goto error;
mas01cr@498 147 }
mas01cr@509 148 lseek(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 149 if(*((char *) datum->data) != '/') {
mas01cr@498 150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 151 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 152 }
mas01cr@498 153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
mas01cr@498 154 if(datum->power) {
mas01cr@509 155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 156 if(*((char *) datum->power) != '/') {
mas01cr@498 157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 158 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 159 }
mas01cr@498 160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
mas01cr@498 161 }
mas01cr@498 162 if(datum->times) {
mas01cr@509 163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 164 if(*((char *) datum->times) != '/') {
mas01cr@498 165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 166 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 167 }
mas01cr@498 168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
mas01cr@498 169 }
mas01cr@498 170 } else {
mas01cr@498 171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
mas01cr@498 172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
mas01cr@498 173 if(datum->power) {
mas01cr@498 174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
mas01cr@498 176 }
mas01cr@498 177 if(datum->times) {
mas01cr@498 178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
mas01cr@498 179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
mas01cr@498 180 }
mas01cr@498 181 }
mas01cr@498 182
mas01cr@509 183 /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 184 * compute norms and fill in table;
mas01cr@498 185 */
mas01cr@509 186 if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) &&
mas01cr@509 187 !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
mas01mc@324 189
mas01cr@498 190 audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer);
mas01cr@498 191 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 192 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
mas01cr@498 193 free(l2norm_buffer);
mas01cr@498 194 l2norm_buffer = NULL;
mas01cr@498 195 }
mas01cr@498 196
mas01cr@498 197 /* 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 198 * adb->track_offsets and adb->header;
mas01cr@498 199 */
mas01cr@498 200 adb->keys->push_back(datum->key);
mas01cr@498 201 (*adb->keymap)[datum->key] = adb->header->numFiles;
mas01cr@498 202 adb->track_lengths->push_back(datum->nvectors);
mas01cr@498 203 adb->track_offsets->push_back(offset);
mas01cr@498 204 adb->header->numFiles += 1;
mas01cr@498 205 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 206
mas01cr@498 207 /* 9. sync adb->header with disk. */
mas01cr@498 208 return audiodb_sync_header(adb);
mas01cr@498 209
mas01cr@498 210 error:
mas01cr@498 211 if(l2norm_buffer) {
mas01cr@498 212 free(l2norm_buffer);
mas01cr@498 213 }
mas01cr@498 214 return 1;
mas01cr@498 215 }
mas01cr@498 216
mas01cr@498 217 int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) {
mas01cr@509 218 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 219 return 1;
mas01cr@498 220 } else {
mas01cr@498 221 adb_datum_internal_t d;
mas01cr@498 222 d.nvectors = datum->nvectors;
mas01cr@498 223 d.dim = datum->dim;
mas01cr@498 224 d.key = datum->key;
mas01cr@498 225 d.data = datum->data;
mas01cr@498 226 d.times = datum->times;
mas01cr@498 227 d.power = datum->power;
mas01cr@498 228 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 229 }
mas01cr@498 230 }
mas01cr@498 231
mas01cr@498 232 int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) {
mas01cr@509 233 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 234 return 1;
mas01cr@498 235 } else {
mas01cr@498 236 adb_datum_internal_t d;
mas01cr@498 237 struct stat st;
mas01cr@498 238 int fd;
mas01cr@498 239 off_t size;
mas01mc@324 240
mas01cr@498 241 if((fd = open(reference->features, O_RDONLY)) == -1) {
mas01cr@498 242 return 1;
mas01cr@239 243 }
mas01cr@498 244 if(fstat(fd, &st)) {
mas01cr@498 245 goto error;
mas01cr@239 246 }
mas01cr@498 247 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
mas01cr@498 248 close(fd);
mas01cr@498 249 fd = 0;
mas01cr@498 250 size = st.st_size - sizeof(uint32_t);
mas01cr@498 251 d.nvectors = size / (sizeof(double) * d.dim);
mas01cr@498 252 d.data = (void *) reference->features;
mas01cr@498 253 if(reference->power) {
mas01cr@498 254 if(stat(reference->power, &st)) {
mas01cr@498 255 return 1;
mas01cr@498 256 }
mas01cr@498 257 }
mas01cr@498 258 d.power = (void *) reference->power;
mas01cr@498 259 if(reference->times) {
mas01cr@498 260 if(stat(reference->times, &st)) {
mas01cr@498 261 return 1;
mas01cr@498 262 }
mas01cr@498 263 }
mas01cr@498 264 d.times = (void *) reference->times;
mas01cr@498 265 d.key = reference->key ? reference->key : reference->features;
mas01cr@498 266 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 267 error:
mas01cr@498 268 if(fd) {
mas01cr@498 269 close(fd);
mas01cr@498 270 }
mas01cr@498 271 return 1;
mas01cr@498 272 }
mas01cr@498 273 }
mas01cr@498 274
mas01cr@498 275 int audiodb_free_datum(adb_datum_t *datum) {
mas01cr@498 276 if(datum->data) {
mas01cr@498 277 free(datum->data);
mas01cr@498 278 datum->data = NULL;
mas01cr@498 279 }
mas01cr@498 280 if(datum->power) {
mas01cr@498 281 free(datum->power);
mas01cr@498 282 datum->power = NULL;
mas01cr@498 283 }
mas01cr@498 284 if(datum->times) {
mas01cr@498 285 free(datum->times);
mas01cr@498 286 datum->times = NULL;
mas01cr@498 287 }
mas01cr@498 288 return 0;
mas01cr@498 289 }
mas01cr@498 290
mas01mc@541 291 int audiodb_free_datum_cache(adb_fd_cache_t *cache){
mas01mc@541 292 if(cache){
mas01mc@541 293 if(cache->fname){
mas01mc@541 294 free(cache->fname);
mas01mc@541 295 cache->fname = NULL;
mas01mc@541 296 }
mas01mc@541 297 if(cache->data_fd){
mas01mc@541 298 close(cache->data_fd);
mas01mc@541 299 cache->data_fd = 0;
mas01mc@541 300 }
mas01mc@541 301 if(cache->power_fd){
mas01mc@541 302 close(cache->power_fd);
mas01mc@541 303 cache->power_fd = 0;
mas01mc@541 304 }
mas01mc@541 305 if(cache->times_file){
mas01mc@541 306 fclose(cache->times_file);
mas01mc@541 307 cache->times_file = NULL;
mas01mc@541 308 }
mas01mc@541 309 if(cache->reference){
mas01mc@541 310 audiodb_free_datum_reference(cache->reference);
mas01mc@542 311 free(cache->reference);
mas01mc@541 312 cache->reference = NULL;
mas01mc@541 313 }
mas01mc@541 314 }
mas01mc@541 315 return 0;
mas01mc@541 316 }
mas01mc@541 317
mas01mc@541 318 int audiodb_free_datum_reference(adb_reference_t * reference){
mas01mc@541 319 if(reference){
mas01mc@541 320 if(reference->features){
mas01mc@541 321 free((char *)reference->features);
mas01mc@541 322 reference->features = 0;
mas01mc@541 323 }
mas01mc@541 324 if(reference->power){
mas01mc@541 325 free((char *)reference->power);
mas01mc@541 326 reference->power = 0;
mas01mc@541 327 }
mas01mc@541 328 if(reference->times){
mas01mc@541 329 free((char *)reference->times);
mas01mc@541 330 reference->times = 0;
mas01mc@541 331 }
mas01mc@541 332 }
mas01mc@541 333 return 0;
mas01mc@541 334 }
mas01mc@541 335
mas01mc@541 336 int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum, off_t data_offset=0, size_t data_size=0, adb_fd_cache_t *cache=0) {
mas01cr@498 337 int fd = 0;
mas01cr@498 338 FILE *file = NULL;
mas01cr@498 339 struct stat st;
mas01cr@498 340 off_t size;
mas01mc@541 341 bool clear_cache = false;
mas01cr@498 342
mas01mc@541 343 if(!cache){
mas01mc@541 344 datum->data = NULL;
mas01mc@541 345 datum->power = NULL;
mas01mc@541 346 datum->times = NULL;
mas01cr@498 347 }
mas01mc@541 348
mas01mc@541 349 // STEP 1 check if we need to clear the cache
mas01mc@541 350 if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0))
mas01mc@541 351 clear_cache = true;
mas01mc@541 352
mas01mc@541 353 // STEP 2. Clear the cache if necessary
mas01mc@541 354 if(cache && clear_cache){
mas01mc@541 355 close(cache->data_fd);
mas01mc@541 356 cache->data_fd = 0;
mas01mc@541 357 free(cache->fname);
mas01mc@541 358 cache->fname = 0;
mas01mc@541 359 }
mas01mc@541 360
mas01mc@541 361 // STEP 3. Use the cached file descriptor or open a new file descriptor
mas01mc@541 362 if (cache && cache->data_fd ){
mas01mc@541 363 fd = cache->data_fd;
mas01mc@541 364 }
mas01mc@541 365 else{
mas01mc@541 366 if ((fd = open(insert->features, O_RDONLY)) == -1) {
mas01mc@541 367 goto error;
mas01mc@541 368 }
mas01mc@541 369 if(cache){
mas01mc@541 370 cache->fname = (char*) malloc(strlen(insert->features));
mas01mc@541 371 strncpy(cache->fname, insert->features, strlen(insert->features));
mas01mc@541 372 }
mas01mc@541 373 }
mas01mc@541 374
mas01cr@498 375 if(fstat(fd, &st)) {
mas01cr@498 376 goto error;
mas01cr@498 377 }
mas01mc@541 378
mas01mc@541 379 // STEP 4. If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 380 if( !( cache && cache->data_fd ) ){
mas01mc@541 381 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
mas01mc@541 382 if(cache)
mas01mc@541 383 cache->data_fd = fd;
mas01mc@541 384 }
mas01mc@541 385
mas01mc@541 386 // STEP 5. Allocate data memory if necessary, read the requested amount of data
mas01mc@539 387 if(data_size)
mas01mc@539 388 size = data_size;
mas01mc@539 389 else
mas01mc@539 390 size = st.st_size - sizeof(uint32_t);
mas01mc@541 391
mas01cr@498 392 datum->nvectors = size / (sizeof(double) * datum->dim);
mas01mc@541 393
mas01mc@541 394 if(!datum->data){
mas01mc@541 395 datum->data = (double *) malloc(size);
mas01mc@541 396 }
mas01mc@541 397
mas01cr@498 398 if(!datum->data) {
mas01cr@498 399 goto error;
mas01cr@498 400 }
mas01mc@541 401
mas01mc@539 402 if(data_offset)
mas01mc@541 403 lseek(fd, sizeof(uint32_t) + data_offset, SEEK_SET);
mas01cr@498 404 read_or_goto_error(fd, datum->data, size);
mas01mc@541 405
mas01mc@541 406 // STEP 6. Close the file descriptor, unless we are caching it
mas01mc@541 407 if(!cache)
mas01mc@541 408 close(fd);
mas01mc@541 409 fd = 0; // we're done with the data
mas01mc@541 410
mas01cr@498 411 if(insert->power) {
mas01cr@498 412 int dim;
mas01mc@541 413
mas01mc@541 414 // Clear the cache if necessary
mas01mc@541 415 if(clear_cache){
mas01mc@541 416 close(cache->power_fd);
mas01mc@541 417 cache->power_fd = 0;
mas01mc@541 418 }
mas01mc@541 419
mas01mc@541 420 // Use the cached file descriptor or open a new file descriptor
mas01mc@541 421 if (cache && cache->power_fd)
mas01mc@541 422 fd = cache->power_fd;
mas01mc@541 423 else if((fd = open(insert->power, O_RDONLY)) == -1) {
mas01cr@498 424 goto error;
mas01cr@498 425 }
mas01mc@541 426
mas01cr@498 427 if(fstat(fd, &st)) {
mas01cr@498 428 goto error;
mas01cr@498 429 }
mas01mc@541 430
mas01cr@498 431 /* This cast is so non-trivial that it deserves a comment.
mas01cr@498 432 *
mas01cr@498 433 * The data types in this expression, left to right, are: off_t,
mas01cr@498 434 * size_t, off_t, uint32_t. The rules for conversions in
mas01cr@498 435 * arithmetic expressions with mixtures of integral types are
mas01cr@498 436 * essentially that the widest type wins, with unsigned types
mas01cr@498 437 * winning on a tie-break.
mas01cr@498 438 *
mas01cr@498 439 * Because we are enforcing (through the use of sufficient
mas01cr@498 440 * compiler flags, if necessary) that off_t be a (signed) 64-bit
mas01cr@498 441 * type, the only variability in this set of types is in fact the
mas01cr@498 442 * size_t. On 32-bit machines, size_t is uint32_t and so the
mas01cr@498 443 * coercions on both sides of the equality end up promoting
mas01cr@498 444 * everything to int64_t, which is fine. On 64-bit machines,
mas01cr@498 445 * however, the left hand side is promoted to a uint64_t, while
mas01cr@498 446 * the right hand side remains int64_t.
mas01cr@498 447 *
mas01cr@498 448 * The mixture of signed and unsigned types in comparisons is Evil
mas01cr@498 449 * Bad and Wrong, and gcc complains about it. (It's right to do
mas01cr@498 450 * so, actually). Of course in this case it will never matter
mas01cr@498 451 * because of the particular relationships between all of these
mas01cr@498 452 * numbers, so we just cast the left hand side to off_t, which
mas01cr@498 453 * will do the right thing for us on all platforms.
mas01cr@498 454 *
mas01cr@498 455 * I hate C.
mas01cr@498 456 */
mas01mc@541 457
mas01mc@539 458 if( (!data_size) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
mas01cr@498 459 goto error;
mas01cr@498 460 }
mas01mc@541 461
mas01mc@541 462 // If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 463 if( !( cache && cache->power_fd ) ){
mas01mc@541 464 read_or_goto_error(fd, &dim, sizeof(uint32_t));
mas01mc@541 465 if(dim != 1) {
mas01mc@541 466 goto error;
mas01mc@541 467 }
mas01mc@541 468 if(cache)
mas01mc@541 469 cache->power_fd = fd;
mas01cr@498 470 }
mas01mc@541 471
mas01mc@541 472 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 473 if(!datum->power)
mas01mc@541 474 datum->power = (double *) malloc(size / datum->dim);
mas01cr@498 475 if(!datum->power) {
mas01cr@498 476 goto error;
mas01cr@498 477 }
mas01mc@541 478
mas01mc@539 479 if(data_offset)
mas01mc@541 480 lseek(fd, sizeof(uint32_t) + data_offset/datum->dim, SEEK_SET);
mas01mc@541 481
mas01cr@498 482 read_or_goto_error(fd, datum->power, size / datum->dim);
mas01mc@541 483
mas01mc@541 484 if(!cache)
mas01mc@541 485 close(fd);
mas01mc@541 486 fd = 0;
mas01cr@498 487 }
mas01mc@541 488
mas01cr@498 489 if(insert->times) {
mas01cr@498 490 double t, *tp;
mas01mc@541 491
mas01mc@541 492 // Clear the cache if necessary
mas01mc@541 493 if(clear_cache){
mas01mc@541 494 fclose(cache->times_file);
mas01mc@541 495 cache->times_file = 0;
mas01cr@498 496 }
mas01mc@541 497
mas01mc@541 498 // Use the cached file descriptor or open a new file descriptor and maybe cache
mas01mc@541 499 if (cache && cache->times_file)
mas01mc@541 500 file = cache->times_file;
mas01mc@541 501 else{
mas01mc@541 502 if(!(file = fopen(insert->times, "r"))) {
mas01mc@541 503 goto error;
mas01mc@541 504 }
mas01mc@541 505 if(cache)
mas01mc@541 506 cache->times_file = file;
mas01mc@541 507 }
mas01mc@541 508
mas01mc@541 509 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 510 if(!datum->times)
mas01mc@541 511 datum->times = (double *) malloc(2 * size / datum->dim);
mas01cr@498 512 if(!datum->times) {
mas01cr@498 513 goto error;
mas01cr@498 514 }
mas01mc@541 515
mas01mc@541 516 rewind(file);
mas01cr@498 517 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 518 goto error;
mas01cr@498 519 }
mas01mc@539 520 if(data_offset)
mas01mc@541 521 while(data_offset-- != 1 )
mas01mc@539 522 if(fscanf(file, " %lf", &t) != 1)
mas01mc@539 523 goto error;
mas01cr@498 524 tp = datum->times;
mas01cr@498 525 *tp++ = t;
mas01cr@498 526 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
mas01cr@498 527 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 528 goto error;
mas01cr@498 529 }
mas01cr@498 530 *tp++ = t;
mas01cr@498 531 *tp++ = t;
mas01cr@498 532 }
mas01cr@498 533 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 534 goto error;
mas01cr@498 535 }
mas01cr@498 536 *tp = t;
mas01mc@541 537 if(!cache){
mas01mc@541 538 fclose(file);
mas01mc@541 539 file=0;
mas01mc@541 540 }
mas01cr@498 541 }
mas01cr@498 542 datum->key = insert->key ? insert->key : insert->features;
mas01cr@498 543 return 0;
mas01cr@498 544
mas01cr@498 545 error:
mas01cr@498 546 if(fd > 0) {
mas01cr@498 547 close(fd);
mas01cr@498 548 }
mas01cr@498 549 if(file) {
mas01cr@498 550 fclose(file);
mas01cr@498 551 }
mas01cr@498 552 audiodb_free_datum(datum);
mas01mc@541 553 if(cache)
mas01mc@541 554 audiodb_free_datum_cache(cache);
mas01cr@498 555 return 1;
mas01mc@541 556 }
mas01cr@498 557
mas01cr@498 558 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
mas01cr@509 559 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 560 adb_reference_t *reference = insert;
mas01cr@498 561 int err;
mas01cr@498 562 err = audiodb_insert_reference(adb, reference);
mas01cr@498 563
mas01cr@498 564 if(err == 2) {
mas01cr@498 565 return 0;
mas01cr@498 566 } else {
mas01cr@498 567 return err;
mas01cr@498 568 }
mas01cr@498 569 } else {
mas01cr@498 570 adb_datum_t datum;
mas01cr@498 571 int err;
mas01cr@498 572
mas01cr@498 573 if(audiodb_insert_create_datum(insert, &datum)) {
mas01cr@498 574 return 1;
mas01cr@498 575 }
mas01cr@498 576 err = audiodb_insert_datum(adb, &datum);
mas01cr@498 577 audiodb_free_datum(&datum);
mas01cr@498 578
mas01cr@498 579 if(err == 2) {
mas01cr@498 580 return 0;
mas01cr@498 581 } else {
mas01cr@498 582 return err;
mas01cr@239 583 }
mas01cr@239 584 }
mas01cr@239 585 }
mas01cr@239 586
mas01cr@498 587 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
mas01cr@498 588 int err;
mas01cr@498 589 for(unsigned int n = 0; n < size; n++) {
mas01cr@498 590 if((err = audiodb_insert(adb, &(insert[n])))) {
mas01cr@498 591 return err;
mas01cr@498 592 }
mas01mc@324 593 }
mas01cr@498 594 return 0;
mas01cr@239 595 }