annotate insert.cpp @ 541:52d82badc544 multiprobeLSH

Added file caching for sparse datum reads. This required making a new type called adb_fd_cache_t and modifying the read methods audiodb_track_id_datum() and audiodb_insert_create_datum() to use the cache struct if one is provided by the user.
author mas01mc
date Sat, 07 Feb 2009 16:59:31 +0000
parents 1bf090279174
children 79ffab663ace
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@498 6 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
mas01cr@498 7 adb_header_t *header = adb->header;
mas01cr@509 8 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 9 return true;
mas01cr@498 10 } else {
mas01cr@498 11 /* FIXME: timesTableOffset isn't necessarily the next biggest
mas01cr@498 12 * offset after dataOffset. Maybe make the offsets into an array
mas01cr@498 13 * that we can iterate over... */
mas01cr@498 14 return (header->timesTableOffset >
mas01cr@498 15 (header->dataOffset + header->length + size));
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18
mas01cr@498 19 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
mas01cr@498 20 /* FIXME: the comment above about the ordering of the tables applies
mas01cr@498 21 here too. */
mas01cr@498 22 adb_header_t *header = adb->header;
mas01cr@498 23 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
mas01cr@498 24 off_t track_table_length = header->dataOffset - header->trackTableOffset;
mas01cr@509 25 int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 26 int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE;
mas01cr@498 27 /* maxfiles is the _minimum_ of the two. Do not be confused... */
mas01cr@498 28 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@509 29 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 30 /* by default, these tables are created with the same size as the
mas01cr@498 31 * fileTable (which should be called key_table); relying on that
mas01cr@498 32 * always being the case, though, smacks of optimism, so instead
mas01cr@498 33 * we code defensively... */
mas01cr@498 34 off_t data_table_length = header->timesTableOffset - header->dataOffset;
mas01cr@498 35 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
mas01cr@498 36 off_t power_table_length = header->dbSize - header->powerTableOffset;
mas01cr@509 37 int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 38 int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 39 int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 40 /* ... even though it means a certain amount of tedium. */
mas01cr@498 41 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
mas01cr@498 42 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
mas01cr@498 43 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
mas01cr@498 44 }
mas01cr@498 45 return (header->numFiles < (unsigned int) maxfiles);
mas01cr@498 46 }
mas01cr@498 47
mas01cr@498 48 /*
mas01cr@498 49 * Hey, look, a comment. Normally I wouldn't bother, as the code
mas01cr@498 50 * should be self-documenting, but a lot of logic is concentrated in
mas01cr@498 51 * this one place, so let's give an overview beforehand. To insert a
mas01cr@498 52 * datum into the database, we:
mas01cr@498 53 *
mas01cr@498 54 * 1. check write permission;
mas01cr@498 55 * 2. check for enough space;
mas01cr@498 56 * 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 57 * header dimension is zero, in which case write datum->dim to
mas01cr@498 58 * adb->header->dim).
mas01cr@498 59 * 4. check for presence of datum->key in adb->keymap;
mas01cr@509 60 * 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 61 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 62 * 6. write in data, power, times as appropriate; add to track
mas01cr@498 63 * and key tables too;
mas01cr@509 64 * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 65 * compute norms and fill in table;
mas01cr@498 66 * 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 67 * adb->track_offsets and adb->header;
mas01cr@498 68 * 9. sync adb->header with disk.
mas01cr@498 69 *
mas01cr@498 70 * Step 9 essentially commits the transaction; until we update
mas01cr@498 71 * header->length, nothing will recognize the newly-written data. In
mas01cr@498 72 * principle, if it fails, we should roll back, which we can in fact
mas01cr@498 73 * do on the assumption that nothing in step 8 can ever fail; on the
mas01cr@498 74 * other hand, if it's failed, then it's unlikely that rolling back by
mas01cr@498 75 * syncing the original header back to disk is going to work
mas01cr@498 76 * desperately well. We should perhaps take an operating-system lock
mas01cr@498 77 * around step 9, so that we can't be interrupted part-way through
mas01cr@498 78 * (except of course for SIGKILL, but if we're hit with that we will
mas01cr@498 79 * always lose).
mas01cr@498 80 */
mas01cr@498 81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
mas01cr@498 82
mas01cr@498 83 off_t size, offset, nfiles;
mas01cr@498 84 double *l2norm_buffer = NULL;
mas01cr@498 85
mas01cr@498 86 /* 1. check write permission; */
mas01cr@498 87 if(!(adb->flags & O_RDWR)) {
mas01cr@498 88 return 1;
mas01cr@498 89 }
mas01cr@498 90 /* 2. check for enough space; */
mas01cr@498 91 size = sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 92 if(!audiodb_enough_data_space_free(adb, size)) {
mas01cr@498 93 return 1;
mas01cr@498 94 }
mas01cr@498 95 if(!audiodb_enough_per_file_space_free(adb)) {
mas01cr@498 96 return 1;
mas01cr@498 97 }
mas01cr@498 98 /* 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 99 * header dimension is zero, in which case write datum->dim to
mas01cr@498 100 * adb->header->dim).
mas01cr@498 101 */
mas01cr@498 102 if(adb->header->dim == 0) {
mas01cr@498 103 adb->header->dim = datum->dim;
mas01cr@498 104 } else if (adb->header->dim != datum->dim) {
mas01cr@498 105 return 1;
mas01cr@498 106 }
mas01cr@498 107 /* 4. check for presence of datum->key in adb->keymap; */
mas01cr@498 108 if(adb->keymap->count(datum->key)) {
mas01cr@498 109 /* not part of an explicit API/ABI, but we need a distinguished
mas01cr@498 110 value in this circumstance to preserve somewhat wonky behaviour
mas01cr@498 111 of audioDB::batchinsert. */
mas01cr@498 112 return 2;
mas01cr@498 113 }
mas01cr@509 114 /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 115 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 116 */
mas01cr@509 117 if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) ||
mas01cr@509 118 ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) {
mas01cr@498 119 return 1;
mas01cr@498 120 }
mas01cr@509 121 if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 122 if(adb->header->numFiles == 0) {
mas01cr@509 123 adb->header->flags |= ADB_HEADER_FLAG_TIMES;
mas01cr@498 124 } else {
mas01cr@498 125 return 1;
mas01cr@239 126 }
mas01cr@509 127 } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) {
mas01cr@498 128 return 1;
mas01cr@498 129 }
mas01cr@498 130 /* 6. write in data, power, times as appropriate; add to track
mas01cr@498 131 * and key tables too;
mas01cr@498 132 */
mas01cr@498 133 offset = adb->header->length;
mas01cr@498 134 nfiles = adb->header->numFiles;
mas01cr@498 135
mas01cr@498 136 /* FIXME: checking for all these lseek()s */
mas01cr@509 137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
mas01cr@509 139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 140 write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE);
mas01cr@509 141 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 142 char cwd[PATH_MAX];
mas01cr@498 143 char slash = '/';
mas01cr@498 144
mas01cr@498 145 if(!getcwd(cwd, PATH_MAX)) {
mas01cr@498 146 goto error;
mas01cr@498 147 }
mas01cr@509 148 lseek(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 149 if(*((char *) datum->data) != '/') {
mas01cr@498 150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 151 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 152 }
mas01cr@498 153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
mas01cr@498 154 if(datum->power) {
mas01cr@509 155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 156 if(*((char *) datum->power) != '/') {
mas01cr@498 157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 158 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 159 }
mas01cr@498 160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
mas01cr@498 161 }
mas01cr@498 162 if(datum->times) {
mas01cr@509 163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 164 if(*((char *) datum->times) != '/') {
mas01cr@498 165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 166 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 167 }
mas01cr@498 168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
mas01cr@498 169 }
mas01cr@498 170 } else {
mas01cr@498 171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
mas01cr@498 172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
mas01cr@498 173 if(datum->power) {
mas01cr@498 174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
mas01cr@498 176 }
mas01cr@498 177 if(datum->times) {
mas01cr@498 178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
mas01cr@498 179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
mas01cr@498 180 }
mas01cr@498 181 }
mas01cr@498 182
mas01cr@509 183 /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 184 * compute norms and fill in table;
mas01cr@498 185 */
mas01cr@509 186 if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) &&
mas01cr@509 187 !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
mas01mc@324 189
mas01cr@498 190 audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer);
mas01cr@498 191 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 192 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
mas01cr@498 193 free(l2norm_buffer);
mas01cr@498 194 l2norm_buffer = NULL;
mas01cr@498 195 }
mas01cr@498 196
mas01cr@498 197 /* 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 198 * adb->track_offsets and adb->header;
mas01cr@498 199 */
mas01cr@498 200 adb->keys->push_back(datum->key);
mas01cr@498 201 (*adb->keymap)[datum->key] = adb->header->numFiles;
mas01cr@498 202 adb->track_lengths->push_back(datum->nvectors);
mas01cr@498 203 adb->track_offsets->push_back(offset);
mas01cr@498 204 adb->header->numFiles += 1;
mas01cr@498 205 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 206
mas01cr@498 207 /* 9. sync adb->header with disk. */
mas01cr@498 208 return audiodb_sync_header(adb);
mas01cr@498 209
mas01cr@498 210 error:
mas01cr@498 211 if(l2norm_buffer) {
mas01cr@498 212 free(l2norm_buffer);
mas01cr@498 213 }
mas01cr@498 214 return 1;
mas01cr@498 215 }
mas01cr@498 216
mas01cr@498 217 int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) {
mas01cr@509 218 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 219 return 1;
mas01cr@498 220 } else {
mas01cr@498 221 adb_datum_internal_t d;
mas01cr@498 222 d.nvectors = datum->nvectors;
mas01cr@498 223 d.dim = datum->dim;
mas01cr@498 224 d.key = datum->key;
mas01cr@498 225 d.data = datum->data;
mas01cr@498 226 d.times = datum->times;
mas01cr@498 227 d.power = datum->power;
mas01cr@498 228 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 229 }
mas01cr@498 230 }
mas01cr@498 231
mas01cr@498 232 int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) {
mas01cr@509 233 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 234 return 1;
mas01cr@498 235 } else {
mas01cr@498 236 adb_datum_internal_t d;
mas01cr@498 237 struct stat st;
mas01cr@498 238 int fd;
mas01cr@498 239 off_t size;
mas01mc@324 240
mas01cr@498 241 if((fd = open(reference->features, O_RDONLY)) == -1) {
mas01cr@498 242 return 1;
mas01cr@239 243 }
mas01cr@498 244 if(fstat(fd, &st)) {
mas01cr@498 245 goto error;
mas01cr@239 246 }
mas01cr@498 247 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
mas01cr@498 248 close(fd);
mas01cr@498 249 fd = 0;
mas01cr@498 250 size = st.st_size - sizeof(uint32_t);
mas01cr@498 251 d.nvectors = size / (sizeof(double) * d.dim);
mas01cr@498 252 d.data = (void *) reference->features;
mas01cr@498 253 if(reference->power) {
mas01cr@498 254 if(stat(reference->power, &st)) {
mas01cr@498 255 return 1;
mas01cr@498 256 }
mas01cr@498 257 }
mas01cr@498 258 d.power = (void *) reference->power;
mas01cr@498 259 if(reference->times) {
mas01cr@498 260 if(stat(reference->times, &st)) {
mas01cr@498 261 return 1;
mas01cr@498 262 }
mas01cr@498 263 }
mas01cr@498 264 d.times = (void *) reference->times;
mas01cr@498 265 d.key = reference->key ? reference->key : reference->features;
mas01cr@498 266 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 267 error:
mas01cr@498 268 if(fd) {
mas01cr@498 269 close(fd);
mas01cr@498 270 }
mas01cr@498 271 return 1;
mas01cr@498 272 }
mas01cr@498 273 }
mas01cr@498 274
mas01cr@498 275 int audiodb_free_datum(adb_datum_t *datum) {
mas01cr@498 276 if(datum->data) {
mas01cr@498 277 free(datum->data);
mas01cr@498 278 datum->data = NULL;
mas01cr@498 279 }
mas01cr@498 280 if(datum->power) {
mas01cr@498 281 free(datum->power);
mas01cr@498 282 datum->power = NULL;
mas01cr@498 283 }
mas01cr@498 284 if(datum->times) {
mas01cr@498 285 free(datum->times);
mas01cr@498 286 datum->times = NULL;
mas01cr@498 287 }
mas01cr@498 288 return 0;
mas01cr@498 289 }
mas01cr@498 290
mas01mc@541 291 int audiodb_free_datum_cache(adb_fd_cache_t *cache){
mas01mc@541 292 if(cache){
mas01mc@541 293 if(cache->fname){
mas01mc@541 294 free(cache->fname);
mas01mc@541 295 cache->fname = NULL;
mas01mc@541 296 }
mas01mc@541 297 if(cache->data_fd){
mas01mc@541 298 close(cache->data_fd);
mas01mc@541 299 cache->data_fd = 0;
mas01mc@541 300 }
mas01mc@541 301 if(cache->power_fd){
mas01mc@541 302 close(cache->power_fd);
mas01mc@541 303 cache->power_fd = 0;
mas01mc@541 304 }
mas01mc@541 305 if(cache->times_file){
mas01mc@541 306 fclose(cache->times_file);
mas01mc@541 307 cache->times_file = NULL;
mas01mc@541 308 }
mas01mc@541 309 if(cache->reference){
mas01mc@541 310 audiodb_free_datum_reference(cache->reference);
mas01mc@541 311 cache->reference = NULL;
mas01mc@541 312 }
mas01mc@541 313 }
mas01mc@541 314 return 0;
mas01mc@541 315 }
mas01mc@541 316
mas01mc@541 317 int audiodb_free_datum_reference(adb_reference_t * reference){
mas01mc@541 318 if(reference){
mas01mc@541 319 if(reference->features){
mas01mc@541 320 free((char *)reference->features);
mas01mc@541 321 reference->features = 0;
mas01mc@541 322 }
mas01mc@541 323 if(reference->power){
mas01mc@541 324 free((char *)reference->power);
mas01mc@541 325 reference->power = 0;
mas01mc@541 326 }
mas01mc@541 327 if(reference->times){
mas01mc@541 328 free((char *)reference->times);
mas01mc@541 329 reference->times = 0;
mas01mc@541 330 }
mas01mc@541 331 }
mas01mc@541 332 return 0;
mas01mc@541 333 }
mas01mc@541 334
mas01mc@541 335 int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum, off_t data_offset=0, size_t data_size=0, adb_fd_cache_t *cache=0) {
mas01cr@498 336 int fd = 0;
mas01cr@498 337 FILE *file = NULL;
mas01cr@498 338 struct stat st;
mas01cr@498 339 off_t size;
mas01mc@541 340 bool clear_cache = false;
mas01cr@498 341
mas01mc@541 342 if(!cache){
mas01mc@541 343 datum->data = NULL;
mas01mc@541 344 datum->power = NULL;
mas01mc@541 345 datum->times = NULL;
mas01cr@498 346 }
mas01mc@541 347
mas01mc@541 348 // STEP 1 check if we need to clear the cache
mas01mc@541 349 if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0))
mas01mc@541 350 clear_cache = true;
mas01mc@541 351
mas01mc@541 352 // STEP 2. Clear the cache if necessary
mas01mc@541 353 if(cache && clear_cache){
mas01mc@541 354 close(cache->data_fd);
mas01mc@541 355 cache->data_fd = 0;
mas01mc@541 356 free(cache->fname);
mas01mc@541 357 cache->fname = 0;
mas01mc@541 358 }
mas01mc@541 359
mas01mc@541 360 // STEP 3. Use the cached file descriptor or open a new file descriptor
mas01mc@541 361 if (cache && cache->data_fd ){
mas01mc@541 362 fd = cache->data_fd;
mas01mc@541 363 }
mas01mc@541 364 else{
mas01mc@541 365 if ((fd = open(insert->features, O_RDONLY)) == -1) {
mas01mc@541 366 goto error;
mas01mc@541 367 }
mas01mc@541 368 if(cache){
mas01mc@541 369 cache->fname = (char*) malloc(strlen(insert->features));
mas01mc@541 370 strncpy(cache->fname, insert->features, strlen(insert->features));
mas01mc@541 371 }
mas01mc@541 372 }
mas01mc@541 373
mas01cr@498 374 if(fstat(fd, &st)) {
mas01cr@498 375 goto error;
mas01cr@498 376 }
mas01mc@541 377
mas01mc@541 378 // STEP 4. If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 379 if( !( cache && cache->data_fd ) ){
mas01mc@541 380 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
mas01mc@541 381 if(cache)
mas01mc@541 382 cache->data_fd = fd;
mas01mc@541 383 }
mas01mc@541 384
mas01mc@541 385 // STEP 5. Allocate data memory if necessary, read the requested amount of data
mas01mc@539 386 if(data_size)
mas01mc@539 387 size = data_size;
mas01mc@539 388 else
mas01mc@539 389 size = st.st_size - sizeof(uint32_t);
mas01mc@541 390
mas01cr@498 391 datum->nvectors = size / (sizeof(double) * datum->dim);
mas01mc@541 392
mas01mc@541 393 if(!datum->data){
mas01mc@541 394 datum->data = (double *) malloc(size);
mas01mc@541 395 }
mas01mc@541 396
mas01cr@498 397 if(!datum->data) {
mas01cr@498 398 goto error;
mas01cr@498 399 }
mas01mc@541 400
mas01mc@539 401 if(data_offset)
mas01mc@541 402 lseek(fd, sizeof(uint32_t) + data_offset, SEEK_SET);
mas01cr@498 403 read_or_goto_error(fd, datum->data, size);
mas01mc@541 404
mas01mc@541 405 // STEP 6. Close the file descriptor, unless we are caching it
mas01mc@541 406 if(!cache)
mas01mc@541 407 close(fd);
mas01mc@541 408 fd = 0; // we're done with the data
mas01mc@541 409
mas01cr@498 410 if(insert->power) {
mas01cr@498 411 int dim;
mas01mc@541 412
mas01mc@541 413 // Clear the cache if necessary
mas01mc@541 414 if(clear_cache){
mas01mc@541 415 close(cache->power_fd);
mas01mc@541 416 cache->power_fd = 0;
mas01mc@541 417 }
mas01mc@541 418
mas01mc@541 419 // Use the cached file descriptor or open a new file descriptor
mas01mc@541 420 if (cache && cache->power_fd)
mas01mc@541 421 fd = cache->power_fd;
mas01mc@541 422 else if((fd = open(insert->power, O_RDONLY)) == -1) {
mas01cr@498 423 goto error;
mas01cr@498 424 }
mas01mc@541 425
mas01cr@498 426 if(fstat(fd, &st)) {
mas01cr@498 427 goto error;
mas01cr@498 428 }
mas01mc@541 429
mas01cr@498 430 /* This cast is so non-trivial that it deserves a comment.
mas01cr@498 431 *
mas01cr@498 432 * The data types in this expression, left to right, are: off_t,
mas01cr@498 433 * size_t, off_t, uint32_t. The rules for conversions in
mas01cr@498 434 * arithmetic expressions with mixtures of integral types are
mas01cr@498 435 * essentially that the widest type wins, with unsigned types
mas01cr@498 436 * winning on a tie-break.
mas01cr@498 437 *
mas01cr@498 438 * Because we are enforcing (through the use of sufficient
mas01cr@498 439 * compiler flags, if necessary) that off_t be a (signed) 64-bit
mas01cr@498 440 * type, the only variability in this set of types is in fact the
mas01cr@498 441 * size_t. On 32-bit machines, size_t is uint32_t and so the
mas01cr@498 442 * coercions on both sides of the equality end up promoting
mas01cr@498 443 * everything to int64_t, which is fine. On 64-bit machines,
mas01cr@498 444 * however, the left hand side is promoted to a uint64_t, while
mas01cr@498 445 * the right hand side remains int64_t.
mas01cr@498 446 *
mas01cr@498 447 * The mixture of signed and unsigned types in comparisons is Evil
mas01cr@498 448 * Bad and Wrong, and gcc complains about it. (It's right to do
mas01cr@498 449 * so, actually). Of course in this case it will never matter
mas01cr@498 450 * because of the particular relationships between all of these
mas01cr@498 451 * numbers, so we just cast the left hand side to off_t, which
mas01cr@498 452 * will do the right thing for us on all platforms.
mas01cr@498 453 *
mas01cr@498 454 * I hate C.
mas01cr@498 455 */
mas01mc@541 456
mas01mc@539 457 if( (!data_size) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
mas01cr@498 458 goto error;
mas01cr@498 459 }
mas01mc@541 460
mas01mc@541 461 // If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 462 if( !( cache && cache->power_fd ) ){
mas01mc@541 463 read_or_goto_error(fd, &dim, sizeof(uint32_t));
mas01mc@541 464 if(dim != 1) {
mas01mc@541 465 goto error;
mas01mc@541 466 }
mas01mc@541 467 if(cache)
mas01mc@541 468 cache->power_fd = fd;
mas01cr@498 469 }
mas01mc@541 470
mas01mc@541 471 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 472 if(!datum->power)
mas01mc@541 473 datum->power = (double *) malloc(size / datum->dim);
mas01cr@498 474 if(!datum->power) {
mas01cr@498 475 goto error;
mas01cr@498 476 }
mas01mc@541 477
mas01mc@539 478 if(data_offset)
mas01mc@541 479 lseek(fd, sizeof(uint32_t) + data_offset/datum->dim, SEEK_SET);
mas01mc@541 480
mas01cr@498 481 read_or_goto_error(fd, datum->power, size / datum->dim);
mas01mc@541 482
mas01mc@541 483 if(!cache)
mas01mc@541 484 close(fd);
mas01mc@541 485 fd = 0;
mas01cr@498 486 }
mas01mc@541 487
mas01cr@498 488 if(insert->times) {
mas01cr@498 489 double t, *tp;
mas01mc@541 490
mas01mc@541 491 // Clear the cache if necessary
mas01mc@541 492 if(clear_cache){
mas01mc@541 493 fclose(cache->times_file);
mas01mc@541 494 cache->times_file = 0;
mas01cr@498 495 }
mas01mc@541 496
mas01mc@541 497 // Use the cached file descriptor or open a new file descriptor and maybe cache
mas01mc@541 498 if (cache && cache->times_file)
mas01mc@541 499 file = cache->times_file;
mas01mc@541 500 else{
mas01mc@541 501 if(!(file = fopen(insert->times, "r"))) {
mas01mc@541 502 goto error;
mas01mc@541 503 }
mas01mc@541 504 if(cache)
mas01mc@541 505 cache->times_file = file;
mas01mc@541 506 }
mas01mc@541 507
mas01mc@541 508 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 509 if(!datum->times)
mas01mc@541 510 datum->times = (double *) malloc(2 * size / datum->dim);
mas01cr@498 511 if(!datum->times) {
mas01cr@498 512 goto error;
mas01cr@498 513 }
mas01mc@541 514
mas01mc@541 515 rewind(file);
mas01cr@498 516 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 517 goto error;
mas01cr@498 518 }
mas01mc@539 519 if(data_offset)
mas01mc@541 520 while(data_offset-- != 1 )
mas01mc@539 521 if(fscanf(file, " %lf", &t) != 1)
mas01mc@539 522 goto error;
mas01cr@498 523 tp = datum->times;
mas01cr@498 524 *tp++ = t;
mas01cr@498 525 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
mas01cr@498 526 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 527 goto error;
mas01cr@498 528 }
mas01cr@498 529 *tp++ = t;
mas01cr@498 530 *tp++ = t;
mas01cr@498 531 }
mas01cr@498 532 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 533 goto error;
mas01cr@498 534 }
mas01cr@498 535 *tp = t;
mas01mc@541 536 if(!cache){
mas01mc@541 537 fclose(file);
mas01mc@541 538 file=0;
mas01mc@541 539 }
mas01cr@498 540 }
mas01cr@498 541 datum->key = insert->key ? insert->key : insert->features;
mas01cr@498 542 return 0;
mas01cr@498 543
mas01cr@498 544 error:
mas01cr@498 545 if(fd > 0) {
mas01cr@498 546 close(fd);
mas01cr@498 547 }
mas01cr@498 548 if(file) {
mas01cr@498 549 fclose(file);
mas01cr@498 550 }
mas01cr@498 551 audiodb_free_datum(datum);
mas01mc@541 552 if(cache)
mas01mc@541 553 audiodb_free_datum_cache(cache);
mas01cr@498 554 return 1;
mas01mc@541 555 }
mas01cr@498 556
mas01cr@498 557 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
mas01cr@509 558 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 559 adb_reference_t *reference = insert;
mas01cr@498 560 int err;
mas01cr@498 561 err = audiodb_insert_reference(adb, reference);
mas01cr@498 562
mas01cr@498 563 if(err == 2) {
mas01cr@498 564 return 0;
mas01cr@498 565 } else {
mas01cr@498 566 return err;
mas01cr@498 567 }
mas01cr@498 568 } else {
mas01cr@498 569 adb_datum_t datum;
mas01cr@498 570 int err;
mas01cr@498 571
mas01cr@498 572 if(audiodb_insert_create_datum(insert, &datum)) {
mas01cr@498 573 return 1;
mas01cr@498 574 }
mas01cr@498 575 err = audiodb_insert_datum(adb, &datum);
mas01cr@498 576 audiodb_free_datum(&datum);
mas01cr@498 577
mas01cr@498 578 if(err == 2) {
mas01cr@498 579 return 0;
mas01cr@498 580 } else {
mas01cr@498 581 return err;
mas01cr@239 582 }
mas01cr@239 583 }
mas01cr@239 584 }
mas01cr@239 585
mas01cr@498 586 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
mas01cr@498 587 int err;
mas01cr@498 588 for(unsigned int n = 0; n < size; n++) {
mas01cr@498 589 if((err = audiodb_insert(adb, &(insert[n])))) {
mas01cr@498 590 return err;
mas01cr@498 591 }
mas01mc@324 592 }
mas01cr@498 593 return 0;
mas01cr@239 594 }