annotate insert.cpp @ 545:bf89c80ec4cc multiprobeLSH

Expanded interface to audiodb_insert_create_datum() and audiodb_track_id_datum() to include _offset versions of both. Default cache is NULL, but offset and size parameters required.
author mas01mc
date Sun, 08 Feb 2009 15:53:57 +0000
parents 79ffab663ace
children e8193805ce42
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@498 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@498 6 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
mas01cr@498 7 adb_header_t *header = adb->header;
mas01cr@509 8 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 9 return true;
mas01cr@498 10 } else {
mas01cr@498 11 /* FIXME: timesTableOffset isn't necessarily the next biggest
mas01cr@498 12 * offset after dataOffset. Maybe make the offsets into an array
mas01cr@498 13 * that we can iterate over... */
mas01cr@498 14 return (header->timesTableOffset >
mas01cr@498 15 (header->dataOffset + header->length + size));
mas01cr@239 16 }
mas01cr@239 17 }
mas01cr@239 18
mas01cr@498 19 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
mas01cr@498 20 /* FIXME: the comment above about the ordering of the tables applies
mas01cr@498 21 here too. */
mas01cr@498 22 adb_header_t *header = adb->header;
mas01cr@498 23 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
mas01cr@498 24 off_t track_table_length = header->dataOffset - header->trackTableOffset;
mas01cr@509 25 int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 26 int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE;
mas01cr@498 27 /* maxfiles is the _minimum_ of the two. Do not be confused... */
mas01cr@498 28 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@509 29 if(header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 30 /* by default, these tables are created with the same size as the
mas01cr@498 31 * fileTable (which should be called key_table); relying on that
mas01cr@498 32 * always being the case, though, smacks of optimism, so instead
mas01cr@498 33 * we code defensively... */
mas01cr@498 34 off_t data_table_length = header->timesTableOffset - header->dataOffset;
mas01cr@498 35 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
mas01cr@498 36 off_t power_table_length = header->dbSize - header->powerTableOffset;
mas01cr@509 37 int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 38 int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@509 39 int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 40 /* ... even though it means a certain amount of tedium. */
mas01cr@498 41 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
mas01cr@498 42 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
mas01cr@498 43 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
mas01cr@498 44 }
mas01cr@498 45 return (header->numFiles < (unsigned int) maxfiles);
mas01cr@498 46 }
mas01cr@498 47
mas01cr@498 48 /*
mas01cr@498 49 * Hey, look, a comment. Normally I wouldn't bother, as the code
mas01cr@498 50 * should be self-documenting, but a lot of logic is concentrated in
mas01cr@498 51 * this one place, so let's give an overview beforehand. To insert a
mas01cr@498 52 * datum into the database, we:
mas01cr@498 53 *
mas01cr@498 54 * 1. check write permission;
mas01cr@498 55 * 2. check for enough space;
mas01cr@498 56 * 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 57 * header dimension is zero, in which case write datum->dim to
mas01cr@498 58 * adb->header->dim).
mas01cr@498 59 * 4. check for presence of datum->key in adb->keymap;
mas01cr@509 60 * 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 61 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 62 * 6. write in data, power, times as appropriate; add to track
mas01cr@498 63 * and key tables too;
mas01cr@509 64 * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 65 * compute norms and fill in table;
mas01cr@498 66 * 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 67 * adb->track_offsets and adb->header;
mas01cr@498 68 * 9. sync adb->header with disk.
mas01cr@498 69 *
mas01cr@498 70 * Step 9 essentially commits the transaction; until we update
mas01cr@498 71 * header->length, nothing will recognize the newly-written data. In
mas01cr@498 72 * principle, if it fails, we should roll back, which we can in fact
mas01cr@498 73 * do on the assumption that nothing in step 8 can ever fail; on the
mas01cr@498 74 * other hand, if it's failed, then it's unlikely that rolling back by
mas01cr@498 75 * syncing the original header back to disk is going to work
mas01cr@498 76 * desperately well. We should perhaps take an operating-system lock
mas01cr@498 77 * around step 9, so that we can't be interrupted part-way through
mas01cr@498 78 * (except of course for SIGKILL, but if we're hit with that we will
mas01cr@498 79 * always lose).
mas01cr@498 80 */
mas01cr@498 81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
mas01cr@498 82
mas01cr@498 83 off_t size, offset, nfiles;
mas01cr@498 84 double *l2norm_buffer = NULL;
mas01cr@498 85
mas01cr@498 86 /* 1. check write permission; */
mas01cr@498 87 if(!(adb->flags & O_RDWR)) {
mas01cr@498 88 return 1;
mas01cr@498 89 }
mas01cr@498 90 /* 2. check for enough space; */
mas01cr@498 91 size = sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 92 if(!audiodb_enough_data_space_free(adb, size)) {
mas01cr@498 93 return 1;
mas01cr@498 94 }
mas01cr@498 95 if(!audiodb_enough_per_file_space_free(adb)) {
mas01cr@498 96 return 1;
mas01cr@498 97 }
mas01cr@498 98 /* 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@498 99 * header dimension is zero, in which case write datum->dim to
mas01cr@498 100 * adb->header->dim).
mas01cr@498 101 */
mas01cr@498 102 if(adb->header->dim == 0) {
mas01cr@498 103 adb->header->dim = datum->dim;
mas01cr@498 104 } else if (adb->header->dim != datum->dim) {
mas01cr@498 105 return 1;
mas01cr@498 106 }
mas01cr@498 107 /* 4. check for presence of datum->key in adb->keymap; */
mas01cr@498 108 if(adb->keymap->count(datum->key)) {
mas01cr@498 109 /* not part of an explicit API/ABI, but we need a distinguished
mas01cr@498 110 value in this circumstance to preserve somewhat wonky behaviour
mas01cr@498 111 of audioDB::batchinsert. */
mas01cr@498 112 return 2;
mas01cr@498 113 }
mas01cr@509 114 /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER,
mas01cr@509 115 * and times and ADB_HEADER_FLAG_TIMES;
mas01cr@498 116 */
mas01cr@509 117 if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) ||
mas01cr@509 118 ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) {
mas01cr@498 119 return 1;
mas01cr@498 120 }
mas01cr@509 121 if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) {
mas01cr@498 122 if(adb->header->numFiles == 0) {
mas01cr@509 123 adb->header->flags |= ADB_HEADER_FLAG_TIMES;
mas01cr@498 124 } else {
mas01cr@498 125 return 1;
mas01cr@239 126 }
mas01cr@509 127 } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) {
mas01cr@498 128 return 1;
mas01cr@498 129 }
mas01cr@498 130 /* 6. write in data, power, times as appropriate; add to track
mas01cr@498 131 * and key tables too;
mas01cr@498 132 */
mas01cr@498 133 offset = adb->header->length;
mas01cr@498 134 nfiles = adb->header->numFiles;
mas01cr@498 135
mas01cr@498 136 /* FIXME: checking for all these lseek()s */
mas01cr@509 137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
mas01cr@509 139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@509 140 write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE);
mas01cr@509 141 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 142 char cwd[PATH_MAX];
mas01cr@498 143 char slash = '/';
mas01cr@498 144
mas01cr@498 145 if(!getcwd(cwd, PATH_MAX)) {
mas01cr@498 146 goto error;
mas01cr@498 147 }
mas01cr@509 148 lseek(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 149 if(*((char *) datum->data) != '/') {
mas01cr@498 150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 151 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 152 }
mas01cr@498 153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
mas01cr@498 154 if(datum->power) {
mas01cr@509 155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 156 if(*((char *) datum->power) != '/') {
mas01cr@498 157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 158 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 159 }
mas01cr@498 160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
mas01cr@498 161 }
mas01cr@498 162 if(datum->times) {
mas01cr@509 163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@498 164 if(*((char *) datum->times) != '/') {
mas01cr@498 165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@498 166 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@498 167 }
mas01cr@498 168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
mas01cr@498 169 }
mas01cr@498 170 } else {
mas01cr@498 171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
mas01cr@498 172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
mas01cr@498 173 if(datum->power) {
mas01cr@498 174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
mas01cr@498 176 }
mas01cr@498 177 if(datum->times) {
mas01cr@498 178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
mas01cr@498 179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
mas01cr@498 180 }
mas01cr@498 181 }
mas01cr@498 182
mas01cr@509 183 /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES,
mas01cr@509 184 * compute norms and fill in table;
mas01cr@498 185 */
mas01cr@509 186 if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) &&
mas01cr@509 187 !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
mas01mc@324 189
mas01cr@498 190 audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer);
mas01cr@498 191 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@498 192 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
mas01cr@498 193 free(l2norm_buffer);
mas01cr@498 194 l2norm_buffer = NULL;
mas01cr@498 195 }
mas01cr@498 196
mas01cr@498 197 /* 8. update adb->keys, adb->keymap, adb->track_lengths,
mas01cr@498 198 * adb->track_offsets and adb->header;
mas01cr@498 199 */
mas01cr@498 200 adb->keys->push_back(datum->key);
mas01cr@498 201 (*adb->keymap)[datum->key] = adb->header->numFiles;
mas01cr@498 202 adb->track_lengths->push_back(datum->nvectors);
mas01cr@498 203 adb->track_offsets->push_back(offset);
mas01cr@498 204 adb->header->numFiles += 1;
mas01cr@498 205 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
mas01cr@498 206
mas01cr@498 207 /* 9. sync adb->header with disk. */
mas01cr@498 208 return audiodb_sync_header(adb);
mas01cr@498 209
mas01cr@498 210 error:
mas01cr@498 211 if(l2norm_buffer) {
mas01cr@498 212 free(l2norm_buffer);
mas01cr@498 213 }
mas01cr@498 214 return 1;
mas01cr@498 215 }
mas01cr@498 216
mas01cr@498 217 int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) {
mas01cr@509 218 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 219 return 1;
mas01cr@498 220 } else {
mas01cr@498 221 adb_datum_internal_t d;
mas01cr@498 222 d.nvectors = datum->nvectors;
mas01cr@498 223 d.dim = datum->dim;
mas01cr@498 224 d.key = datum->key;
mas01cr@498 225 d.data = datum->data;
mas01cr@498 226 d.times = datum->times;
mas01cr@498 227 d.power = datum->power;
mas01cr@498 228 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 229 }
mas01cr@498 230 }
mas01cr@498 231
mas01cr@498 232 int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) {
mas01cr@509 233 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 234 return 1;
mas01cr@498 235 } else {
mas01cr@498 236 adb_datum_internal_t d;
mas01cr@498 237 struct stat st;
mas01cr@498 238 int fd;
mas01cr@498 239 off_t size;
mas01mc@324 240
mas01cr@498 241 if((fd = open(reference->features, O_RDONLY)) == -1) {
mas01cr@498 242 return 1;
mas01cr@239 243 }
mas01cr@498 244 if(fstat(fd, &st)) {
mas01cr@498 245 goto error;
mas01cr@239 246 }
mas01cr@498 247 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
mas01cr@498 248 close(fd);
mas01cr@498 249 fd = 0;
mas01cr@498 250 size = st.st_size - sizeof(uint32_t);
mas01cr@498 251 d.nvectors = size / (sizeof(double) * d.dim);
mas01cr@498 252 d.data = (void *) reference->features;
mas01cr@498 253 if(reference->power) {
mas01cr@498 254 if(stat(reference->power, &st)) {
mas01cr@498 255 return 1;
mas01cr@498 256 }
mas01cr@498 257 }
mas01cr@498 258 d.power = (void *) reference->power;
mas01cr@498 259 if(reference->times) {
mas01cr@498 260 if(stat(reference->times, &st)) {
mas01cr@498 261 return 1;
mas01cr@498 262 }
mas01cr@498 263 }
mas01cr@498 264 d.times = (void *) reference->times;
mas01cr@498 265 d.key = reference->key ? reference->key : reference->features;
mas01cr@498 266 return audiodb_insert_datum_internal(adb, &d);
mas01cr@498 267 error:
mas01cr@498 268 if(fd) {
mas01cr@498 269 close(fd);
mas01cr@498 270 }
mas01cr@498 271 return 1;
mas01cr@498 272 }
mas01cr@498 273 }
mas01cr@498 274
mas01cr@498 275 int audiodb_free_datum(adb_datum_t *datum) {
mas01cr@498 276 if(datum->data) {
mas01cr@498 277 free(datum->data);
mas01cr@498 278 datum->data = NULL;
mas01cr@498 279 }
mas01cr@498 280 if(datum->power) {
mas01cr@498 281 free(datum->power);
mas01cr@498 282 datum->power = NULL;
mas01cr@498 283 }
mas01cr@498 284 if(datum->times) {
mas01cr@498 285 free(datum->times);
mas01cr@498 286 datum->times = NULL;
mas01cr@498 287 }
mas01cr@498 288 return 0;
mas01cr@498 289 }
mas01cr@498 290
mas01mc@541 291 int audiodb_free_datum_cache(adb_fd_cache_t *cache){
mas01mc@541 292 if(cache){
mas01mc@541 293 if(cache->fname){
mas01mc@541 294 free(cache->fname);
mas01mc@541 295 cache->fname = NULL;
mas01mc@541 296 }
mas01mc@541 297 if(cache->data_fd){
mas01mc@541 298 close(cache->data_fd);
mas01mc@541 299 cache->data_fd = 0;
mas01mc@541 300 }
mas01mc@541 301 if(cache->power_fd){
mas01mc@541 302 close(cache->power_fd);
mas01mc@541 303 cache->power_fd = 0;
mas01mc@541 304 }
mas01mc@541 305 if(cache->times_file){
mas01mc@541 306 fclose(cache->times_file);
mas01mc@541 307 cache->times_file = NULL;
mas01mc@541 308 }
mas01mc@541 309 if(cache->reference){
mas01mc@541 310 audiodb_free_datum_reference(cache->reference);
mas01mc@542 311 free(cache->reference);
mas01mc@541 312 cache->reference = NULL;
mas01mc@541 313 }
mas01mc@541 314 }
mas01mc@541 315 return 0;
mas01mc@541 316 }
mas01mc@541 317
mas01mc@541 318 int audiodb_free_datum_reference(adb_reference_t * reference){
mas01mc@541 319 if(reference){
mas01mc@541 320 if(reference->features){
mas01mc@541 321 free((char *)reference->features);
mas01mc@541 322 reference->features = 0;
mas01mc@541 323 }
mas01mc@541 324 if(reference->power){
mas01mc@541 325 free((char *)reference->power);
mas01mc@541 326 reference->power = 0;
mas01mc@541 327 }
mas01mc@541 328 if(reference->times){
mas01mc@541 329 free((char *)reference->times);
mas01mc@541 330 reference->times = 0;
mas01mc@541 331 }
mas01mc@541 332 }
mas01mc@541 333 return 0;
mas01mc@541 334 }
mas01mc@541 335
mas01mc@545 336 int audiodb_insert_create_datum(adb_insert_t * insert, adb_datum_t *datum){
mas01mc@545 337 return audiodb_insert_create_datum_offset(insert, datum, 0, 0, 0);
mas01mc@545 338 }
mas01mc@545 339
mas01mc@545 340 int audiodb_insert_create_datum_offset(adb_insert_t *insert, adb_datum_t *datum, off_t data_offset, size_t data_size, adb_fd_cache_t *cache) {
mas01cr@498 341 int fd = 0;
mas01cr@498 342 FILE *file = NULL;
mas01cr@498 343 struct stat st;
mas01cr@498 344 off_t size;
mas01mc@541 345 bool clear_cache = false;
mas01cr@498 346
mas01mc@541 347 if(!cache){
mas01mc@541 348 datum->data = NULL;
mas01mc@541 349 datum->power = NULL;
mas01mc@541 350 datum->times = NULL;
mas01cr@498 351 }
mas01mc@541 352
mas01mc@541 353 // STEP 1 check if we need to clear the cache
mas01mc@541 354 if(cache && (cache->fname && strncmp(cache->fname, insert->features, strlen(insert->features))!=0))
mas01mc@541 355 clear_cache = true;
mas01mc@541 356
mas01mc@541 357 // STEP 2. Clear the cache if necessary
mas01mc@541 358 if(cache && clear_cache){
mas01mc@541 359 close(cache->data_fd);
mas01mc@541 360 cache->data_fd = 0;
mas01mc@541 361 free(cache->fname);
mas01mc@541 362 cache->fname = 0;
mas01mc@541 363 }
mas01mc@541 364
mas01mc@541 365 // STEP 3. Use the cached file descriptor or open a new file descriptor
mas01mc@541 366 if (cache && cache->data_fd ){
mas01mc@541 367 fd = cache->data_fd;
mas01mc@541 368 }
mas01mc@541 369 else{
mas01mc@541 370 if ((fd = open(insert->features, O_RDONLY)) == -1) {
mas01mc@541 371 goto error;
mas01mc@541 372 }
mas01mc@541 373 if(cache){
mas01mc@541 374 cache->fname = (char*) malloc(strlen(insert->features));
mas01mc@541 375 strncpy(cache->fname, insert->features, strlen(insert->features));
mas01mc@541 376 }
mas01mc@541 377 }
mas01mc@541 378
mas01cr@498 379 if(fstat(fd, &st)) {
mas01cr@498 380 goto error;
mas01cr@498 381 }
mas01mc@541 382
mas01mc@541 383 // STEP 4. If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 384 if( !( cache && cache->data_fd ) ){
mas01mc@541 385 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
mas01mc@541 386 if(cache)
mas01mc@541 387 cache->data_fd = fd;
mas01mc@541 388 }
mas01mc@541 389
mas01mc@541 390 // STEP 5. Allocate data memory if necessary, read the requested amount of data
mas01mc@539 391 if(data_size)
mas01mc@539 392 size = data_size;
mas01mc@539 393 else
mas01mc@539 394 size = st.st_size - sizeof(uint32_t);
mas01mc@541 395
mas01cr@498 396 datum->nvectors = size / (sizeof(double) * datum->dim);
mas01mc@541 397
mas01mc@541 398 if(!datum->data){
mas01mc@541 399 datum->data = (double *) malloc(size);
mas01mc@541 400 }
mas01mc@541 401
mas01cr@498 402 if(!datum->data) {
mas01cr@498 403 goto error;
mas01cr@498 404 }
mas01mc@541 405
mas01mc@539 406 if(data_offset)
mas01mc@541 407 lseek(fd, sizeof(uint32_t) + data_offset, SEEK_SET);
mas01cr@498 408 read_or_goto_error(fd, datum->data, size);
mas01mc@541 409
mas01mc@541 410 // STEP 6. Close the file descriptor, unless we are caching it
mas01mc@541 411 if(!cache)
mas01mc@541 412 close(fd);
mas01mc@541 413 fd = 0; // we're done with the data
mas01mc@541 414
mas01cr@498 415 if(insert->power) {
mas01cr@498 416 int dim;
mas01mc@541 417
mas01mc@541 418 // Clear the cache if necessary
mas01mc@541 419 if(clear_cache){
mas01mc@541 420 close(cache->power_fd);
mas01mc@541 421 cache->power_fd = 0;
mas01mc@541 422 }
mas01mc@541 423
mas01mc@541 424 // Use the cached file descriptor or open a new file descriptor
mas01mc@541 425 if (cache && cache->power_fd)
mas01mc@541 426 fd = cache->power_fd;
mas01mc@541 427 else if((fd = open(insert->power, O_RDONLY)) == -1) {
mas01cr@498 428 goto error;
mas01cr@498 429 }
mas01mc@541 430
mas01cr@498 431 if(fstat(fd, &st)) {
mas01cr@498 432 goto error;
mas01cr@498 433 }
mas01mc@541 434
mas01cr@498 435 /* This cast is so non-trivial that it deserves a comment.
mas01cr@498 436 *
mas01cr@498 437 * The data types in this expression, left to right, are: off_t,
mas01cr@498 438 * size_t, off_t, uint32_t. The rules for conversions in
mas01cr@498 439 * arithmetic expressions with mixtures of integral types are
mas01cr@498 440 * essentially that the widest type wins, with unsigned types
mas01cr@498 441 * winning on a tie-break.
mas01cr@498 442 *
mas01cr@498 443 * Because we are enforcing (through the use of sufficient
mas01cr@498 444 * compiler flags, if necessary) that off_t be a (signed) 64-bit
mas01cr@498 445 * type, the only variability in this set of types is in fact the
mas01cr@498 446 * size_t. On 32-bit machines, size_t is uint32_t and so the
mas01cr@498 447 * coercions on both sides of the equality end up promoting
mas01cr@498 448 * everything to int64_t, which is fine. On 64-bit machines,
mas01cr@498 449 * however, the left hand side is promoted to a uint64_t, while
mas01cr@498 450 * the right hand side remains int64_t.
mas01cr@498 451 *
mas01cr@498 452 * The mixture of signed and unsigned types in comparisons is Evil
mas01cr@498 453 * Bad and Wrong, and gcc complains about it. (It's right to do
mas01cr@498 454 * so, actually). Of course in this case it will never matter
mas01cr@498 455 * because of the particular relationships between all of these
mas01cr@498 456 * numbers, so we just cast the left hand side to off_t, which
mas01cr@498 457 * will do the right thing for us on all platforms.
mas01cr@498 458 *
mas01cr@498 459 * I hate C.
mas01cr@498 460 */
mas01mc@541 461
mas01mc@539 462 if( (!data_size) && ((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) {
mas01cr@498 463 goto error;
mas01cr@498 464 }
mas01mc@541 465
mas01mc@541 466 // If file descriptor is new, read the dimensionality, maybe cache the file descriptor
mas01mc@541 467 if( !( cache && cache->power_fd ) ){
mas01mc@541 468 read_or_goto_error(fd, &dim, sizeof(uint32_t));
mas01mc@541 469 if(dim != 1) {
mas01mc@541 470 goto error;
mas01mc@541 471 }
mas01mc@541 472 if(cache)
mas01mc@541 473 cache->power_fd = fd;
mas01cr@498 474 }
mas01mc@541 475
mas01mc@541 476 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 477 if(!datum->power)
mas01mc@541 478 datum->power = (double *) malloc(size / datum->dim);
mas01cr@498 479 if(!datum->power) {
mas01cr@498 480 goto error;
mas01cr@498 481 }
mas01mc@541 482
mas01mc@539 483 if(data_offset)
mas01mc@541 484 lseek(fd, sizeof(uint32_t) + data_offset/datum->dim, SEEK_SET);
mas01mc@541 485
mas01cr@498 486 read_or_goto_error(fd, datum->power, size / datum->dim);
mas01mc@541 487
mas01mc@541 488 if(!cache)
mas01mc@541 489 close(fd);
mas01mc@541 490 fd = 0;
mas01cr@498 491 }
mas01mc@541 492
mas01cr@498 493 if(insert->times) {
mas01cr@498 494 double t, *tp;
mas01mc@541 495
mas01mc@541 496 // Clear the cache if necessary
mas01mc@541 497 if(clear_cache){
mas01mc@541 498 fclose(cache->times_file);
mas01mc@541 499 cache->times_file = 0;
mas01cr@498 500 }
mas01mc@541 501
mas01mc@541 502 // Use the cached file descriptor or open a new file descriptor and maybe cache
mas01mc@541 503 if (cache && cache->times_file)
mas01mc@541 504 file = cache->times_file;
mas01mc@541 505 else{
mas01mc@541 506 if(!(file = fopen(insert->times, "r"))) {
mas01mc@541 507 goto error;
mas01mc@541 508 }
mas01mc@541 509 if(cache)
mas01mc@541 510 cache->times_file = file;
mas01mc@541 511 }
mas01mc@541 512
mas01mc@541 513 // Allocate data memory if necessary, read the requested amount of data
mas01mc@541 514 if(!datum->times)
mas01mc@541 515 datum->times = (double *) malloc(2 * size / datum->dim);
mas01cr@498 516 if(!datum->times) {
mas01cr@498 517 goto error;
mas01cr@498 518 }
mas01mc@541 519
mas01mc@541 520 rewind(file);
mas01cr@498 521 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 522 goto error;
mas01cr@498 523 }
mas01mc@539 524 if(data_offset)
mas01mc@541 525 while(data_offset-- != 1 )
mas01mc@539 526 if(fscanf(file, " %lf", &t) != 1)
mas01mc@539 527 goto error;
mas01cr@498 528 tp = datum->times;
mas01cr@498 529 *tp++ = t;
mas01cr@498 530 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
mas01cr@498 531 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 532 goto error;
mas01cr@498 533 }
mas01cr@498 534 *tp++ = t;
mas01cr@498 535 *tp++ = t;
mas01cr@498 536 }
mas01cr@498 537 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@498 538 goto error;
mas01cr@498 539 }
mas01cr@498 540 *tp = t;
mas01mc@541 541 if(!cache){
mas01mc@541 542 fclose(file);
mas01mc@541 543 file=0;
mas01mc@541 544 }
mas01cr@498 545 }
mas01cr@498 546 datum->key = insert->key ? insert->key : insert->features;
mas01cr@498 547 return 0;
mas01cr@498 548
mas01cr@498 549 error:
mas01cr@498 550 if(fd > 0) {
mas01cr@498 551 close(fd);
mas01cr@498 552 }
mas01cr@498 553 if(file) {
mas01cr@498 554 fclose(file);
mas01cr@498 555 }
mas01cr@498 556 audiodb_free_datum(datum);
mas01mc@541 557 if(cache)
mas01mc@541 558 audiodb_free_datum_cache(cache);
mas01cr@498 559 return 1;
mas01mc@541 560 }
mas01cr@498 561
mas01cr@498 562 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
mas01cr@509 563 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 564 adb_reference_t *reference = insert;
mas01cr@498 565 int err;
mas01cr@498 566 err = audiodb_insert_reference(adb, reference);
mas01cr@498 567
mas01cr@498 568 if(err == 2) {
mas01cr@498 569 return 0;
mas01cr@498 570 } else {
mas01cr@498 571 return err;
mas01cr@498 572 }
mas01cr@498 573 } else {
mas01cr@498 574 adb_datum_t datum;
mas01cr@498 575 int err;
mas01cr@498 576
mas01cr@498 577 if(audiodb_insert_create_datum(insert, &datum)) {
mas01cr@498 578 return 1;
mas01cr@498 579 }
mas01cr@498 580 err = audiodb_insert_datum(adb, &datum);
mas01cr@498 581 audiodb_free_datum(&datum);
mas01cr@498 582
mas01cr@498 583 if(err == 2) {
mas01cr@498 584 return 0;
mas01cr@498 585 } else {
mas01cr@498 586 return err;
mas01cr@239 587 }
mas01cr@239 588 }
mas01cr@239 589 }
mas01cr@239 590
mas01cr@498 591 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
mas01cr@498 592 int err;
mas01cr@498 593 for(unsigned int n = 0; n < size; n++) {
mas01cr@498 594 if((err = audiodb_insert(adb, &(insert[n])))) {
mas01cr@498 595 return err;
mas01cr@498 596 }
mas01mc@324 597 }
mas01cr@498 598 return 0;
mas01cr@239 599 }