mas01cr@498: extern "C" { mas01cr@498: #include "audioDB_API.h" mas01cr@498: } mas01cr@498: #include "audioDB-internals.h" mas01cr@239: mas01cr@498: static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) { mas01cr@498: adb_header_t *header = adb->header; mas01cr@509: if(header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@498: return true; mas01cr@498: } else { mas01cr@498: /* FIXME: timesTableOffset isn't necessarily the next biggest mas01cr@498: * offset after dataOffset. Maybe make the offsets into an array mas01cr@498: * that we can iterate over... */ mas01cr@498: return (header->timesTableOffset > mas01cr@498: (header->dataOffset + header->length + size)); mas01cr@239: } mas01cr@239: } mas01cr@239: mas01cr@498: static bool audiodb_enough_per_file_space_free(adb_t *adb) { mas01cr@498: /* FIXME: the comment above about the ordering of the tables applies mas01cr@498: here too. */ mas01cr@498: adb_header_t *header = adb->header; mas01cr@498: off_t file_table_length = header->trackTableOffset - header->fileTableOffset; mas01cr@498: off_t track_table_length = header->dataOffset - header->trackTableOffset; mas01cr@509: int fmaxfiles = file_table_length / ADB_FILETABLE_ENTRY_SIZE; mas01cr@509: int tmaxfiles = track_table_length / ADB_TRACKTABLE_ENTRY_SIZE; mas01cr@498: /* maxfiles is the _minimum_ of the two. Do not be confused... */ mas01cr@498: int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles; mas01cr@509: if(header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@498: /* by default, these tables are created with the same size as the mas01cr@498: * fileTable (which should be called key_table); relying on that mas01cr@498: * always being the case, though, smacks of optimism, so instead mas01cr@498: * we code defensively... */ mas01cr@498: off_t data_table_length = header->timesTableOffset - header->dataOffset; mas01cr@498: off_t times_table_length = header->powerTableOffset - header->timesTableOffset; mas01cr@498: off_t power_table_length = header->dbSize - header->powerTableOffset; mas01cr@509: int dmaxfiles = data_table_length / ADB_FILETABLE_ENTRY_SIZE; mas01cr@509: int timaxfiles = times_table_length / ADB_FILETABLE_ENTRY_SIZE; mas01cr@509: int pmaxfiles = power_table_length / ADB_FILETABLE_ENTRY_SIZE; mas01cr@498: /* ... even though it means a certain amount of tedium. */ mas01cr@498: maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles; mas01cr@498: maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles; mas01cr@498: maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles; mas01cr@498: } mas01cr@498: return (header->numFiles < (unsigned int) maxfiles); mas01cr@498: } mas01cr@498: mas01cr@498: /* mas01cr@498: * Hey, look, a comment. Normally I wouldn't bother, as the code mas01cr@498: * should be self-documenting, but a lot of logic is concentrated in mas01cr@498: * this one place, so let's give an overview beforehand. To insert a mas01cr@498: * datum into the database, we: mas01cr@498: * mas01cr@498: * 1. check write permission; mas01cr@498: * 2. check for enough space; mas01cr@498: * 3. check that datum->dim and adb->header->dim agree (or that the mas01cr@498: * header dimension is zero, in which case write datum->dim to mas01cr@498: * adb->header->dim). mas01cr@498: * 4. check for presence of datum->key in adb->keymap; mas01cr@509: * 5. check for consistency between power and ADB_HEADER_FLAG_POWER, mas01cr@509: * and times and ADB_HEADER_FLAG_TIMES; mas01cr@498: * 6. write in data, power, times as appropriate; add to track mas01cr@498: * and key tables too; mas01cr@509: * 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES, mas01cr@509: * compute norms and fill in table; mas01cr@498: * 8. update adb->keys, adb->keymap, adb->track_lengths, mas01cr@498: * adb->track_offsets and adb->header; mas01cr@498: * 9. sync adb->header with disk. mas01cr@498: * mas01cr@498: * Step 9 essentially commits the transaction; until we update mas01cr@498: * header->length, nothing will recognize the newly-written data. In mas01cr@498: * principle, if it fails, we should roll back, which we can in fact mas01cr@498: * do on the assumption that nothing in step 8 can ever fail; on the mas01cr@498: * other hand, if it's failed, then it's unlikely that rolling back by mas01cr@498: * syncing the original header back to disk is going to work mas01cr@498: * desperately well. We should perhaps take an operating-system lock mas01cr@498: * around step 9, so that we can't be interrupted part-way through mas01cr@498: * (except of course for SIGKILL, but if we're hit with that we will mas01cr@498: * always lose). mas01cr@498: */ mas01cr@498: static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) { mas01cr@498: mas01cr@498: off_t size, offset, nfiles; mas01cr@498: double *l2norm_buffer = NULL; mas01cr@498: mas01cr@498: /* 1. check write permission; */ mas01cr@498: if(!(adb->flags & O_RDWR)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: /* 2. check for enough space; */ mas01cr@498: size = sizeof(double) * datum->nvectors * datum->dim; mas01cr@498: if(!audiodb_enough_data_space_free(adb, size)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: if(!audiodb_enough_per_file_space_free(adb)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: /* 3. check that datum->dim and adb->header->dim agree (or that the mas01cr@498: * header dimension is zero, in which case write datum->dim to mas01cr@498: * adb->header->dim). mas01cr@498: */ mas01cr@498: if(adb->header->dim == 0) { mas01cr@498: adb->header->dim = datum->dim; mas01cr@498: } else if (adb->header->dim != datum->dim) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: /* 4. check for presence of datum->key in adb->keymap; */ mas01cr@498: if(adb->keymap->count(datum->key)) { mas01cr@498: /* not part of an explicit API/ABI, but we need a distinguished mas01cr@498: value in this circumstance to preserve somewhat wonky behaviour mas01cr@498: of audioDB::batchinsert. */ mas01cr@498: return 2; mas01cr@498: } mas01cr@509: /* 5. check for consistency between power and ADB_HEADER_FLAG_POWER, mas01cr@509: * and times and ADB_HEADER_FLAG_TIMES; mas01cr@498: */ mas01cr@509: if((datum->power && !(adb->header->flags & ADB_HEADER_FLAG_POWER)) || mas01cr@509: ((adb->header->flags & ADB_HEADER_FLAG_POWER) && !datum->power)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@509: if(datum->times && !(adb->header->flags & ADB_HEADER_FLAG_TIMES)) { mas01cr@498: if(adb->header->numFiles == 0) { mas01cr@509: adb->header->flags |= ADB_HEADER_FLAG_TIMES; mas01cr@498: } else { mas01cr@498: return 1; mas01cr@239: } mas01cr@509: } else if ((adb->header->flags & ADB_HEADER_FLAG_TIMES) && !datum->times) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: /* 6. write in data, power, times as appropriate; add to track mas01cr@498: * and key tables too; mas01cr@498: */ mas01cr@498: offset = adb->header->length; mas01cr@498: nfiles = adb->header->numFiles; mas01cr@498: mas01cr@498: /* FIXME: checking for all these lseek()s */ mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->fileTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE); mas01cr@498: write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1); mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->trackTableOffset + nfiles * ADB_TRACKTABLE_ENTRY_SIZE); mas01cr@509: write_or_goto_error(adb->fd, &datum->nvectors, ADB_TRACKTABLE_ENTRY_SIZE); mas01cr@509: if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@498: char cwd[PATH_MAX]; mas01cr@498: char slash = '/'; mas01cr@498: mas01cr@498: if(!getcwd(cwd, PATH_MAX)) { mas01cr@498: goto error; mas01cr@498: } mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE); mas01cr@498: if(*((char *) datum->data) != '/') { mas01cr@498: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@498: write_or_goto_error(adb->fd, &slash, 1); mas01cr@498: } mas01cr@498: write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1); mas01cr@498: if(datum->power) { mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->powerTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE); mas01cr@498: if(*((char *) datum->power) != '/') { mas01cr@498: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@498: write_or_goto_error(adb->fd, &slash, 1); mas01cr@498: } mas01cr@498: write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1); mas01cr@498: } mas01cr@498: if(datum->times) { mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->timesTableOffset + nfiles * ADB_FILETABLE_ENTRY_SIZE); mas01cr@498: if(*((char *) datum->times) != '/') { mas01cr@498: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@498: write_or_goto_error(adb->fd, &slash, 1); mas01cr@498: } mas01cr@498: write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1); mas01cr@498: } mas01cr@498: } else { mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->dataOffset + offset); mas01cr@498: write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim); mas01cr@498: if(datum->power) { mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->powerTableOffset + offset / datum->dim); mas01cr@498: write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors); mas01cr@498: } mas01cr@498: if(datum->times) { mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2); mas01cr@498: write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2); mas01cr@498: } mas01cr@498: } mas01cr@498: mas01cr@509: /* 7. if ADB_HEADER_FLAG_L2NORM and !ADB_HEADER_FLAG_REFERENCES, mas01cr@509: * compute norms and fill in table; mas01cr@498: */ mas01cr@509: if((adb->header->flags & ADB_HEADER_FLAG_L2NORM) && mas01cr@509: !(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) { mas01cr@498: l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double)); mas01mc@324: mas01cr@498: audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer); mas01cr@596: lseek_set_or_goto_error(adb->fd, adb->header->l2normTableOffset + offset / datum->dim); mas01cr@498: write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors); mas01cr@498: free(l2norm_buffer); mas01cr@498: l2norm_buffer = NULL; mas01cr@498: } mas01cr@498: mas01cr@498: /* 8. update adb->keys, adb->keymap, adb->track_lengths, mas01cr@498: * adb->track_offsets and adb->header; mas01cr@498: */ mas01cr@498: adb->keys->push_back(datum->key); mas01cr@498: (*adb->keymap)[datum->key] = adb->header->numFiles; mas01cr@498: adb->track_lengths->push_back(datum->nvectors); mas01cr@498: adb->track_offsets->push_back(offset); mas01cr@498: adb->header->numFiles += 1; mas01cr@498: adb->header->length += sizeof(double) * datum->nvectors * datum->dim; mas01cr@498: mas01cr@498: /* 9. sync adb->header with disk. */ mas01cr@498: return audiodb_sync_header(adb); mas01cr@498: mas01cr@498: error: mas01cr@596: maybe_free(l2norm_buffer); mas01cr@498: return 1; mas01cr@498: } mas01cr@498: mas01cr@498: int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) { mas01cr@509: if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@498: return 1; mas01cr@498: } else { mas01cr@498: adb_datum_internal_t d; mas01cr@498: d.nvectors = datum->nvectors; mas01cr@498: d.dim = datum->dim; mas01cr@498: d.key = datum->key; mas01cr@498: d.data = datum->data; mas01cr@498: d.times = datum->times; mas01cr@498: d.power = datum->power; mas01cr@498: return audiodb_insert_datum_internal(adb, &d); mas01cr@498: } mas01cr@498: } mas01cr@498: mas01cr@498: int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) { mas01cr@509: if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) { mas01cr@498: return 1; mas01cr@498: } else { mas01cr@498: adb_datum_internal_t d; mas01cr@498: struct stat st; mas01cr@498: int fd; mas01cr@498: off_t size; mas01mc@324: mas01cr@498: if((fd = open(reference->features, O_RDONLY)) == -1) { mas01cr@498: return 1; mas01cr@239: } mas01cr@498: if(fstat(fd, &st)) { mas01cr@498: goto error; mas01cr@239: } mas01cr@498: read_or_goto_error(fd, &(d.dim), sizeof(uint32_t)); mas01cr@498: close(fd); mas01cr@498: fd = 0; mas01cr@498: size = st.st_size - sizeof(uint32_t); mas01cr@498: d.nvectors = size / (sizeof(double) * d.dim); mas01cr@498: d.data = (void *) reference->features; mas01cr@498: if(reference->power) { mas01cr@498: if(stat(reference->power, &st)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: } mas01cr@498: d.power = (void *) reference->power; mas01cr@498: if(reference->times) { mas01cr@498: if(stat(reference->times, &st)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: } mas01cr@498: d.times = (void *) reference->times; mas01cr@498: d.key = reference->key ? reference->key : reference->features; mas01cr@498: return audiodb_insert_datum_internal(adb, &d); mas01cr@498: error: mas01cr@498: if(fd) { mas01cr@498: close(fd); mas01cr@498: } mas01cr@498: return 1; mas01cr@498: } mas01cr@498: } mas01cr@498: mas01cr@580: int audiodb_really_free_datum(adb_datum_t *datum) { mas01cr@498: if(datum->data) { mas01cr@498: free(datum->data); mas01cr@498: datum->data = NULL; mas01cr@498: } mas01cr@498: if(datum->power) { mas01cr@498: free(datum->power); mas01cr@498: datum->power = NULL; mas01cr@498: } mas01cr@498: if(datum->times) { mas01cr@498: free(datum->times); mas01cr@498: datum->times = NULL; mas01cr@498: } mas01cr@498: return 0; mas01cr@498: } mas01cr@498: mas01cr@498: int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) { mas01cr@498: int fd = 0; mas01cr@498: FILE *file = NULL; mas01cr@498: struct stat st; mas01cr@498: off_t size; mas01cr@498: mas01cr@498: datum->data = NULL; mas01cr@498: datum->power = NULL; mas01cr@498: datum->times = NULL; mas01cr@498: if((fd = open(insert->features, O_RDONLY)) == -1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: if(fstat(fd, &st)) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t)); mas01cr@498: size = st.st_size - sizeof(uint32_t); mas01cr@498: datum->nvectors = size / (sizeof(double) * datum->dim); mas01cr@498: datum->data = (double *) malloc(size); mas01cr@498: if(!datum->data) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: read_or_goto_error(fd, datum->data, size); mas01cr@498: close(fd); mas01cr@498: fd = 0; mas01cr@498: if(insert->power) { mas01cr@498: int dim; mas01cr@498: if((fd = open(insert->power, O_RDONLY)) == -1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: if(fstat(fd, &st)) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: /* This cast is so non-trivial that it deserves a comment. mas01cr@498: * mas01cr@498: * The data types in this expression, left to right, are: off_t, mas01cr@498: * size_t, off_t, uint32_t. The rules for conversions in mas01cr@498: * arithmetic expressions with mixtures of integral types are mas01cr@498: * essentially that the widest type wins, with unsigned types mas01cr@498: * winning on a tie-break. mas01cr@498: * mas01cr@498: * Because we are enforcing (through the use of sufficient mas01cr@498: * compiler flags, if necessary) that off_t be a (signed) 64-bit mas01cr@498: * type, the only variability in this set of types is in fact the mas01cr@498: * size_t. On 32-bit machines, size_t is uint32_t and so the mas01cr@498: * coercions on both sides of the equality end up promoting mas01cr@498: * everything to int64_t, which is fine. On 64-bit machines, mas01cr@498: * however, the left hand side is promoted to a uint64_t, while mas01cr@498: * the right hand side remains int64_t. mas01cr@498: * mas01cr@498: * The mixture of signed and unsigned types in comparisons is Evil mas01cr@498: * Bad and Wrong, and gcc complains about it. (It's right to do mas01cr@498: * so, actually). Of course in this case it will never matter mas01cr@498: * because of the particular relationships between all of these mas01cr@498: * numbers, so we just cast the left hand side to off_t, which mas01cr@498: * will do the right thing for us on all platforms. mas01cr@498: * mas01cr@498: * I hate C. mas01cr@594: * mas01cr@594: * Addendum: the above reasoning is skewered on Win32, where off_t mas01cr@594: * is apparently signed 32-bit always (i.e. no large-file mas01cr@594: * support). So now, we cast datum->dim to size_t, so that our mas01cr@594: * types are the same on both sides. I hate C even more. mas01cr@498: */ mas01cr@594: if((st.st_size - sizeof(uint32_t)) != (size / (size_t) datum->dim)) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: read_or_goto_error(fd, &dim, sizeof(uint32_t)); mas01cr@498: if(dim != 1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: datum->power = (double *) malloc(size / datum->dim); mas01cr@498: if(!datum->power) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: read_or_goto_error(fd, datum->power, size / datum->dim); mas01cr@498: close(fd); mas01cr@498: } mas01cr@498: if(insert->times) { mas01cr@498: double t, *tp; mas01cr@498: if(!(file = fopen(insert->times, "r"))) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: datum->times = (double *) malloc(2 * size / datum->dim); mas01cr@498: if(!datum->times) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: if(fscanf(file, " %lf", &t) != 1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: tp = datum->times; mas01cr@498: *tp++ = t; mas01cr@498: for(unsigned int n = 0; n < datum->nvectors - 1; n++) { mas01cr@498: if(fscanf(file, " %lf", &t) != 1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: *tp++ = t; mas01cr@498: *tp++ = t; mas01cr@498: } mas01cr@498: if(fscanf(file, " %lf", &t) != 1) { mas01cr@498: goto error; mas01cr@498: } mas01cr@498: *tp = t; mas01cr@498: fclose(file); mas01cr@498: } mas01cr@498: datum->key = insert->key ? insert->key : insert->features; mas01cr@498: return 0; mas01cr@498: mas01cr@498: error: mas01cr@498: if(fd > 0) { mas01cr@498: close(fd); mas01cr@498: } mas01cr@498: if(file) { mas01cr@498: fclose(file); mas01cr@498: } mas01cr@580: audiodb_really_free_datum(datum); mas01cr@498: return 1; mas01cr@498: } mas01cr@498: mas01cr@498: int audiodb_insert(adb_t *adb, adb_insert_t *insert) { mas01cr@509: if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) { mas01cr@498: adb_reference_t *reference = insert; mas01cr@498: int err; mas01cr@498: err = audiodb_insert_reference(adb, reference); mas01cr@498: mas01cr@498: if(err == 2) { mas01cr@498: return 0; mas01cr@498: } else { mas01cr@498: return err; mas01cr@498: } mas01cr@498: } else { mas01cr@498: adb_datum_t datum; mas01cr@498: int err; mas01cr@498: mas01cr@498: if(audiodb_insert_create_datum(insert, &datum)) { mas01cr@498: return 1; mas01cr@498: } mas01cr@498: err = audiodb_insert_datum(adb, &datum); mas01cr@580: audiodb_really_free_datum(&datum); mas01cr@498: mas01cr@498: if(err == 2) { mas01cr@498: return 0; mas01cr@498: } else { mas01cr@498: return err; mas01cr@239: } mas01cr@239: } mas01cr@239: } mas01cr@239: mas01cr@498: int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) { mas01cr@498: int err; mas01cr@498: for(unsigned int n = 0; n < size; n++) { mas01cr@498: if((err = audiodb_insert(adb, &(insert[n])))) { mas01cr@498: return err; mas01cr@498: } mas01mc@324: } mas01cr@498: return 0; mas01cr@239: }