mas01cr@239: #include "audioDB.h" mas01cr@404: extern "C" { mas01cr@404: #include "audioDB_API.h" mas01cr@404: } mas01cr@404: #include "audioDB-internals.h" mas01cr@404: mas01cr@404: static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) { mas01cr@404: adb_header_t *header = adb->header; mas01cr@409: if(header->flags & O2_FLAG_LARGE_ADB) { mas01cr@409: return true; mas01cr@409: } else { mas01cr@409: /* FIXME: timesTableOffset isn't necessarily the next biggest mas01cr@409: * offset after dataOffset. Maybe make the offsets into an array mas01cr@409: * that we can iterate over... */ mas01cr@409: return (header->timesTableOffset > mas01cr@409: (header->dataOffset + header->length + size)); mas01cr@409: } mas01cr@404: } mas01cr@404: mas01cr@404: static bool audiodb_enough_per_file_space_free(adb_t *adb) { mas01cr@404: /* FIXME: the comment above about the ordering of the tables applies mas01cr@404: here too. */ mas01cr@404: adb_header_t *header = adb->header; mas01cr@404: off_t file_table_length = header->trackTableOffset - header->fileTableOffset; mas01cr@404: off_t track_table_length = header->dataOffset - header->trackTableOffset; mas01cr@404: int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE; mas01cr@404: int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE; mas01cr@404: /* maxfiles is the _minimum_ of the two. Do not be confused... */ mas01cr@409: int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles; mas01cr@409: if(header->flags & O2_FLAG_LARGE_ADB) { mas01cr@409: /* by default, these tables are created with the same size as the mas01cr@409: * fileTable (which should be called key_table); relying on that mas01cr@409: * always being the case, though, smacks of optimism, so instead mas01cr@409: * we code defensively... */ mas01cr@409: off_t data_table_length = header->timesTableOffset - header->dataOffset; mas01cr@409: off_t times_table_length = header->powerTableOffset - header->timesTableOffset; mas01cr@409: off_t power_table_length = header->dbSize - header->powerTableOffset; mas01cr@409: int dmaxfiles = data_table_length / O2_FILETABLE_ENTRY_SIZE; mas01cr@409: int timaxfiles = times_table_length / O2_FILETABLE_ENTRY_SIZE; mas01cr@409: int pmaxfiles = power_table_length / O2_FILETABLE_ENTRY_SIZE; mas01cr@409: /* ... even though it means a certain amount of tedium. */ mas01cr@409: maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles; mas01cr@409: maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles; mas01cr@409: maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles; mas01cr@409: } mas01cr@409: return (header->numFiles < (unsigned int) maxfiles); mas01cr@404: } mas01cr@404: mas01cr@404: /* mas01cr@404: * Hey, look, a comment. Normally I wouldn't bother, as the code mas01cr@404: * should be self-documenting, but a lot of logic is concentrated in mas01cr@404: * this one place, so let's give an overview beforehand. To insert a mas01cr@404: * datum into the database, we: mas01cr@404: * mas01cr@404: * 1. check write permission; mas01cr@409: * 2. check for enough space; mas01cr@409: * 3. check that datum->dim and adb->header->dim agree (or that the mas01cr@404: * header dimension is zero, in which case write datum->dim to mas01cr@404: * adb->header->dim). mas01cr@453: * 4. check for presence of datum->key in adb->keymap; mas01cr@409: * 5. check for consistency between power and O2_FLAG_POWER, and mas01cr@404: * times and O2_FLAG_TIMES; mas01cr@409: * 6. write in data, power, times as appropriate; add to track mas01cr@404: * and key tables too; mas01cr@409: * 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill mas01cr@409: * in table; mas01cr@453: * 8. update adb->keys, adb->keymap, adb->track_lengths, mas01cr@453: * adb->track_offsets and adb->header; mas01cr@409: * 9. sync adb->header with disk. mas01cr@404: * mas01cr@409: * Step 9 essentially commits the transaction; until we update mas01cr@408: * header->length, nothing will recognize the newly-written data. In mas01cr@408: * principle, if it fails, we should roll back, which we can in fact mas01cr@409: * do on the assumption that nothing in step 8 can ever fail; on the mas01cr@408: * other hand, if it's failed, then it's unlikely that rolling back by mas01cr@408: * syncing the original header back to disk is going to work mas01cr@408: * desperately well. We should perhaps take an operating-system lock mas01cr@409: * around step 9, so that we can't be interrupted part-way through mas01cr@408: * (except of course for SIGKILL, but if we're hit with that we will mas01cr@408: * always lose). mas01cr@404: */ mas01cr@408: static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) { mas01cr@404: mas01cr@404: off_t size, offset, nfiles; mas01cr@426: double *l2norm_buffer = NULL; mas01cr@404: mas01cr@404: /* 1. check write permission; */ mas01cr@404: if(!(adb->flags & O_RDWR)) { mas01cr@404: return 1; mas01cr@404: } mas01cr@409: /* 2. check for enough space; */ mas01cr@404: size = sizeof(double) * datum->nvectors * datum->dim; mas01cr@404: if(!audiodb_enough_data_space_free(adb, size)) { mas01cr@404: return 1; mas01cr@404: } mas01cr@404: if(!audiodb_enough_per_file_space_free(adb)) { mas01cr@404: return 1; mas01cr@404: } mas01cr@409: /* 3. check that datum->dim and adb->header->dim agree (or that the mas01cr@404: * header dimension is zero, in which case write datum->dim to mas01cr@404: * adb->header->dim). mas01cr@404: */ mas01cr@404: if(adb->header->dim == 0) { mas01cr@404: adb->header->dim = datum->dim; mas01cr@404: } else if (adb->header->dim != datum->dim) { mas01cr@404: return 1; mas01cr@404: } mas01cr@453: /* 4. check for presence of datum->key in adb->keymap; */ mas01cr@453: if(adb->keymap->count(datum->key)) { mas01cr@404: /* not part of an explicit API/ABI, but we need a distinguished mas01cr@404: value in this circumstance to preserve somewhat wonky behaviour mas01cr@404: of audioDB::batchinsert. */ mas01cr@404: return 2; mas01cr@404: } mas01cr@409: /* 5. check for consistency between power and O2_FLAG_POWER, and mas01cr@404: * times and O2_FLAG_TIMES; mas01cr@404: */ mas01cr@404: if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) || mas01cr@404: ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) { mas01cr@404: return 1; mas01cr@404: } mas01cr@404: if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) { mas01cr@404: if(adb->header->numFiles == 0) { mas01cr@404: adb->header->flags |= O2_FLAG_TIMES; mas01cr@404: } else { mas01cr@404: return 1; mas01cr@404: } mas01cr@404: } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) { mas01cr@404: return 1; mas01cr@404: } mas01cr@409: /* 6. write in data, power, times as appropriate; add to track mas01cr@404: * and key tables too; mas01cr@404: */ mas01cr@404: offset = adb->header->length; mas01cr@404: nfiles = adb->header->numFiles; mas01cr@404: mas01cr@410: /* FIXME: checking for all these lseek()s */ mas01cr@409: lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1); mas01cr@404: lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE); mas01cr@409: if(adb->header->flags & O2_FLAG_LARGE_ADB) { mas01cr@409: char cwd[PATH_MAX]; mas01cr@409: char slash = '/'; mas01cr@404: mas01cr@410: if(!getcwd(cwd, PATH_MAX)) { mas01cr@410: goto error; mas01cr@410: } mas01cr@409: lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); mas01cr@409: if(*((char *) datum->data) != '/') { mas01cr@410: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@410: write_or_goto_error(adb->fd, &slash, 1); mas01cr@409: } mas01cr@410: write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1); mas01cr@409: if(datum->power) { mas01cr@409: lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); mas01cr@409: if(*((char *) datum->power) != '/') { mas01cr@410: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@410: write_or_goto_error(adb->fd, &slash, 1); mas01cr@409: } mas01cr@410: write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1); mas01cr@409: } mas01cr@409: if(datum->times) { mas01cr@409: lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET); mas01cr@409: if(*((char *) datum->times) != '/') { mas01cr@410: write_or_goto_error(adb->fd, cwd, strlen(cwd)); mas01cr@410: write_or_goto_error(adb->fd, &slash, 1); mas01cr@409: } mas01cr@410: write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1); mas01cr@409: } mas01cr@409: } else { mas01cr@409: lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim); mas01cr@409: if(datum->power) { mas01cr@409: lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors); mas01cr@409: } mas01cr@409: if(datum->times) { mas01cr@409: lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2); mas01cr@409: } mas01cr@409: } mas01cr@409: mas01cr@409: /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill mas01cr@409: * in table; mas01cr@409: */ mas01cr@409: if((adb->header->flags & O2_FLAG_L2NORM) && mas01cr@409: !(adb->header->flags & O2_FLAG_LARGE_ADB)) { mas01cr@408: l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double)); mas01cr@408: mas01cr@426: audiodb_l2norm_buffer((double *) datum->data, datum->dim, datum->nvectors, l2norm_buffer); mas01cr@408: lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET); mas01cr@410: write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors); mas01cr@408: free(l2norm_buffer); mas01cr@410: l2norm_buffer = NULL; mas01cr@404: } mas01cr@404: mas01cr@453: /* 8. update adb->keys, adb->keymap, adb->track_lengths, mas01cr@453: * adb->track_offsets and adb->header; mas01cr@442: */ mas01cr@453: adb->keys->push_back(datum->key); mas01cr@453: (*adb->keymap)[datum->key] = adb->header->numFiles; mas01cr@432: adb->track_lengths->push_back(datum->nvectors); mas01cr@453: adb->track_offsets->push_back(offset); mas01cr@404: adb->header->numFiles += 1; mas01cr@404: adb->header->length += sizeof(double) * datum->nvectors * datum->dim; mas01cr@404: mas01cr@409: /* 9. sync adb->header with disk. */ mas01cr@404: return audiodb_sync_header(adb); mas01cr@404: mas01cr@404: error: mas01cr@410: if(l2norm_buffer) { mas01cr@410: free(l2norm_buffer); mas01cr@410: } mas01cr@404: return 1; mas01cr@404: } mas01cr@239: mas01cr@473: int audiodb_insert_datum(adb_t *adb, const adb_datum_t *datum) { mas01cr@440: if(adb->header->flags & O2_FLAG_LARGE_ADB) { mas01cr@440: return 1; mas01cr@440: } else { mas01cr@440: adb_datum_internal_t d; mas01cr@440: d.nvectors = datum->nvectors; mas01cr@440: d.dim = datum->dim; mas01cr@440: d.key = datum->key; mas01cr@440: d.data = datum->data; mas01cr@440: d.times = datum->times; mas01cr@440: d.power = datum->power; mas01cr@440: return audiodb_insert_datum_internal(adb, &d); mas01cr@440: } mas01cr@408: } mas01cr@408: mas01cr@473: int audiodb_insert_reference(adb_t *adb, const adb_reference_t *reference) { mas01cr@441: if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) { mas01cr@441: return 1; mas01cr@441: } else { mas01cr@441: adb_datum_internal_t d; mas01cr@441: struct stat st; mas01cr@441: int fd; mas01cr@441: off_t size; mas01cr@441: mas01cr@441: if((fd = open(reference->features, O_RDONLY)) == -1) { mas01cr@441: return 1; mas01cr@441: } mas01cr@441: if(fstat(fd, &st)) { mas01cr@441: goto error; mas01cr@441: } mas01cr@441: read_or_goto_error(fd, &(d.dim), sizeof(uint32_t)); mas01cr@441: close(fd); mas01cr@441: fd = 0; mas01cr@441: size = st.st_size - sizeof(uint32_t); mas01cr@441: d.nvectors = size / (sizeof(double) * d.dim); mas01cr@441: d.data = (void *) reference->features; mas01cr@441: if(reference->power) { mas01cr@441: if(stat(reference->power, &st)) { mas01cr@441: return 1; mas01cr@441: } mas01cr@441: } mas01cr@441: d.power = (void *) reference->power; mas01cr@441: if(reference->times) { mas01cr@441: if(stat(reference->times, &st)) { mas01cr@441: return 1; mas01cr@441: } mas01cr@441: } mas01cr@441: d.times = (void *) reference->times; mas01cr@441: d.key = reference->key ? reference->key : reference->features; mas01cr@441: return audiodb_insert_datum_internal(adb, &d); mas01cr@441: error: mas01cr@441: if(fd) { mas01cr@441: close(fd); mas01cr@441: } mas01cr@441: return 1; mas01cr@441: } mas01cr@441: } mas01cr@441: mas01cr@443: int audiodb_free_datum(adb_datum_t *datum) { mas01cr@406: if(datum->data) { mas01cr@406: free(datum->data); mas01cr@472: datum->data = NULL; mas01cr@406: } mas01cr@406: if(datum->power) { mas01cr@406: free(datum->power); mas01cr@472: datum->power = NULL; mas01cr@406: } mas01cr@406: if(datum->times) { mas01cr@406: free(datum->times); mas01cr@472: datum->times = NULL; mas01cr@406: } mas01cr@406: return 0; mas01cr@406: } mas01cr@406: mas01cr@443: int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) { mas01cr@405: int fd = 0; mas01cr@405: FILE *file = NULL; mas01cr@405: struct stat st; mas01cr@404: off_t size; mas01cr@405: mas01cr@406: datum->data = NULL; mas01cr@406: datum->power = NULL; mas01cr@406: datum->times = NULL; mas01cr@405: if((fd = open(insert->features, O_RDONLY)) == -1) { mas01cr@405: goto error; mas01cr@370: } mas01cr@405: if(fstat(fd, &st)) { mas01cr@405: goto error; mas01cr@404: } mas01cr@410: read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t)); mas01cr@405: size = st.st_size - sizeof(uint32_t); mas01cr@406: datum->nvectors = size / (sizeof(double) * datum->dim); mas01cr@406: datum->data = (double *) malloc(size); mas01cr@406: if(!datum->data) { mas01cr@405: goto error; mas01cr@404: } mas01cr@410: read_or_goto_error(fd, datum->data, size); mas01cr@404: close(fd); mas01cr@405: fd = 0; mas01cr@405: if(insert->power) { mas01cr@405: int dim; mas01cr@405: if((fd = open(insert->power, O_RDONLY)) == -1) { mas01cr@405: goto error; mas01cr@405: } mas01cr@405: if(fstat(fd, &st)) { mas01cr@405: goto error; mas01cr@405: } mas01cr@412: /* This cast is so non-trivial that it deserves a comment. mas01cr@412: * mas01cr@412: * The data types in this expression, left to right, are: off_t, mas01cr@412: * size_t, off_t, uint32_t. The rules for conversions in mas01cr@412: * arithmetic expressions with mixtures of integral types are mas01cr@412: * essentially that the widest type wins, with unsigned types mas01cr@412: * winning on a tie-break. mas01cr@412: * mas01cr@412: * Because we are enforcing (through the use of sufficient mas01cr@412: * compiler flags, if necessary) that off_t be a (signed) 64-bit mas01cr@412: * type, the only variability in this set of types is in fact the mas01cr@412: * size_t. On 32-bit machines, size_t is uint32_t and so the mas01cr@412: * coercions on both sides of the equality end up promoting mas01cr@412: * everything to int64_t, which is fine. On 64-bit machines, mas01cr@412: * however, the left hand side is promoted to a uint64_t, while mas01cr@412: * the right hand side remains int64_t. mas01cr@412: * mas01cr@412: * The mixture of signed and unsigned types in comparisons is Evil mas01cr@412: * Bad and Wrong, and gcc complains about it. (It's right to do mas01cr@412: * so, actually). Of course in this case it will never matter mas01cr@412: * because of the particular relationships between all of these mas01cr@412: * numbers, so we just cast the left hand side to off_t, which mas01cr@412: * will do the right thing for us on all platforms. mas01cr@412: * mas01cr@412: * I hate C. mas01cr@412: */ mas01cr@412: if(((off_t) (st.st_size - sizeof(uint32_t))) != (size / datum->dim)) { mas01cr@405: goto error; mas01cr@405: } mas01cr@410: read_or_goto_error(fd, &dim, sizeof(uint32_t)); mas01cr@405: if(dim != 1) { mas01cr@405: goto error; mas01cr@405: } mas01cr@406: datum->power = (double *) malloc(size / datum->dim); mas01cr@406: if(!datum->power) { mas01cr@405: goto error; mas01cr@405: } mas01cr@410: read_or_goto_error(fd, datum->power, size / datum->dim); mas01cr@405: close(fd); mas01cr@405: } mas01cr@405: if(insert->times) { mas01cr@405: double t, *tp; mas01cr@405: if(!(file = fopen(insert->times, "r"))) { mas01cr@405: goto error; mas01cr@405: } mas01cr@406: datum->times = (double *) malloc(2 * size / datum->dim); mas01cr@406: if(!datum->times) { mas01cr@405: goto error; mas01cr@404: } mas01cr@405: if(fscanf(file, " %lf", &t) != 1) { mas01cr@405: goto error; mas01cr@405: } mas01cr@406: tp = datum->times; mas01cr@405: *tp++ = t; mas01cr@406: for(unsigned int n = 0; n < datum->nvectors - 1; n++) { mas01cr@405: if(fscanf(file, " %lf", &t) != 1) { mas01cr@405: goto error; mas01cr@405: } mas01cr@405: *tp++ = t; mas01cr@405: *tp++ = t; mas01cr@405: } mas01cr@405: if(fscanf(file, " %lf", &t) != 1) { mas01cr@405: goto error; mas01cr@405: } mas01cr@405: *tp = t; mas01cr@405: fclose(file); mas01cr@404: } mas01cr@406: datum->key = insert->key ? insert->key : insert->features; mas01cr@406: return 0; mas01cr@405: mas01cr@405: error: mas01cr@405: if(fd > 0) { mas01cr@405: close(fd); mas01cr@405: } mas01cr@405: if(file) { mas01cr@405: fclose(file); mas01cr@405: } mas01cr@406: audiodb_free_datum(datum); mas01cr@406: return 1; mas01cr@406: } mas01cr@406: mas01cr@406: int audiodb_insert(adb_t *adb, adb_insert_t *insert) { mas01cr@406: if(adb->header->flags & O2_FLAG_LARGE_ADB) { mas01cr@441: adb_reference_t *reference = insert; mas01cr@409: int err; mas01cr@441: err = audiodb_insert_reference(adb, reference); mas01cr@409: mas01cr@409: if(err == 2) { mas01cr@409: return 0; mas01cr@409: } else { mas01cr@409: return err; mas01cr@409: } mas01cr@406: } else { mas01cr@406: adb_datum_t datum; mas01cr@406: int err; mas01cr@406: mas01cr@406: if(audiodb_insert_create_datum(insert, &datum)) { mas01cr@406: return 1; mas01cr@406: } mas01cr@406: err = audiodb_insert_datum(adb, &datum); mas01cr@406: audiodb_free_datum(&datum); mas01cr@406: mas01cr@406: if(err == 2) { mas01cr@406: return 0; mas01cr@409: } else { mas01cr@406: return err; mas01cr@406: } mas01cr@405: } mas01cr@405: } mas01cr@405: mas01cr@405: int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) { mas01cr@405: int err; mas01cr@405: for(unsigned int n = 0; n < size; n++) { mas01cr@405: if((err = audiodb_insert(adb, &(insert[n])))) { mas01cr@405: return err; mas01cr@404: } mas01cr@404: } mas01cr@405: return 0; mas01cr@239: }