annotate insert.cpp @ 410:d7e590d58c85 api-inversion

Pavlovian response to compiler warnings... ... attempt to squash them. For now we can get most of the way by writing a simple write_or_goto_error() macro for write(), and the equivalent for read(). One of the warnings, for the return value of chdir(), is silly, because we're already in an error case, and we really can't do anything sensible if the chdir fails. Try to deal with it anyway.
author mas01cr
date Thu, 11 Dec 2008 08:54:01 +0000
parents 99e6cbad7f76
children 223eda8408e1
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@404 2 extern "C" {
mas01cr@404 3 #include "audioDB_API.h"
mas01cr@404 4 }
mas01cr@404 5 #include "audioDB-internals.h"
mas01cr@404 6
mas01cr@404 7 static bool audiodb_enough_data_space_free(adb_t *adb, off_t size) {
mas01cr@404 8 adb_header_t *header = adb->header;
mas01cr@409 9 if(header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@409 10 return true;
mas01cr@409 11 } else {
mas01cr@409 12 /* FIXME: timesTableOffset isn't necessarily the next biggest
mas01cr@409 13 * offset after dataOffset. Maybe make the offsets into an array
mas01cr@409 14 * that we can iterate over... */
mas01cr@409 15 return (header->timesTableOffset >
mas01cr@409 16 (header->dataOffset + header->length + size));
mas01cr@409 17 }
mas01cr@404 18 }
mas01cr@404 19
mas01cr@404 20 static bool audiodb_enough_per_file_space_free(adb_t *adb) {
mas01cr@404 21 /* FIXME: the comment above about the ordering of the tables applies
mas01cr@404 22 here too. */
mas01cr@404 23 adb_header_t *header = adb->header;
mas01cr@404 24 off_t file_table_length = header->trackTableOffset - header->fileTableOffset;
mas01cr@404 25 off_t track_table_length = header->dataOffset - header->trackTableOffset;
mas01cr@404 26 int fmaxfiles = file_table_length / O2_FILETABLE_ENTRY_SIZE;
mas01cr@404 27 int tmaxfiles = track_table_length / O2_TRACKTABLE_ENTRY_SIZE;
mas01cr@404 28 /* maxfiles is the _minimum_ of the two. Do not be confused... */
mas01cr@409 29 int maxfiles = fmaxfiles > tmaxfiles ? tmaxfiles : fmaxfiles;
mas01cr@409 30 if(header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@409 31 /* by default, these tables are created with the same size as the
mas01cr@409 32 * fileTable (which should be called key_table); relying on that
mas01cr@409 33 * always being the case, though, smacks of optimism, so instead
mas01cr@409 34 * we code defensively... */
mas01cr@409 35 off_t data_table_length = header->timesTableOffset - header->dataOffset;
mas01cr@409 36 off_t times_table_length = header->powerTableOffset - header->timesTableOffset;
mas01cr@409 37 off_t power_table_length = header->dbSize - header->powerTableOffset;
mas01cr@409 38 int dmaxfiles = data_table_length / O2_FILETABLE_ENTRY_SIZE;
mas01cr@409 39 int timaxfiles = times_table_length / O2_FILETABLE_ENTRY_SIZE;
mas01cr@409 40 int pmaxfiles = power_table_length / O2_FILETABLE_ENTRY_SIZE;
mas01cr@409 41 /* ... even though it means a certain amount of tedium. */
mas01cr@409 42 maxfiles = maxfiles > dmaxfiles ? dmaxfiles : maxfiles;
mas01cr@409 43 maxfiles = maxfiles > timaxfiles ? timaxfiles : maxfiles;
mas01cr@409 44 maxfiles = maxfiles > pmaxfiles ? pmaxfiles : maxfiles;
mas01cr@409 45 }
mas01cr@409 46 return (header->numFiles < (unsigned int) maxfiles);
mas01cr@404 47 }
mas01cr@404 48
mas01cr@404 49 /*
mas01cr@404 50 * Hey, look, a comment. Normally I wouldn't bother, as the code
mas01cr@404 51 * should be self-documenting, but a lot of logic is concentrated in
mas01cr@404 52 * this one place, so let's give an overview beforehand. To insert a
mas01cr@404 53 * datum into the database, we:
mas01cr@404 54 *
mas01cr@404 55 * 1. check write permission;
mas01cr@409 56 * 2. check for enough space;
mas01cr@409 57 * 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@404 58 * header dimension is zero, in which case write datum->dim to
mas01cr@404 59 * adb->header->dim).
mas01cr@409 60 * 4. check for presence of datum->key in adb->keys;
mas01cr@409 61 * 5. check for consistency between power and O2_FLAG_POWER, and
mas01cr@404 62 * times and O2_FLAG_TIMES;
mas01cr@409 63 * 6. write in data, power, times as appropriate; add to track
mas01cr@404 64 * and key tables too;
mas01cr@409 65 * 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
mas01cr@409 66 * in table;
mas01cr@409 67 * 8. update adb->keys and adb->header;
mas01cr@409 68 * 9. sync adb->header with disk.
mas01cr@404 69 *
mas01cr@409 70 * Step 9 essentially commits the transaction; until we update
mas01cr@408 71 * header->length, nothing will recognize the newly-written data. In
mas01cr@408 72 * principle, if it fails, we should roll back, which we can in fact
mas01cr@409 73 * do on the assumption that nothing in step 8 can ever fail; on the
mas01cr@408 74 * other hand, if it's failed, then it's unlikely that rolling back by
mas01cr@408 75 * syncing the original header back to disk is going to work
mas01cr@408 76 * desperately well. We should perhaps take an operating-system lock
mas01cr@409 77 * around step 9, so that we can't be interrupted part-way through
mas01cr@408 78 * (except of course for SIGKILL, but if we're hit with that we will
mas01cr@408 79 * always lose).
mas01cr@404 80 */
mas01cr@408 81 static int audiodb_insert_datum_internal(adb_t *adb, adb_datum_internal_t *datum) {
mas01cr@404 82
mas01cr@404 83 off_t size, offset, nfiles;
mas01cr@410 84 double *l2norm_buffer = NULL, *lp, *dp;
mas01cr@404 85
mas01cr@404 86 /* 1. check write permission; */
mas01cr@404 87 if(!(adb->flags & O_RDWR)) {
mas01cr@404 88 return 1;
mas01cr@404 89 }
mas01cr@409 90 /* 2. check for enough space; */
mas01cr@404 91 size = sizeof(double) * datum->nvectors * datum->dim;
mas01cr@404 92 if(!audiodb_enough_data_space_free(adb, size)) {
mas01cr@404 93 return 1;
mas01cr@404 94 }
mas01cr@404 95 if(!audiodb_enough_per_file_space_free(adb)) {
mas01cr@404 96 return 1;
mas01cr@404 97 }
mas01cr@409 98 /* 3. check that datum->dim and adb->header->dim agree (or that the
mas01cr@404 99 * header dimension is zero, in which case write datum->dim to
mas01cr@404 100 * adb->header->dim).
mas01cr@404 101 */
mas01cr@404 102 if(adb->header->dim == 0) {
mas01cr@404 103 adb->header->dim = datum->dim;
mas01cr@404 104 } else if (adb->header->dim != datum->dim) {
mas01cr@404 105 return 1;
mas01cr@404 106 }
mas01cr@409 107 /* 4. check for presence of datum->key in adb->keys; */
mas01cr@404 108 if(adb->keys->count(datum->key)) {
mas01cr@404 109 /* not part of an explicit API/ABI, but we need a distinguished
mas01cr@404 110 value in this circumstance to preserve somewhat wonky behaviour
mas01cr@404 111 of audioDB::batchinsert. */
mas01cr@404 112 return 2;
mas01cr@404 113 }
mas01cr@409 114 /* 5. check for consistency between power and O2_FLAG_POWER, and
mas01cr@404 115 * times and O2_FLAG_TIMES;
mas01cr@404 116 */
mas01cr@404 117 if((datum->power && !(adb->header->flags & O2_FLAG_POWER)) ||
mas01cr@404 118 ((adb->header->flags & O2_FLAG_POWER) && !datum->power)) {
mas01cr@404 119 return 1;
mas01cr@404 120 }
mas01cr@404 121 if(datum->times && !(adb->header->flags & O2_FLAG_TIMES)) {
mas01cr@404 122 if(adb->header->numFiles == 0) {
mas01cr@404 123 adb->header->flags |= O2_FLAG_TIMES;
mas01cr@404 124 } else {
mas01cr@404 125 return 1;
mas01cr@404 126 }
mas01cr@404 127 } else if ((adb->header->flags & O2_FLAG_TIMES) && !datum->times) {
mas01cr@404 128 return 1;
mas01cr@404 129 }
mas01cr@409 130 /* 6. write in data, power, times as appropriate; add to track
mas01cr@404 131 * and key tables too;
mas01cr@404 132 */
mas01cr@404 133 offset = adb->header->length;
mas01cr@404 134 nfiles = adb->header->numFiles;
mas01cr@404 135
mas01cr@410 136 /* FIXME: checking for all these lseek()s */
mas01cr@409 137 lseek(adb->fd, adb->header->fileTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@410 138 write_or_goto_error(adb->fd, datum->key, strlen(datum->key)+1);
mas01cr@404 139 lseek(adb->fd, adb->header->trackTableOffset + nfiles * O2_TRACKTABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@410 140 write_or_goto_error(adb->fd, &datum->nvectors, O2_TRACKTABLE_ENTRY_SIZE);
mas01cr@409 141 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@409 142 char cwd[PATH_MAX];
mas01cr@409 143 char slash = '/';
mas01cr@404 144
mas01cr@410 145 if(!getcwd(cwd, PATH_MAX)) {
mas01cr@410 146 goto error;
mas01cr@410 147 }
mas01cr@409 148 lseek(adb->fd, adb->header->dataOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@409 149 if(*((char *) datum->data) != '/') {
mas01cr@410 150 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@410 151 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@409 152 }
mas01cr@410 153 write_or_goto_error(adb->fd, datum->data, strlen((const char *) datum->data)+1);
mas01cr@409 154 if(datum->power) {
mas01cr@409 155 lseek(adb->fd, adb->header->powerTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@409 156 if(*((char *) datum->power) != '/') {
mas01cr@410 157 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@410 158 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@409 159 }
mas01cr@410 160 write_or_goto_error(adb->fd, datum->power, strlen((const char *) datum->power)+1);
mas01cr@409 161 }
mas01cr@409 162 if(datum->times) {
mas01cr@409 163 lseek(adb->fd, adb->header->timesTableOffset + nfiles * O2_FILETABLE_ENTRY_SIZE, SEEK_SET);
mas01cr@409 164 if(*((char *) datum->times) != '/') {
mas01cr@410 165 write_or_goto_error(adb->fd, cwd, strlen(cwd));
mas01cr@410 166 write_or_goto_error(adb->fd, &slash, 1);
mas01cr@409 167 }
mas01cr@410 168 write_or_goto_error(adb->fd, datum->times, strlen((const char *) datum->times)+1);
mas01cr@409 169 }
mas01cr@409 170 } else {
mas01cr@409 171 lseek(adb->fd, adb->header->dataOffset + offset, SEEK_SET);
mas01cr@410 172 write_or_goto_error(adb->fd, datum->data, sizeof(double) * datum->nvectors * datum->dim);
mas01cr@409 173 if(datum->power) {
mas01cr@409 174 lseek(adb->fd, adb->header->powerTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@410 175 write_or_goto_error(adb->fd, datum->power, sizeof(double) * datum->nvectors);
mas01cr@409 176 }
mas01cr@409 177 if(datum->times) {
mas01cr@409 178 lseek(adb->fd, adb->header->timesTableOffset + offset / datum->dim * 2, SEEK_SET);
mas01cr@410 179 write_or_goto_error(adb->fd, datum->times, sizeof(double) * datum->nvectors * 2);
mas01cr@409 180 }
mas01cr@409 181 }
mas01cr@409 182
mas01cr@409 183 /* 7. if O2_FLAG_L2NORM and !O2_FLAG_LARGE_ADB, compute norms and fill
mas01cr@409 184 * in table;
mas01cr@409 185 */
mas01cr@409 186 if((adb->header->flags & O2_FLAG_L2NORM) &&
mas01cr@409 187 !(adb->header->flags & O2_FLAG_LARGE_ADB)) {
mas01cr@408 188 l2norm_buffer = (double *) malloc(datum->nvectors * sizeof(double));
mas01cr@408 189
mas01cr@408 190 /* FIXME: shared code with audiodb_norm_existing() */
mas01cr@408 191 dp = (double *) datum->data;
mas01cr@408 192 lp = l2norm_buffer;
mas01cr@408 193 for(size_t i = 0; i < datum->nvectors; i++) {
mas01cr@408 194 *lp = 0;
mas01cr@408 195 for(unsigned int k = 0; k < datum->dim; k++) {
mas01cr@408 196 *lp += (*dp)*(*dp);
mas01cr@408 197 dp++;
mas01cr@408 198 }
mas01cr@408 199 lp++;
mas01cr@404 200 }
mas01cr@408 201 lseek(adb->fd, adb->header->l2normTableOffset + offset / datum->dim, SEEK_SET);
mas01cr@410 202 write_or_goto_error(adb->fd, l2norm_buffer, sizeof(double) * datum->nvectors);
mas01cr@408 203 free(l2norm_buffer);
mas01cr@410 204 l2norm_buffer = NULL;
mas01cr@404 205 }
mas01cr@404 206
mas01cr@409 207 /* 8. update adb->keys and adb->header; */
mas01cr@404 208 adb->keys->insert(datum->key);
mas01cr@404 209 adb->header->numFiles += 1;
mas01cr@404 210 adb->header->length += sizeof(double) * datum->nvectors * datum->dim;
mas01cr@404 211
mas01cr@409 212 /* 9. sync adb->header with disk. */
mas01cr@404 213 return audiodb_sync_header(adb);
mas01cr@404 214
mas01cr@404 215 error:
mas01cr@410 216 if(l2norm_buffer) {
mas01cr@410 217 free(l2norm_buffer);
mas01cr@410 218 }
mas01cr@404 219 return 1;
mas01cr@404 220 }
mas01cr@239 221
mas01cr@408 222 int audiodb_insert_datum(adb_t *adb, adb_datum_t *datum) {
mas01cr@408 223 adb_datum_internal_t d;
mas01cr@408 224 d.nvectors = datum->nvectors;
mas01cr@408 225 d.dim = datum->dim;
mas01cr@408 226 d.key = datum->key;
mas01cr@408 227 d.data = datum->data;
mas01cr@408 228 d.times = datum->times;
mas01cr@408 229 d.power = datum->power;
mas01cr@408 230 return audiodb_insert_datum_internal(adb, &d);
mas01cr@408 231 }
mas01cr@408 232
mas01cr@406 233 static int audiodb_free_datum(adb_datum_t *datum) {
mas01cr@406 234 if(datum->data) {
mas01cr@406 235 free(datum->data);
mas01cr@406 236 }
mas01cr@406 237 if(datum->power) {
mas01cr@406 238 free(datum->power);
mas01cr@406 239 }
mas01cr@406 240 if(datum->times) {
mas01cr@406 241 free(datum->times);
mas01cr@406 242 }
mas01cr@406 243 return 0;
mas01cr@406 244 }
mas01cr@406 245
mas01cr@406 246 static int audiodb_insert_create_datum(adb_insert_t *insert, adb_datum_t *datum) {
mas01cr@405 247 int fd = 0;
mas01cr@405 248 FILE *file = NULL;
mas01cr@405 249 struct stat st;
mas01cr@404 250 off_t size;
mas01cr@405 251
mas01cr@406 252 datum->data = NULL;
mas01cr@406 253 datum->power = NULL;
mas01cr@406 254 datum->times = NULL;
mas01cr@405 255 if((fd = open(insert->features, O_RDONLY)) == -1) {
mas01cr@405 256 goto error;
mas01cr@370 257 }
mas01cr@405 258 if(fstat(fd, &st)) {
mas01cr@405 259 goto error;
mas01cr@404 260 }
mas01cr@410 261 read_or_goto_error(fd, &(datum->dim), sizeof(uint32_t));
mas01cr@405 262 size = st.st_size - sizeof(uint32_t);
mas01cr@406 263 datum->nvectors = size / (sizeof(double) * datum->dim);
mas01cr@406 264 datum->data = (double *) malloc(size);
mas01cr@406 265 if(!datum->data) {
mas01cr@405 266 goto error;
mas01cr@404 267 }
mas01cr@410 268 read_or_goto_error(fd, datum->data, size);
mas01cr@404 269 close(fd);
mas01cr@405 270 fd = 0;
mas01cr@405 271 if(insert->power) {
mas01cr@405 272 int dim;
mas01cr@405 273 if((fd = open(insert->power, O_RDONLY)) == -1) {
mas01cr@405 274 goto error;
mas01cr@405 275 }
mas01cr@405 276 if(fstat(fd, &st)) {
mas01cr@405 277 goto error;
mas01cr@405 278 }
mas01cr@406 279 if((st.st_size - sizeof(uint32_t)) != (size / datum->dim)) {
mas01cr@405 280 goto error;
mas01cr@405 281 }
mas01cr@410 282 read_or_goto_error(fd, &dim, sizeof(uint32_t));
mas01cr@405 283 if(dim != 1) {
mas01cr@405 284 goto error;
mas01cr@405 285 }
mas01cr@406 286 datum->power = (double *) malloc(size / datum->dim);
mas01cr@406 287 if(!datum->power) {
mas01cr@405 288 goto error;
mas01cr@405 289 }
mas01cr@410 290 read_or_goto_error(fd, datum->power, size / datum->dim);
mas01cr@405 291 close(fd);
mas01cr@405 292 }
mas01cr@405 293 if(insert->times) {
mas01cr@405 294 double t, *tp;
mas01cr@405 295 if(!(file = fopen(insert->times, "r"))) {
mas01cr@405 296 goto error;
mas01cr@405 297 }
mas01cr@406 298 datum->times = (double *) malloc(2 * size / datum->dim);
mas01cr@406 299 if(!datum->times) {
mas01cr@405 300 goto error;
mas01cr@404 301 }
mas01cr@405 302 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@405 303 goto error;
mas01cr@405 304 }
mas01cr@406 305 tp = datum->times;
mas01cr@405 306 *tp++ = t;
mas01cr@406 307 for(unsigned int n = 0; n < datum->nvectors - 1; n++) {
mas01cr@405 308 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@405 309 goto error;
mas01cr@405 310 }
mas01cr@405 311 *tp++ = t;
mas01cr@405 312 *tp++ = t;
mas01cr@405 313 }
mas01cr@405 314 if(fscanf(file, " %lf", &t) != 1) {
mas01cr@405 315 goto error;
mas01cr@405 316 }
mas01cr@405 317 *tp = t;
mas01cr@405 318 fclose(file);
mas01cr@404 319 }
mas01cr@406 320 datum->key = insert->key ? insert->key : insert->features;
mas01cr@406 321 return 0;
mas01cr@405 322
mas01cr@405 323 error:
mas01cr@405 324 if(fd > 0) {
mas01cr@405 325 close(fd);
mas01cr@405 326 }
mas01cr@405 327 if(file) {
mas01cr@405 328 fclose(file);
mas01cr@405 329 }
mas01cr@406 330 audiodb_free_datum(datum);
mas01cr@406 331 return 1;
mas01cr@406 332 }
mas01cr@406 333
mas01cr@406 334 int audiodb_insert(adb_t *adb, adb_insert_t *insert) {
mas01cr@406 335 if(adb->header->flags & O2_FLAG_LARGE_ADB) {
mas01cr@409 336 adb_datum_internal_t d;
mas01cr@409 337 struct stat st;
mas01cr@409 338 int fd;
mas01cr@409 339 int err;
mas01cr@409 340 off_t size;
mas01cr@409 341
mas01cr@409 342 if((fd = open(insert->features, O_RDONLY)) == -1) {
mas01cr@409 343 return 1;
mas01cr@409 344 }
mas01cr@409 345 if(fstat(fd, &st)) {
mas01cr@410 346 goto error;
mas01cr@409 347 }
mas01cr@410 348 read_or_goto_error(fd, &(d.dim), sizeof(uint32_t));
mas01cr@409 349 close(fd);
mas01cr@410 350 fd = 0;
mas01cr@409 351 size = st.st_size - sizeof(uint32_t);
mas01cr@409 352 d.nvectors = size / (sizeof(double) * d.dim);
mas01cr@409 353 d.data = (void *) insert->features;
mas01cr@409 354 if(insert->power) {
mas01cr@409 355 if(stat(insert->power, &st)) {
mas01cr@409 356 return 1;
mas01cr@409 357 }
mas01cr@409 358 }
mas01cr@409 359 d.power = (void *) insert->power;
mas01cr@409 360 if(insert->times) {
mas01cr@409 361 if(stat(insert->times, &st)) {
mas01cr@409 362 return 1;
mas01cr@409 363 }
mas01cr@409 364 }
mas01cr@409 365 d.times = (void *) insert->times;
mas01cr@409 366 d.key = insert->key ? insert->key : insert->features;
mas01cr@409 367 err = audiodb_insert_datum_internal(adb, &d);
mas01cr@409 368
mas01cr@409 369 if(err == 2) {
mas01cr@409 370 return 0;
mas01cr@409 371 } else {
mas01cr@409 372 return err;
mas01cr@409 373 }
mas01cr@410 374 error:
mas01cr@410 375 if(fd) {
mas01cr@410 376 close(fd);
mas01cr@410 377 }
mas01cr@410 378 return 1;
mas01cr@406 379 } else {
mas01cr@406 380 adb_datum_t datum;
mas01cr@406 381 int err;
mas01cr@406 382
mas01cr@406 383 if(audiodb_insert_create_datum(insert, &datum)) {
mas01cr@406 384 return 1;
mas01cr@406 385 }
mas01cr@406 386 err = audiodb_insert_datum(adb, &datum);
mas01cr@406 387 audiodb_free_datum(&datum);
mas01cr@406 388
mas01cr@406 389 if(err == 2) {
mas01cr@406 390 return 0;
mas01cr@409 391 } else {
mas01cr@406 392 return err;
mas01cr@406 393 }
mas01cr@405 394 }
mas01cr@405 395 }
mas01cr@405 396
mas01cr@405 397 int audiodb_batchinsert(adb_t *adb, adb_insert_t *insert, unsigned int size) {
mas01cr@405 398 int err;
mas01cr@405 399 for(unsigned int n = 0; n < size; n++) {
mas01cr@405 400 if((err = audiodb_insert(adb, &(insert[n])))) {
mas01cr@405 401 return err;
mas01cr@404 402 }
mas01cr@404 403 }
mas01cr@405 404 return 0;
mas01cr@239 405 }