Mercurial > hg > audiodb
view dump.cpp @ 400:8c7453fb5bd9 api-inversion
Invert audioDB::power_flag / audiodb_power()
Here the exciting discovery is that the mmap(), memcpy(), munmap()
sequence is in fact not safe. In principle an msync() call should be
inserted before unmapping for in-core changes to mmap()ed files to be
flushed to disk.
In this case we work around the problem entirely, by not mmap()ing
anything and doing everything with file descriptors. Amusingly, that's
probably not desperately safe either, this time because we have to move
the file descriptor position (which is also a shared resource). dup()
doesn't save us, as the duplicate file descriptor shares a file
position.
This applies also to the filling of data_buffer in the query loop, and
in fact basically any call to lseek(), which is why I'm not fixing it
now. Solution: if you have multiple threads all acting at once on a
single database, do one audiodb_open() per thread, for now at least.
author | mas01cr |
---|---|
date | Thu, 27 Nov 2008 16:22:52 +0000 |
parents | a65b31660804 |
children | a8a5f2ca5380 |
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
}

/*
 * Dump the contents of an open database into a freshly created
 * directory, together with a shell script that can rebuild it.
 *
 * adb:    open database handle; adb->header and adb->fd are read.
 * output: path of the directory to create.  mkdir() must succeed, so
 *         an already existing directory is an error.
 *
 * Files written inside `output`:
 *   keyList.txt      one key per track
 *   featureList.txt  one feature file name per track
 *   timesList.txt    only when O2_FLAG_TIMES is set
 *   powerList.txt    only when O2_FLAG_POWER is set
 *   restore.sh       script re-creating the database from the above
 * and, unless O2_FLAG_LARGE_ADB is set, NNNNN.features (plus
 * NNNNN.times / NNNNN.power when enabled) for each track.  With
 * O2_FLAG_LARGE_ADB the list files instead contain the absolute paths
 * stored in the database.
 *
 * Returns 0 on success and 1 on any failure.  On failure every
 * descriptor, stream, buffer and mapping acquired here is released and
 * the original working directory is restored (best effort); files
 * already written to `output` are left in place.
 *
 * Side effects callers should know about: the process working
 * directory is temporarily changed to `output`, and the file position
 * of adb->fd is moved by lseek()/read().
 *
 * NOTE: every local is declared before the first `goto error` (also
 * hidden inside mmap_or_goto_error) because C++ forbids jumping past
 * an initialisation into the scope of the variable.
 */
int audiodb_dump(adb_t *adb, const char *output) {
  char *fileTable = 0; /* key_table */
  unsigned *trackTable = 0; /* track_size_table */
  double *timesTable = 0; /* timestamps_table */
  double *powerTable = 0; /* power_table */

  size_t fileTableLength = 0;
  size_t trackTableLength = 0;
  size_t timesTableLength = 0;
  size_t powerTableLength = 0;

  char *featureFileNameTable = 0;
  char *powerFileNameTable = 0;
  char *timesFileNameTable = 0;

  char cwd[PATH_MAX];
  int directory_changed = 0;

  /* -1 means "not open": 0 is a perfectly valid descriptor number. */
  int fLfd = -1, tLfd = -1, pLfd = -1, kLfd = -1;
  FILE *fLFile = 0, *tLFile = 0, *pLFile = 0, *kLFile = 0;

  int times, power;

  char fName[256];
  int ffd = -1, pfd = -1;
  FILE *tFile = 0;
  unsigned pos = 0;
  double *data_buffer = 0;
  size_t data_buffer_size;
  FILE *scriptFile = 0;

  /* Every output file is created exclusively, rw for user/group/other
   * (subject to the umask). */
  const int create_flags = O_CREAT|O_RDWR|O_EXCL;
  const mode_t create_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;

  unsigned nfiles = adb->header->numFiles;

  if(adb->header->length > 0) {
    fileTableLength = ALIGN_PAGE_UP(nfiles * O2_FILETABLE_ENTRY_SIZE);
    trackTableLength = ALIGN_PAGE_UP(nfiles * O2_TRACKTABLE_ENTRY_SIZE);
    if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) {
      off_t length = adb->header->length;
      unsigned dim = adb->header->dim;
      timesTableLength = ALIGN_PAGE_UP(2*length/dim);
      powerTableLength = ALIGN_PAGE_UP(length/dim);
    }

    mmap_or_goto_error(char *, fileTable, adb->header->fileTableOffset, fileTableLength);
    mmap_or_goto_error(unsigned *, trackTable, adb->header->trackTableOffset, trackTableLength);
    if (adb->header->flags & O2_FLAG_LARGE_ADB) {
      /* In a large database these regions hold file names, one
       * O2_FILETABLE_ENTRY_SIZE slot per track, rather than data. */
      mmap_or_goto_error(char *, featureFileNameTable, adb->header->dataOffset, fileTableLength);
      mmap_or_goto_error(char *, powerFileNameTable, adb->header->powerTableOffset, fileTableLength);
      mmap_or_goto_error(char *, timesFileNameTable, adb->header->timesTableOffset, fileTableLength);
    } else {
      mmap_or_goto_error(double *, powerTable, adb->header->powerTableOffset, powerTableLength);
      mmap_or_goto_error(double *, timesTable, adb->header->timesTableOffset, timesTableLength);
    }
  }

  if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
    goto error;
  }

  if ((getcwd(cwd, PATH_MAX)) == 0) {
    goto error;
  }

  /* FIXME: Hrm.  How does chdir(2) interact with threads?  Does each
   * thread have its own working directory? */
  if((chdir(output)) < 0) {
    goto error;
  }
  directory_changed = 1;

  if ((fLfd = open("featureList.txt", create_flags, create_mode)) < 0) {
    goto error;
  }

  times = adb->header->flags & O2_FLAG_TIMES;
  if (times) {
    if ((tLfd = open("timesList.txt", create_flags, create_mode)) < 0) {
      goto error;
    }
  }

  power = adb->header->flags & O2_FLAG_POWER;
  if (power) {
    if ((pLfd = open("powerList.txt", create_flags, create_mode)) < 0) {
      goto error;
    }
  }

  if ((kLfd = open("keyList.txt", create_flags, create_mode)) < 0) {
    goto error;
  }

  /* fdopen() can fail (e.g. out of memory).  On failure the raw
   * descriptor is still ours and is closed by the error path. */
  if ((fLFile = fdopen(fLfd, "w")) == 0) {
    goto error;
  }
  if (times) {
    if ((tLFile = fdopen(tLfd, "w")) == 0) {
      goto error;
    }
  }
  if (power) {
    if ((pLFile = fdopen(pLfd, "w")) == 0) {
      goto error;
    }
  }
  if ((kLFile = fdopen(kLfd, "w")) == 0) {
    goto error;
  }

  /* Feature data is read sequentially from here, one track at a time. */
  if (lseek(adb->fd, adb->header->dataOffset, SEEK_SET) == (off_t) -1) {
    goto error;
  }

  for(unsigned k = 0; k < nfiles; k++) {
    fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE);
    if(adb->header->flags & O2_FLAG_LARGE_ADB) {
      /* Only absolute paths are accepted for the referenced files. */
      char *featureFileName = featureFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
      if(*featureFileName != '/') {
        goto error;
      }
      fprintf(fLFile, "%s\n", featureFileName);
      if(times) {
        char *timesFileName = timesFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
        if(*timesFileName != '/') {
          goto error;
        }
        fprintf(tLFile, "%s\n", timesFileName);
      }
      if(power) {
        char *powerFileName = powerFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
        if(*powerFileName != '/') {
          goto error;
        }
        fprintf(pLFile, "%s\n", powerFileName);
      }
    } else {
      snprintf(fName, 256, "%05d.features", k);
      if ((ffd = open(fName, create_flags, create_mode)) < 0) {
        goto error;
      }
      /* A short write would leave a silently truncated dump, so
       * anything other than a complete write counts as failure. */
      if ((write(ffd, &(adb->header->dim), sizeof(uint32_t))) != (ssize_t) sizeof(uint32_t)) {
        goto error;
      }

      /* FIXME: this repeated malloc()/free() of data buffers is
       * inefficient. */
      /* Widen before multiplying so the product is not truncated to
       * 32 bits. */
      data_buffer_size = (size_t) trackTable[k] * adb->header->dim * sizeof(double);

      {
        void *tmp = malloc(data_buffer_size);
        if (tmp == NULL) {
          goto error;
        }
        data_buffer = (double *) tmp;
      }

      if ((read(adb->fd, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
        goto error;
      }

      if ((write(ffd, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
        goto error;
      }

      free(data_buffer);
      data_buffer = 0;

      fprintf(fLFile, "%s\n", fName);
      close(ffd);
      ffd = -1;

      if (times) {
        snprintf(fName, 256, "%05d.times", k);
        if ((tFile = fopen(fName, "w")) == 0) {
          goto error;
        }
        for(unsigned i = 0; i < trackTable[k]; i++) {
          // KLUDGE: specifying 16 digits of precision after the decimal
          // point is (but check this!) sufficient to uniquely identify
          // doubles; however, that will cause ugliness, as that's
          // vastly too many for most values of interest.  Moving to %a
          // here and scanf() in the timesFile reading might fix this.
          // -- CSR, 2007-10-19
          fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
        }
        /* One extra line: the second value of the track's last pair. */
        fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
        fclose(tFile);
        tFile = 0;

        fprintf(tLFile, "%s\n", fName);
      }

      if (power) {
        uint32_t one = 1;
        snprintf(fName, 256, "%05d.power", k);
        if ((pfd = open(fName, create_flags, create_mode)) < 0) {
          goto error;
        }
        /* Power files start with a dimension word, which is always 1. */
        if ((write(pfd, &one, sizeof(uint32_t))) != (ssize_t) sizeof(uint32_t)) {
          goto error;
        }
        if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) != (ssize_t) (trackTable[k] * sizeof(double))) {
          goto error;
        }
        fprintf(pLFile, "%s\n", fName);
        close(pfd);
        pfd = -1;
      }

      pos += trackTable[k];
      std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
    }
  }

  scriptFile = fopen("restore.sh", "w");
  if (scriptFile == 0) {
    goto error;
  }
  fprintf(scriptFile,
          "#! /bin/sh\n"
          "#\n"
          "# usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n"
          "\n"
          "if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n"
          "if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n"
          "\"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n",
          (int) ((adb->header->timesTableOffset - adb->header->dataOffset) / (1024*1024)),
          // fileTable entries (char[256]) are bigger than trackTable
          // (int), so the granularity of page aligning is finer.
          (int) ((adb->header->trackTableOffset - adb->header->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE),
          (int) ceil(((double) (adb->header->timesTableOffset - adb->header->dataOffset)) / ((double) (adb->header->dbSize - adb->header->l2normTableOffset))));
  if(adb->header->flags & O2_FLAG_L2NORM) {
    fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
  }
  if(power) {
    fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
  }
  fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
  if(times) {
    fprintf(scriptFile, " -T timesList.txt");
  }
  if(power) {
    fprintf(scriptFile, " -W powerList.txt");
  }
  fprintf(scriptFile, "\n");
  fclose(scriptFile);

  fclose(fLFile);
  if(times) {
    fclose(tLFile);
  }
  if(power) {
    fclose(pLFile);
  }
  fclose(kLFile);

  maybe_munmap(fileTable, fileTableLength);
  maybe_munmap(trackTable, trackTableLength);
  maybe_munmap(timesTable, timesTableLength);
  maybe_munmap(powerTable, powerTableLength);
  maybe_munmap(featureFileNameTable, fileTableLength);
  maybe_munmap(timesFileNameTable, fileTableLength);
  maybe_munmap(powerFileNameTable, fileTableLength);

  if((chdir(cwd)) < 0) {
    /* don't goto error because the error handling will try to
     * chdir() */
    return 1;
  }

  return 0;

 error:
  /* Per-track resources that may be live if the loop bailed out. */
  free(data_buffer);
  if(ffd >= 0) {
    close(ffd);
  }
  if(pfd >= 0) {
    close(pfd);
  }
  if(tFile) {
    fclose(tFile);
  }

  /* Once fdopen() has succeeded the stream owns the descriptor, so
   * close exactly one of the pair. */
  if(fLFile) {
    fclose(fLFile);
  } else if(fLfd >= 0) {
    close(fLfd);
  }
  if(tLFile) {
    fclose(tLFile);
  } else if(tLfd >= 0) {
    close(tLfd);
  }
  if(pLFile) {
    fclose(pLFile);
  } else if(pLfd >= 0) {
    close(pLfd);
  }
  if(kLFile) {
    fclose(kLFile);
  } else if(kLfd >= 0) {
    close(kLfd);
  }
  if(scriptFile) {
    fclose(scriptFile);
  }

  maybe_munmap(fileTable, fileTableLength);
  maybe_munmap(trackTable, trackTableLength);
  maybe_munmap(timesTable, timesTableLength);
  maybe_munmap(powerTable, powerTableLength);
  maybe_munmap(featureFileNameTable, fileTableLength);
  maybe_munmap(timesFileNameTable, fileTableLength);
  maybe_munmap(powerFileNameTable, fileTableLength);

  if(directory_changed) {
    /* Best effort: we are already reporting failure. */
    chdir(cwd);
  }
  return 1;
}