annotate dump.cpp @ 755:37c2b9cce23a multiprobeLSH

Adding mkc_lsh_update branch, trunk candidate with improved LSH: merged trunk 1095 and branch multiprobe_lsh
author mas01mc
date Thu, 25 Nov 2010 13:42:40 +0000
parents cc2b97d020b1
children 31a1556fc2d6
rev   line source
mas01cr@498 1 extern "C" {
mas01cr@498 2 #include "audioDB_API.h"
mas01cr@509 3 }
mas01cr@498 4 #include "audioDB-internals.h"
mas01cr@239 5
mas01cr@498 6 int audiodb_dump(adb_t *adb, const char *output) {
mas01cr@498 7 char *fileTable = 0; /* key_table */
mas01cr@498 8 double *timesTable = 0; /* timestamps_table */
mas01cr@498 9 double *powerTable = 0; /* power_table */
mas01cr@498 10
mas01cr@498 11 size_t fileTableLength = 0;
mas01cr@498 12 size_t timesTableLength = 0;
mas01cr@498 13 size_t powerTableLength = 0;
mas01cr@498 14
mas01cr@498 15 char *featureFileNameTable = 0;
mas01cr@498 16 char *powerFileNameTable = 0;
mas01cr@498 17 char *timesFileNameTable = 0;
mas01cr@498 18
mas01cr@498 19 char cwd[PATH_MAX];
mas01cr@498 20 int directory_changed = 0;
mas01cr@498 21
mas01cr@498 22 int fLfd = 0, tLfd = 0, pLfd = 0, kLfd = 0;
mas01cr@498 23 FILE *fLFile = 0, *tLFile = 0, *pLFile = 0, *kLFile = 0;
mas01cr@498 24
mas01cr@498 25 int times, power;
mas01cr@498 26
mas01cr@498 27 char fName[256];
mas01cr@498 28 int ffd, pfd;
mas01cr@498 29 FILE *tFile;
mas01cr@498 30 unsigned pos = 0;
mas01cr@498 31 double *data_buffer;
mas01cr@498 32 size_t data_buffer_size;
mas01cr@498 33 FILE *scriptFile = 0;
mas01cr@498 34
mas01cr@498 35 unsigned nfiles = adb->header->numFiles;
mas01cr@498 36
mas01cr@498 37 if(adb->header->length > 0) {
mas01cr@509 38 fileTableLength = align_page_up(nfiles * ADB_FILETABLE_ENTRY_SIZE);
mas01cr@509 39 if(!(adb->header->flags & ADB_HEADER_FLAG_REFERENCES)) {
mas01cr@498 40 off_t length = adb->header->length;
mas01cr@498 41 unsigned dim = adb->header->dim;
mas01cr@509 42 timesTableLength = align_page_up(2*length/dim);
mas01cr@509 43 powerTableLength = align_page_up(length/dim);
mas01cr@498 44 }
mas01cr@498 45
mas01cr@498 46 mmap_or_goto_error(char *, fileTable, adb->header->fileTableOffset, fileTableLength);
mas01cr@509 47 if (adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@498 48 mmap_or_goto_error(char *, featureFileNameTable, adb->header->dataOffset, fileTableLength);
mas01cr@498 49 mmap_or_goto_error(char *, powerFileNameTable, adb->header->powerTableOffset, fileTableLength);
mas01cr@498 50 mmap_or_goto_error(char *, timesFileNameTable, adb->header->timesTableOffset, fileTableLength);
mas01cr@498 51 } else {
mas01cr@498 52 mmap_or_goto_error(double *, powerTable, adb->header->powerTableOffset, powerTableLength);
mas01cr@498 53 mmap_or_goto_error(double *, timesTable, adb->header->timesTableOffset, timesTableLength);
mas01cr@498 54 }
mas01cr@239 55 }
mas01cr@239 56
mas01cr@239 57 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
mas01cr@498 58 goto error;
mas01cr@239 59 }
mas01cr@239 60
mas01cr@239 61 if ((getcwd(cwd, PATH_MAX)) == 0) {
mas01cr@498 62 goto error;
mas01cr@239 63 }
mas01cr@239 64
mas01cr@498 65 /* FIXME: Hrm. How does chdir(2) interact with threads? Does each
mas01cr@498 66 * thread have its own working directory? */
mas01cr@239 67 if((chdir(output)) < 0) {
mas01cr@498 68 goto error;
mas01cr@498 69 }
mas01cr@498 70 directory_changed = 1;
mas01cr@498 71
mas01cr@498 72 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 73 goto error;
mas01cr@239 74 }
mas01cr@239 75
mas01cr@509 76 times = adb->header->flags & ADB_HEADER_FLAG_TIMES;
mas01cr@239 77 if (times) {
mas01cr@239 78 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 79 goto error;
mas01cr@239 80 }
mas01cr@239 81 }
mas01cr@239 82
mas01cr@509 83 power = adb->header->flags & ADB_HEADER_FLAG_POWER;
mas01cr@239 84 if (power) {
mas01cr@239 85 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 86 goto error;
mas01cr@239 87 }
mas01cr@239 88 }
mas01cr@239 89
mas01cr@239 90 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 91 goto error;
mas01cr@239 92 }
mas01cr@239 93
mas01cr@239 94 /* can these fail? I sincerely hope not. */
mas01cr@239 95 fLFile = fdopen(fLfd, "w");
mas01cr@239 96 if (times) {
mas01cr@239 97 tLFile = fdopen(tLfd, "w");
mas01cr@239 98 }
mas01cr@239 99 if (power) {
mas01cr@239 100 pLFile = fdopen(pLfd, "w");
mas01cr@239 101 }
mas01cr@239 102 kLFile = fdopen(kLfd, "w");
mas01cr@239 103
mas01cr@498 104 lseek(adb->fd, adb->header->dataOffset, SEEK_SET);
mas01cr@498 105
mas01cr@498 106 for(unsigned k = 0; k < nfiles; k++) {
mas01cr@509 107 fprintf(kLFile, "%s\n", fileTable + k*ADB_FILETABLE_ENTRY_SIZE);
mas01cr@509 108 if(adb->header->flags & ADB_HEADER_FLAG_REFERENCES) {
mas01cr@509 109 char *featureFileName = featureFileNameTable+k*ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 110 if(*featureFileName != '/') {
mas01cr@498 111 goto error;
mas01cr@498 112 }
mas01cr@380 113 fprintf(fLFile, "%s\n", featureFileName);
mas01cr@380 114 if(times) {
mas01cr@509 115 char *timesFileName = timesFileNameTable + k*ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 116 if(*timesFileName != '/') {
mas01cr@498 117 goto error;
mas01cr@498 118 }
mas01cr@380 119 fprintf(tLFile, "%s\n", timesFileName);
mas01cr@239 120 }
mas01cr@380 121 if(power) {
mas01cr@509 122 char *powerFileName = powerFileNameTable + k*ADB_FILETABLE_ENTRY_SIZE;
mas01cr@498 123 if(*powerFileName != '/') {
mas01cr@498 124 goto error;
mas01cr@498 125 }
mas01cr@380 126 fprintf(pLFile, "%s\n", powerFileName);
mas01cr@239 127 }
mas01cr@380 128 } else {
mas01cr@380 129 snprintf(fName, 256, "%05d.features", k);
mas01cr@380 130 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 131 goto error;
mas01cr@239 132 }
mas01cr@498 133 write_or_goto_error(ffd, &(adb->header->dim), sizeof(uint32_t));
mas01cr@380 134
mas01cr@380 135 /* FIXME: this repeated malloc()/free() of data buffers is
mas01cr@380 136 inefficient. */
mas01cr@498 137 data_buffer_size = (*adb->track_lengths)[k] * adb->header->dim * sizeof(double);
mas01cr@380 138
mas01cr@380 139 {
mas01cr@380 140 void *tmp = malloc(data_buffer_size);
mas01cr@380 141 if (tmp == NULL) {
mas01cr@498 142 goto error;
mas01cr@380 143 }
mas01cr@380 144 data_buffer = (double *) tmp;
mas01cr@380 145 }
mas01cr@380 146
mas01cr@498 147 if ((read(adb->fd, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
mas01cr@498 148 goto error;
mas01cr@380 149 }
mas01cr@380 150
mas01cr@498 151 write_or_goto_error(ffd, data_buffer, data_buffer_size);
mas01cr@380 152
mas01cr@380 153 free(data_buffer);
mas01cr@380 154
mas01cr@380 155 fprintf(fLFile, "%s\n", fName);
mas01cr@380 156 close(ffd);
mas01cr@498 157 ffd = 0;
mas01cr@498 158
mas01cr@380 159 if (times) {
mas01cr@380 160 snprintf(fName, 256, "%05d.times", k);
mas01cr@380 161 tFile = fopen(fName, "w");
mas01cr@498 162 for(unsigned i = 0; i < (*adb->track_lengths)[k]; i++) {
mas01cr@380 163 // KLUDGE: specifying 16 digits of precision after the decimal
mas01cr@380 164 // point is (but check this!) sufficient to uniquely identify
mas01cr@380 165 // doubles; however, that will cause ugliness, as that's
mas01cr@380 166 // vastly too many for most values of interest. Moving to %a
mas01cr@380 167 // here and scanf() in the timesFile reading might fix this.
mas01cr@380 168 // -- CSR, 2007-10-19
mas01cr@380 169 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
mas01cr@380 170 }
mas01cr@498 171 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*(*adb->track_lengths)[k]-1));
mas01cr@498 172 fclose(tFile);
mas01cr@380 173
mas01cr@380 174 fprintf(tLFile, "%s\n", fName);
mas01cr@380 175 }
mas01cr@380 176
mas01cr@380 177 if (power) {
mas01cr@380 178 uint32_t one = 1;
mas01cr@380 179 snprintf(fName, 256, "%05d.power", k);
mas01cr@380 180 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@498 181 goto error;
mas01cr@380 182 }
mas01cr@498 183 write_or_goto_error(pfd, &one, sizeof(uint32_t));
mas01cr@498 184 write_or_goto_error(pfd, powerTable + pos, (*adb->track_lengths)[k] * sizeof(double));
mas01cr@380 185 fprintf(pLFile, "%s\n", fName);
mas01cr@380 186 close(pfd);
mas01cr@498 187 pfd = 0;
mas01cr@380 188 }
mas01cr@380 189
mas01cr@498 190 pos += (*adb->track_lengths)[k];
mas01cr@509 191 std::cout << fileTable+k*ADB_FILETABLE_ENTRY_SIZE << " " << (*adb->track_lengths)[k] << std::endl;
mas01cr@380 192 }
mas01cr@239 193 }
mas01cr@239 194
mas01cr@239 195 scriptFile = fopen("restore.sh", "w");
mas01cr@239 196 fprintf(scriptFile, "\
mas01cr@239 197 #! /bin/sh\n\
mas01cr@239 198 #\n\
mas01cr@239 199 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
mas01cr@239 200 \n\
mas01cr@239 201 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
mas01cr@239 202 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
mas01cr@256 203 \"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n",
mas01cr@498 204 (int) ((adb->header->timesTableOffset - adb->header->dataOffset) / (1024*1024)),
mas01cr@256 205 // fileTable entries (char[256]) are bigger than trackTable
mas01cr@256 206 // (int), so the granularity of page aligning is finer.
mas01cr@509 207 (int) ((adb->header->trackTableOffset - adb->header->fileTableOffset) / ADB_FILETABLE_ENTRY_SIZE),
mas01cr@498 208 (int) ceil(((double) (adb->header->timesTableOffset - adb->header->dataOffset)) / ((double) (adb->header->dbSize - adb->header->l2normTableOffset))));
mas01cr@509 209 if(adb->header->flags & ADB_HEADER_FLAG_L2NORM) {
mas01cr@239 210 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
mas01cr@239 211 }
mas01cr@239 212 if(power) {
mas01cr@239 213 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
mas01cr@239 214 }
mas01cr@239 215 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
mas01cr@239 216 if(times) {
mas01cr@239 217 fprintf(scriptFile, " -T timesList.txt");
mas01cr@239 218 }
mas01cr@239 219 if(power) {
mas01cr@239 220 fprintf(scriptFile, " -W powerList.txt");
mas01cr@239 221 }
mas01cr@239 222 fprintf(scriptFile, "\n");
mas01cr@239 223 fclose(scriptFile);
mas01cr@239 224
mas01cr@239 225 fclose(fLFile);
mas01cr@239 226 if(times) {
mas01cr@239 227 fclose(tLFile);
mas01cr@239 228 }
mas01cr@239 229 if(power) {
mas01cr@239 230 fclose(pLFile);
mas01cr@239 231 }
mas01cr@239 232 fclose(kLFile);
mas01cr@239 233
mas01cr@498 234 maybe_munmap(fileTable, fileTableLength);
mas01cr@498 235 maybe_munmap(timesTable, timesTableLength);
mas01cr@498 236 maybe_munmap(powerTable, powerTableLength);
mas01cr@498 237 maybe_munmap(featureFileNameTable, fileTableLength);
mas01cr@498 238 maybe_munmap(timesFileNameTable, fileTableLength);
mas01cr@498 239 maybe_munmap(powerFileNameTable, fileTableLength);
mas01mc@334 240
mas01cr@498 241 if((chdir(cwd)) < 0) {
mas01cr@498 242 /* don't goto error because the error handling will try to
mas01cr@498 243 * chdir() */
mas01cr@498 244 return 1;
mas01mc@334 245 }
mas01mc@334 246
mas01cr@498 247 return 0;
mas01mc@334 248
mas01cr@498 249 error:
mas01cr@498 250 if(fLFile) {
mas01cr@498 251 fclose(fLFile);
mas01cr@498 252 } else if(fLfd) {
mas01cr@498 253 close(fLfd);
mas01cr@498 254 }
mas01cr@498 255 if(tLFile) {
mas01cr@498 256 fclose(tLFile);
mas01cr@498 257 } else if(tLfd) {
mas01cr@498 258 close(fLfd);
mas01cr@498 259 }
mas01cr@498 260 if(pLFile) {
mas01cr@498 261 fclose(pLFile);
mas01cr@498 262 } else if(pLfd) {
mas01cr@498 263 close(pLfd);
mas01cr@498 264 }
mas01cr@498 265 if(kLFile) {
mas01cr@498 266 fclose(kLFile);
mas01cr@498 267 } else if(kLfd) {
mas01cr@498 268 close(kLfd);
mas01cr@498 269 }
mas01cr@498 270 if(scriptFile) {
mas01cr@498 271 fclose(scriptFile);
mas01mc@334 272 }
mas01mc@334 273
mas01cr@498 274 maybe_munmap(fileTable, fileTableLength);
mas01cr@498 275 maybe_munmap(timesTable, timesTableLength);
mas01cr@498 276 maybe_munmap(powerTable, powerTableLength);
mas01cr@498 277 maybe_munmap(featureFileNameTable, fileTableLength);
mas01cr@498 278 maybe_munmap(timesFileNameTable, fileTableLength);
mas01cr@498 279 maybe_munmap(powerFileNameTable, fileTableLength);
mas01cr@498 280
mas01cr@498 281 if(directory_changed) {
mas01cr@498 282 int gcc_warning_workaround = chdir(cwd);
mas01cr@498 283 directory_changed = gcc_warning_workaround;
mas01mc@334 284 }
mas01cr@498 285 return 1;
mas01mc@334 286 }