annotate dump.cpp @ 323:64c844de82d0 large_adb

Fixed an indexing bug where the rest of a track was ignored after the first shingle with power below the threshold. Restored the default O2_LSH_POINT_BITS to 14 (16384 points); it can be altered at compile time with CFLAGS+=-DO2_LSH_POINT_BITS=n
author mas01mc
date Thu, 21 Aug 2008 21:02:14 +0000
parents b9eff6896943
children 100cf66a5825
rev   line source
mas01cr@239 1 #include "audioDB.h"
mas01cr@239 2
mas01cr@239 3 void audioDB::dump(const char* dbName){
mas01cr@239 4 if(!dbH) {
mas01cr@239 5 initTables(dbName, 0);
mas01cr@239 6 }
mas01cr@239 7
mas01mc@319 8 if(dbH->flags & O2_FLAG_LARGE_ADB){
mas01mc@319 9 error("error: dump not supported for LARGE_ADB");
mas01mc@319 10 }
mas01mc@319 11
mas01cr@239 12 if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
mas01cr@239 13 error("error making output directory", output, "mkdir");
mas01cr@239 14 }
mas01cr@239 15
mas01cr@239 16 char *cwd = new char[PATH_MAX];
mas01cr@239 17
mas01cr@239 18 if ((getcwd(cwd, PATH_MAX)) == 0) {
mas01cr@239 19 error("error getting working directory", "", "getcwd");
mas01cr@239 20 }
mas01cr@239 21
mas01cr@239 22 if((chdir(output)) < 0) {
mas01cr@239 23 error("error changing working directory", output, "chdir");
mas01cr@239 24 }
mas01cr@239 25
mas01cr@239 26 int fLfd, tLfd = 0, pLfd = 0, kLfd;
mas01cr@239 27 FILE *fLFile, *tLFile = 0, *pLFile = 0, *kLFile;
mas01cr@239 28
mas01cr@239 29 if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 30 error("error creating featureList file", "featureList.txt", "open");
mas01cr@239 31 }
mas01cr@239 32
mas01cr@239 33 int times = dbH->flags & O2_FLAG_TIMES;
mas01cr@239 34 if (times) {
mas01cr@239 35 if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 36 error("error creating timesList file", "timesList.txt", "open");
mas01cr@239 37 }
mas01cr@239 38 }
mas01cr@239 39
mas01cr@239 40 int power = dbH->flags & O2_FLAG_POWER;
mas01cr@239 41 if (power) {
mas01cr@239 42 if ((pLfd = open("powerList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 43 error("error creating powerList file", "powerList.txt", "open");
mas01cr@239 44 }
mas01cr@239 45 }
mas01cr@239 46
mas01cr@239 47 if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 48 error("error creating keyList file", "keyList.txt", "open");
mas01cr@239 49 }
mas01cr@239 50
mas01cr@239 51 /* can these fail? I sincerely hope not. */
mas01cr@239 52 fLFile = fdopen(fLfd, "w");
mas01cr@239 53 if (times) {
mas01cr@239 54 tLFile = fdopen(tLfd, "w");
mas01cr@239 55 }
mas01cr@239 56 if (power) {
mas01cr@239 57 pLFile = fdopen(pLfd, "w");
mas01cr@239 58 }
mas01cr@239 59 kLFile = fdopen(kLfd, "w");
mas01cr@239 60
mas01cr@239 61 char *fName = new char[256];
mas01cr@239 62 int ffd, pfd;
mas01cr@239 63 FILE *tFile;
mas01cr@239 64 unsigned pos = 0;
mas01cr@239 65 lseek(dbfid, dbH->dataOffset, SEEK_SET);
mas01cr@239 66 double *data_buffer;
mas01cr@239 67 size_t data_buffer_size;
mas01cr@239 68 for(unsigned k = 0; k < dbH->numFiles; k++) {
mas01cr@256 69 fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE);
mas01cr@239 70 snprintf(fName, 256, "%05d.features", k);
mas01cr@239 71 if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 72 error("error creating feature file", fName, "open");
mas01cr@239 73 }
mas01cr@239 74 if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) {
mas01cr@239 75 error("error writing dimensions", fName, "write");
mas01cr@239 76 }
mas01cr@239 77
mas01cr@239 78 /* FIXME: this repeated malloc()/free() of data buffers is
mas01cr@239 79 inefficient. */
mas01cr@239 80 data_buffer_size = trackTable[k] * dbH->dim * sizeof(double);
mas01cr@239 81
mas01cr@239 82 {
mas01cr@239 83 void *tmp = malloc(data_buffer_size);
mas01cr@239 84 if (tmp == NULL) {
mas01cr@239 85 error("error allocating data buffer");
mas01cr@239 86 }
mas01cr@239 87 data_buffer = (double *) tmp;
mas01cr@239 88 }
mas01cr@239 89
mas01cr@239 90 if ((read(dbfid, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
mas01cr@239 91 error("error reading data", fName, "read");
mas01cr@239 92 }
mas01cr@239 93
mas01cr@239 94 if ((write(ffd, data_buffer, data_buffer_size)) < 0) {
mas01cr@239 95 error("error writing data", fName, "write");
mas01cr@239 96 }
mas01cr@239 97
mas01cr@239 98 free(data_buffer);
mas01cr@239 99
mas01cr@239 100 fprintf(fLFile, "%s\n", fName);
mas01cr@239 101 close(ffd);
mas01cr@239 102
mas01cr@239 103 if (times) {
mas01cr@239 104 snprintf(fName, 256, "%05d.times", k);
mas01cr@239 105 tFile = fopen(fName, "w");
mas01cr@239 106 for(unsigned i = 0; i < trackTable[k]; i++) {
mas01cr@239 107 // KLUDGE: specifying 16 digits of precision after the decimal
mas01cr@239 108 // point is (but check this!) sufficient to uniquely identify
mas01cr@239 109 // doubles; however, that will cause ugliness, as that's
mas01cr@239 110 // vastly too many for most values of interest. Moving to %a
mas01cr@239 111 // here and scanf() in the timesFile reading might fix this.
mas01cr@239 112 // -- CSR, 2007-10-19
mas01cr@239 113 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
mas01cr@239 114 }
mas01cr@239 115 fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
mas01cr@239 116
mas01cr@239 117 fprintf(tLFile, "%s\n", fName);
mas01cr@239 118 }
mas01cr@239 119
mas01cr@239 120 if (power) {
mas01cr@239 121 uint32_t one = 1;
mas01cr@239 122 snprintf(fName, 256, "%05d.power", k);
mas01cr@239 123 if ((pfd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) {
mas01cr@239 124 error("error creating power file", fName, "open");
mas01cr@239 125 }
mas01cr@239 126 if ((write(pfd, &one, sizeof(uint32_t))) < 0) {
mas01cr@239 127 error("error writing one", fName, "write");
mas01cr@239 128 }
mas01cr@239 129 if ((write(pfd, powerTable + pos, trackTable[k] * sizeof(double))) < 0) {
mas01cr@239 130 error("error writing data", fName, "write");
mas01cr@239 131 }
mas01cr@239 132 fprintf(pLFile, "%s\n", fName);
mas01cr@239 133 close(pfd);
mas01cr@239 134 }
mas01cr@239 135
mas01cr@239 136 pos += trackTable[k];
mas01cr@256 137 std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
mas01cr@239 138 }
mas01cr@239 139
mas01cr@239 140 FILE *scriptFile;
mas01cr@239 141 scriptFile = fopen("restore.sh", "w");
mas01cr@239 142 fprintf(scriptFile, "\
mas01cr@239 143 #! /bin/sh\n\
mas01cr@239 144 #\n\
mas01cr@239 145 # usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
mas01cr@239 146 \n\
mas01cr@239 147 if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
mas01cr@239 148 if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
mas01cr@256 149 \"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n",
mas01cr@256 150 (int) ((dbH->timesTableOffset - dbH->dataOffset) / (1024*1024)),
mas01cr@256 151 // fileTable entries (char[256]) are bigger than trackTable
mas01cr@256 152 // (int), so the granularity of page aligning is finer.
mas01cr@256 153 (int) ((dbH->trackTableOffset - dbH->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE),
mas01cr@256 154 (int) ceil(((double) (dbH->timesTableOffset - dbH->dataOffset)) / ((double) (dbH->dbSize - dbH->l2normTableOffset))));
mas01cr@239 155 if(dbH->flags & O2_FLAG_L2NORM) {
mas01cr@239 156 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
mas01cr@239 157 }
mas01cr@239 158 if(power) {
mas01cr@239 159 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
mas01cr@239 160 }
mas01cr@239 161 fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
mas01cr@239 162 if(times) {
mas01cr@239 163 fprintf(scriptFile, " -T timesList.txt");
mas01cr@239 164 }
mas01cr@239 165 if(power) {
mas01cr@239 166 fprintf(scriptFile, " -W powerList.txt");
mas01cr@239 167 }
mas01cr@239 168 fprintf(scriptFile, "\n");
mas01cr@239 169 fclose(scriptFile);
mas01cr@239 170
mas01cr@239 171 if((chdir(cwd)) < 0) {
mas01cr@239 172 error("error changing working directory", cwd, "chdir");
mas01cr@239 173 }
mas01cr@239 174
mas01cr@239 175 fclose(fLFile);
mas01cr@239 176 if(times) {
mas01cr@239 177 fclose(tLFile);
mas01cr@239 178 }
mas01cr@239 179 if(power) {
mas01cr@239 180 fclose(pLFile);
mas01cr@239 181 }
mas01cr@239 182 fclose(kLFile);
mas01cr@239 183 delete[] fName;
mas01cr@239 184
mas01cr@239 185 status(dbName);
mas01cr@239 186 }