# HG changeset patch # User mas01cr # Date 1192813486 0 # Node ID 3d931368fab3ee34b0abfaae236bb00803a2f54c # Parent 63ca70f2bf37d2e40593186fdfe6a6a04326f91e Initial cut at a dump utility; binary-based for now. Only very lightly tested; in particular, the times saving functionality is completely untested at present. Restore functionality provided by a shell script written as part of the dump process. diff -r 63ca70f2bf37 -r 3d931368fab3 audioDB.cpp --- a/audioDB.cpp Fri Oct 19 17:03:12 2007 +0000 +++ b/audioDB.cpp Fri Oct 19 17:04:46 2007 +0000 @@ -210,6 +210,7 @@ if(args_info.DUMP_given){ command=COM_DUMP; dbName=args_info.database_arg; + output = args_info.output_arg; return 0; } @@ -869,14 +870,117 @@ } void audioDB::dump(const char* dbName){ - if(!dbH) + if(!dbH) { initTables(dbName, 0, 0); - - for(unsigned k=0, j=0; k<dbH->numFiles; k++){ - cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; - j+=trackTable[k]; } + if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) { + error("error making output directory", output, "mkdir"); + } + + char *cwd = new char[PATH_MAX]; + + if ((getcwd(cwd, PATH_MAX)) == 0) { + error("error getting working directory", "", "getcwd"); + } + + if((chdir(output)) < 0) { + error("error changing working directory", output, "chdir"); + } + + int fLfd, tLfd = 0, kLfd; + FILE *fLFile, *tLFile = 0, *kLFile; + + if ((fLfd = open("featureList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { + error("error creating featureList file", "featureList.txt", "open"); + } + int times = dbH->flags & O2_FLAG_TIMES; + if (times) { + if ((tLfd = open("timesList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { + error("error creating timesList file", "timesList.txt", "open"); + } + } + if ((kLfd = open("keyList.txt", O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { + error("error creating keyList file", "keyList.txt", "open"); + } + + /* can 
these fail? I sincerely hope not. */ + fLFile = fdopen(fLfd, "w"); + if (times) { + tLFile = fdopen(tLfd, "w"); + } + kLFile = fdopen(kLfd, "w"); + + char *fName = new char[256]; + int ffd; + FILE *tFile; + unsigned pos = 0; + for(unsigned k = 0; k < dbH->numFiles; k++) { + fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLESIZE); + snprintf(fName, 256, "%05d.features", k); + if ((ffd = open(fName, O_CREAT|O_RDWR|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) { + error("error creating feature file", fName, "open"); + } + if ((write(ffd, &dbH->dim, sizeof(uint32_t))) < 0) { + error("error writing dimensions", fName, "write"); + } + + if ((write(ffd, dataBuf + pos * dbH->dim, trackTable[k] * dbH->dim * sizeof(double))) < 0) { + error("error writing data", fName, "write"); + } + fprintf(fLFile, "%s\n", fName); + close(ffd); + + if(times) { + snprintf(fName, 256, "%05d.times", k); + tFile = fopen(fName, "w"); + for(unsigned i = 0; i < trackTable[k]; i++) { + // KLUDGE: specifying 16 digits of precision after the decimal + // point is (but check this!) sufficient to uniquely identify + // doubles; however, that will cause ugliness, as that's + // vastly too many for most values of interest. Moving to %a + // here and scanf() in the timesFile reading might fix this. + // -- CSR, 2007-10-19 + fprintf(tFile, "%.16e\n", *(timesTable + pos + i)); + } + fprintf(tLFile, "%s\n", fName); + } + + pos += trackTable[k]; + cout << fileTable+k*O2_FILETABLESIZE << " " << trackTable[k] << endl; + } + + FILE *scriptFile; + scriptFile = fopen("restore.sh", "w"); + fprintf(scriptFile, "\ +#! 
/bin/sh\n\ +#\n\ +# usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\ +\n\ +if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\ +if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\ +\"${AUDIODB}\" -d \"$1\" -N --size=%d\n", dbH->dbSize / 1000000); + if(dbH->flags & O2_FLAG_L2NORM) { + fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n"); + } + fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt"); + if(times) { + fprintf(scriptFile, " -T timesList.txt"); + } + fprintf(scriptFile, "\n"); + fclose(scriptFile); + + if((chdir(cwd)) < 0) { + error("error changing working directory", cwd, "chdir"); + } + + fclose(fLFile); + if(times) { + fclose(tLFile); + } + fclose(kLFile); + delete[] fName; + status(dbName); } diff -r 63ca70f2bf37 -r 3d931368fab3 audioDB.h --- a/audioDB.h Fri Oct 19 17:03:12 2007 +0000 +++ b/audioDB.h Fri Oct 19 17:04:46 2007 +0000 @@ -120,6 +120,7 @@ const char* trackFileName; ifstream *trackFile; const char *command; + const char *output; const char *timesFileName; ifstream *timesFile; @@ -207,6 +208,7 @@ trackFileName(0), \ trackFile(0), \ command(0), \ + output(0), \ timesFileName(0), \ timesFile(0), \ dbfid(0), \ diff -r 63ca70f2bf37 -r 3d931368fab3 gengetopt.in --- a/gengetopt.in Fri Oct 19 17:03:12 2007 +0000 +++ b/gengetopt.in Fri Oct 19 17:04:46 2007 +0000 @@ -12,6 +12,7 @@ option "size" - "size of database file (in MB)" int dependon="NEW" default="2000" optional option "STATUS" S "output database information to stdout." dependon="database" optional option "DUMP" D "output all entries: index key size." dependon="database" optional +option "output" - "output directory" string dependon="DUMP" default="audioDB.dump" optional option "L2NORM" L "unit norm vectors and norm all future inserts." dependon="database" optional section "Database Insertion" sectiondesc="The following commands insert feature files, with optional keys and timestamps.\n"