view dump.cpp @ 409:99e6cbad7f76 api-inversion

The lesser of two evils, part 2. Implement paths through audiodb_insert_datum_internal() for databases with O2_FLAG_LARGE_ADB, including in some of the helper functions. Most of the nasty stuff is concentrated in writing out the paths in what is now step 6, and everything else looks much as before, apart from a renumbering of the steps taken. Now we can implement audiodb_insert() for O2_FLAG_LARGE_ADB databases; we need to construct an adb_datum_internal_t from our adb_insert_t, but that's straightforward -- even just about straightforward enough to do it inline. Then audioDB::batchinsert() can be rewritten completely in terms of API functions, and doesn't need any kind of special treatment for the large case. Hooray. The real point of that is of course that we can now delete wodges of dead code, and move out audioDB::insert and audioDB::batchinsert into audioDB.cpp, because all they're doing now is dealing with command-line logic. This point marks the limit of what can be achieved in terms of "API inversion" at this time; the only remaining function, audiodb_query() / audioDB::query cannot be inverted because its API implementation is incomplete. Future plans, in some order: - merge this branch to trunk (check with current API/ABI clients); - complete audiodb_query() implementation; - invert audioDB::query / audiodb_query(); - MORE TESTS; - remove audioDB.cpp from list of files compiled into the library; - implement missing API functions (index, liszt, sample) directly; - source code rearrangement into library and command-line directories; - include bindings to library for some plausible candidate environments (Perl, Python, Lisp, Pd, Max/MSP) as examples; - API documentation.
author mas01cr
date Tue, 09 Dec 2008 22:48:30 +0000
parents a8a5f2ca5380
children d7e590d58c85
line wrap: on
line source
#include "audioDB.h"
extern "C" {
#include "audioDB_API.h"
#include "audioDB-internals.h"
}

/* Close *stream if it is open and mark it as closed.  Returns 0 on
 * success (or when there was nothing to close) and -1 when fclose()
 * reported an error, e.g. because buffered data could not be flushed. */
static int audiodb_dump_close_stream(FILE **stream) {
  FILE *f = *stream;
  *stream = 0;
  if (f && fclose(f) != 0) {
    return -1;
  }
  return 0;
}

/*
 * Dump the contents of the database behind `adb` into a newly created
 * directory `output`.
 *
 * The directory receives:
 *   - keyList.txt      one key per track;
 *   - featureList.txt  one feature file name per track;
 *   - timesList.txt    only if the database has O2_FLAG_TIMES set;
 *   - powerList.txt    only if the database has O2_FLAG_POWER set;
 *   - restore.sh       a shell script that rebuilds a database from the
 *                      files above.
 *
 * For databases without O2_FLAG_LARGE_ADB the per-track data is also
 * written out as NNNNN.features / NNNNN.times / NNNNN.power files; for
 * O2_FLAG_LARGE_ADB databases the list files contain the stored file
 * names instead, all of which must be absolute paths.
 *
 * `output` must not already exist (mkdir() failure is an error), and
 * every file is created with O_EXCL.  Files written before a failure
 * are left in place.
 *
 * The function temporarily changes the working directory to `output`
 * and restores it before returning.
 *
 * Returns 0 on success and 1 on any failure.
 */
int audiodb_dump(adb_t *adb, const char *output) {
  char *fileTable = 0; /* key_table */
  unsigned *trackTable = 0; /* track_size_table */
  double *timesTable = 0; /* timestamps_table */
  double *powerTable = 0; /* power_table */

  size_t fileTableLength = 0;
  size_t trackTableLength = 0;
  size_t timesTableLength = 0;
  size_t powerTableLength = 0;

  char *featureFileNameTable = 0;
  char *powerFileNameTable = 0;
  char *timesFileNameTable = 0;

  char cwd[PATH_MAX];
  int directory_changed = 0;

  /* -1 means "not open"; 0 is a valid descriptor and so cannot be used
   * as the sentinel. */
  int fLfd = -1, tLfd = -1, pLfd = -1, kLfd = -1;
  FILE *fLFile = 0, *tLFile = 0, *pLFile = 0, *kLFile = 0;

  int times, power;

  char fName[256];
  int ffd = -1, pfd = -1;
  FILE *tFile = 0;
  unsigned pos = 0;
  double *data_buffer = 0;
  size_t data_buffer_size;
  FILE *scriptFile = 0;

  const int create_flags = O_CREAT|O_RDWR|O_EXCL;
  const mode_t create_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;

  unsigned nfiles = adb->header->numFiles;

  if(adb->header->length > 0) {
    fileTableLength = ALIGN_PAGE_UP(nfiles * O2_FILETABLE_ENTRY_SIZE);
    trackTableLength = ALIGN_PAGE_UP(nfiles * O2_TRACKTABLE_ENTRY_SIZE);
    if(!(adb->header->flags & O2_FLAG_LARGE_ADB)) {
      off_t length = adb->header->length;
      unsigned dim = adb->header->dim;
      timesTableLength = ALIGN_PAGE_UP(2*length/dim);
      powerTableLength = ALIGN_PAGE_UP(length/dim);
    }

    mmap_or_goto_error(char *, fileTable, adb->header->fileTableOffset, fileTableLength);
    mmap_or_goto_error(unsigned *, trackTable, adb->header->trackTableOffset, trackTableLength);
    if (adb->header->flags & O2_FLAG_LARGE_ADB) {
      mmap_or_goto_error(char *, featureFileNameTable, adb->header->dataOffset, fileTableLength);
      mmap_or_goto_error(char *, powerFileNameTable, adb->header->powerTableOffset, fileTableLength);
      mmap_or_goto_error(char *, timesFileNameTable, adb->header->timesTableOffset, fileTableLength);
    } else {
      mmap_or_goto_error(double *, powerTable, adb->header->powerTableOffset, powerTableLength);
      mmap_or_goto_error(double *, timesTable, adb->header->timesTableOffset, timesTableLength);
    }
  }

  if((mkdir(output, S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
    goto error;
  }

  if ((getcwd(cwd, PATH_MAX)) == 0) {
    goto error;
  }

  /* NOTE: per POSIX the working directory belongs to the process, not
   * to the calling thread, so other threads that resolve relative
   * paths are affected while the dump is running. */
  if((chdir(output)) < 0) {
    goto error;
  }
  directory_changed = 1;

  if ((fLfd = open("featureList.txt", create_flags, create_mode)) < 0) {
    goto error;
  }

  times = adb->header->flags & O2_FLAG_TIMES;
  if (times) {
    if ((tLfd = open("timesList.txt", create_flags, create_mode)) < 0) {
      goto error;
    }
  }

  power = adb->header->flags & O2_FLAG_POWER;
  if (power) {
    if ((pLfd = open("powerList.txt", create_flags, create_mode)) < 0) {
      goto error;
    }
  }

  if ((kLfd = open("keyList.txt", create_flags, create_mode)) < 0) {
    goto error;
  }

  /* fdopen() can fail (e.g. out of memory).  Once it succeeds the
   * stream owns the descriptor, so the descriptor variable is reset to
   * avoid closing it a second time during cleanup. */
  if (!(fLFile = fdopen(fLfd, "w"))) {
    goto error;
  }
  fLfd = -1;
  if (times) {
    if (!(tLFile = fdopen(tLfd, "w"))) {
      goto error;
    }
    tLfd = -1;
  }
  if (power) {
    if (!(pLFile = fdopen(pLfd, "w"))) {
      goto error;
    }
    pLfd = -1;
  }
  if (!(kLFile = fdopen(kLfd, "w"))) {
    goto error;
  }
  kLfd = -1;

  /* Feature data is read sequentially, track by track, from the start
   * of the data region. */
  if (lseek(adb->fd, adb->header->dataOffset, SEEK_SET) == (off_t) -1) {
    goto error;
  }

  for(unsigned k = 0; k < nfiles; k++) {
    fprintf(kLFile, "%s\n", fileTable + k*O2_FILETABLE_ENTRY_SIZE);
    if(adb->header->flags & O2_FLAG_LARGE_ADB) {
      /* Large databases store file names rather than data; only
       * absolute paths are accepted. */
      char *featureFileName = featureFileNameTable+k*O2_FILETABLE_ENTRY_SIZE;
      if(*featureFileName != '/') {
        goto error;
      }
      fprintf(fLFile, "%s\n", featureFileName);
      if(times) {
        char *timesFileName = timesFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
        if(*timesFileName != '/') {
          goto error;
        }
        fprintf(tLFile, "%s\n", timesFileName);
      }
      if(power) {
        char *powerFileName = powerFileNameTable + k*O2_FILETABLE_ENTRY_SIZE;
        if(*powerFileName != '/') {
          goto error;
        }
        fprintf(pLFile, "%s\n", powerFileName);
      }
    } else {
      /* Feature file: a uint32_t dimension followed by the raw doubles
       * of this track. */
      snprintf(fName, 256, "%05d.features", k);
      if ((ffd = open(fName, create_flags, create_mode)) < 0) {
        goto error;
      }
      /* A short write would leave a truncated file, so anything other
       * than a complete write is treated as failure. */
      if ((write(ffd, &(adb->header->dim), sizeof(uint32_t))) != (ssize_t) sizeof(uint32_t)) {
        goto error;
      }

      /* FIXME: this repeated malloc()/free() of data buffers is
         inefficient. */
      /* Widen before multiplying so the product is not computed in
       * 32-bit unsigned arithmetic. */
      data_buffer_size = (size_t) trackTable[k] * adb->header->dim * sizeof(double);

      data_buffer = (double *) malloc(data_buffer_size);
      if (data_buffer == NULL) {
        goto error;
      }

      if ((read(adb->fd, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
        goto error;
      }

      if ((write(ffd, data_buffer, data_buffer_size)) != (ssize_t) data_buffer_size) {
        goto error;
      }

      free(data_buffer);
      data_buffer = 0;

      fprintf(fLFile, "%s\n", fName);
      close(ffd);
      ffd = -1;

      if (times) {
        snprintf(fName, 256, "%05d.times", k);
        tFile = fopen(fName, "w");
        if (!tFile) {
          goto error;
        }
        for(unsigned i = 0; i < trackTable[k]; i++) {
          // KLUDGE: specifying 16 digits of precision after the decimal
          // point is (but check this!) sufficient to uniquely identify
          // doubles; however, that will cause ugliness, as that's
          // vastly too many for most values of interest.  Moving to %a
          // here and scanf() in the timesFile reading might fix this.
          // -- CSR, 2007-10-19
          fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*i));
        }
        /* One extra line: the last entry of this track's slice of the
         * times table. */
        fprintf(tFile, "%.16e\n", *(timesTable + 2*pos + 2*trackTable[k]-1));
        if (audiodb_dump_close_stream(&tFile) < 0) {
          goto error;
        }

        fprintf(tLFile, "%s\n", fName);
      }

      if (power) {
        /* Power file: a uint32_t dimension of 1 followed by one double
         * per vector of this track. */
        uint32_t one = 1;
        size_t power_size = (size_t) trackTable[k] * sizeof(double);
        snprintf(fName, 256, "%05d.power", k);
        if ((pfd = open(fName, create_flags, create_mode)) < 0) {
          goto error;
        }
        if ((write(pfd, &one, sizeof(uint32_t))) != (ssize_t) sizeof(uint32_t)) {
          goto error;
        }
        if ((write(pfd, powerTable + pos, power_size)) != (ssize_t) power_size) {
          goto error;
        }
        fprintf(pLFile, "%s\n", fName);
        close(pfd);
        pfd = -1;
      }

      pos += trackTable[k];
      std::cout << fileTable+k*O2_FILETABLE_ENTRY_SIZE << " " << trackTable[k] << std::endl;
    }
  }

  scriptFile = fopen("restore.sh", "w");
  if (!scriptFile) {
    goto error;
  }
  fprintf(scriptFile, "\
#! /bin/sh\n\
#\n\
# usage: AUDIODB=/path/to/audioDB sh ./restore.sh <newdb>\n\
\n\
if [ -z \"${AUDIODB}\" ]; then echo set AUDIODB variable; exit 1; fi\n\
if [ -z \"$1\" ]; then echo usage: $0 newdb; exit 1; fi\n\n\
\"${AUDIODB}\" -d \"$1\" -N --datasize=%d --ntracks=%d --datadim=%d\n",
          (int) ((adb->header->timesTableOffset - adb->header->dataOffset) / (1024*1024)),
          // fileTable entries (char[256]) are bigger than trackTable
          // (int), so the granularity of page aligning is finer.
          (int) ((adb->header->trackTableOffset - adb->header->fileTableOffset) / O2_FILETABLE_ENTRY_SIZE),
          (int) ceil(((double) (adb->header->timesTableOffset - adb->header->dataOffset)) / ((double) (adb->header->dbSize - adb->header->l2normTableOffset))));
  if(adb->header->flags & O2_FLAG_L2NORM) {
    fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -L\n");
  }
  if(power) {
    fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -P\n");
  }
  fprintf(scriptFile, "\"${AUDIODB}\" -d \"$1\" -B -F featureList.txt -K keyList.txt");
  if(times) {
    fprintf(scriptFile, " -T timesList.txt");
  }
  if(power) {
    fprintf(scriptFile, " -W powerList.txt");
  }
  fprintf(scriptFile, "\n");

  /* fclose() flushes buffered output, so a failure here means the dump
   * is incomplete.  Each stream pointer is cleared by the helper, so
   * the cleanup code below never closes a stream twice. */
  if (audiodb_dump_close_stream(&scriptFile) < 0) {
    goto error;
  }
  if (audiodb_dump_close_stream(&fLFile) < 0) {
    goto error;
  }
  if (audiodb_dump_close_stream(&tLFile) < 0) {
    goto error;
  }
  if (audiodb_dump_close_stream(&pLFile) < 0) {
    goto error;
  }
  if (audiodb_dump_close_stream(&kLFile) < 0) {
    goto error;
  }

  maybe_munmap(fileTable, fileTableLength);
  maybe_munmap(trackTable, trackTableLength);
  maybe_munmap(timesTable, timesTableLength);
  maybe_munmap(powerTable, powerTableLength);
  maybe_munmap(featureFileNameTable, fileTableLength);
  maybe_munmap(timesFileNameTable, fileTableLength);
  maybe_munmap(powerFileNameTable, fileTableLength);

  if((chdir(cwd)) < 0) {
    /* don't goto error because the error handling will try to
     * chdir() */
    return 1;
  }

  return 0;

 error:
  /* Per-track resources that may still be live if the loop was left
   * part-way through a track. */
  free(data_buffer);
  (void) audiodb_dump_close_stream(&tFile);
  if(ffd >= 0) {
    close(ffd);
  }
  if(pfd >= 0) {
    close(pfd);
  }

  /* List files: a descriptor is only still >= 0 here if it was opened
   * but never handed over to a stream. */
  (void) audiodb_dump_close_stream(&fLFile);
  if(fLfd >= 0) {
    close(fLfd);
  }
  (void) audiodb_dump_close_stream(&tLFile);
  if(tLfd >= 0) {
    close(tLfd);
  }
  (void) audiodb_dump_close_stream(&pLFile);
  if(pLfd >= 0) {
    close(pLfd);
  }
  (void) audiodb_dump_close_stream(&kLFile);
  if(kLfd >= 0) {
    close(kLfd);
  }
  (void) audiodb_dump_close_stream(&scriptFile);

  maybe_munmap(fileTable, fileTableLength);
  maybe_munmap(trackTable, trackTableLength);
  maybe_munmap(timesTable, timesTableLength);
  maybe_munmap(powerTable, powerTableLength);
  maybe_munmap(featureFileNameTable, fileTableLength);
  maybe_munmap(timesFileNameTable, fileTableLength);
  maybe_munmap(powerFileNameTable, fileTableLength);

  if(directory_changed) {
    chdir(cwd);
  }
  return 1;
}