annotate runner/BinaryFeatureWriter.cpp @ 71:9600f5b8076f tip

latest version before copying fextractor
author gyorgyf
date Sun, 22 Sep 2013 21:55:40 +0200
parents 0ef80ae6493c
children
rev   line source
gyorgyf@60 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
gyorgyf@60 2
gyorgyf@60 3 /*
gyorgyf@60 4 Sonic Annotator
gyorgyf@60 5 A utility for batch feature extraction from audio files.
gyorgyf@60 6 Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
gyorgyf@60 7 Copyright 2007-2008 QMUL.
gyorgyf@60 8
gyorgyf@60 9 This program is free software; you can redistribute it and/or
gyorgyf@60 10 modify it under the terms of the GNU General Public License as
gyorgyf@60 11 published by the Free Software Foundation; either version 2 of the
gyorgyf@60 12 License, or (at your option) any later version. See the file
gyorgyf@60 13 COPYING included with this distribution for more information.
gyorgyf@60 14 */
gyorgyf@60 15
gyorgyf@60 16 #include <fstream>
gyorgyf@60 17
gyorgyf@60 18 #include <QFileInfo>
gyorgyf@60 19
gyorgyf@60 20 #include <ctime>
gyorgyf@60 21
gyorgyf@60 22 #include <stdint.h>
gyorgyf@60 23
gyorgyf@60 24 #include "BinaryFeatureWriter.h"
gyorgyf@60 25
gyorgyf@60 26 #include "base/RealTime.h"
gyorgyf@60 27
gyorgyf@60 28 #include "../version.h"
gyorgyf@60 29
// Build-time description of the host, embedded in the info text of the
// header written to newly created output files.

// Operating system family name.
#if defined(_WIN32)
#  define platform "Windows"
#elif __APPLE__
#  define platform "MacOS"
#elif __linux__
#  define platform "Linux"
#else
#  define platform "Unix"
#endif

// Pointer width of the build, as a string ("64" or "32").
#if defined(__LP64__) || _WIN64
#  define arch "64"
#else
#  define arch "32"
#endif

// Major version of the binary layout; open() refuses to append to a file
// whose header carries a different value.
#define MAJOR_VERSION_PY 0
gyorgyf@60 48
gyorgyf@60 49 using namespace std;
gyorgyf@60 50 using namespace Vamp;
gyorgyf@60 51
gyorgyf@60 52 // Parameter names
gyorgyf@60 53 string
gyorgyf@60 54 BinaryFeatureWriter::outputFileParam = "output";
gyorgyf@60 55
gyorgyf@60 56 struct BinaryFeatureWriter::OutputStream
gyorgyf@60 57 {
gyorgyf@60 58 ofstream* stream;
gyorgyf@60 59 bool newtransform;
gyorgyf@60 60 const Transform *transform;
gyorgyf@60 61 OutputStream() : newtransform(true),stream(NULL),transform(NULL) { }
gyorgyf@60 62 ~OutputStream() { if (stream != NULL) {stream->close(); delete stream;} }
gyorgyf@60 63
gyorgyf@60 64 struct header_t {
gyorgyf@66 65 int16_t BOM16; // 16-bit BOM (FEFF as in UTF-16) 2 bytes
gyorgyf@66 66 int32_t BOM32; // 32-bit BOM (human readable: e.g. ABCD) 4 bytes
gyorgyf@64 67 char major_version; // check for binary compatibility
gyorgyf@64 68 char minor_version; // changes in txt parts only
gyorgyf@64 69 char compression; // use of stream compression (e.g. gzip)
gyorgyf@64 70 char reserved1; // reserved byte
gyorgyf@64 71 char reserved2; // reserved byte
gyorgyf@64 72 char float_size; // size of float
gyorgyf@64 73 char int_size; // size of int
gyorgyf@64 74 char info[160]; // 160 byte text field
gyorgyf@64 75 char null; }; // NULL
gyorgyf@60 76
gyorgyf@60 77 bool open(string filename, bool append = true) {
gyorgyf@60 78
gyorgyf@60 79 if (stream) return true;
gyorgyf@60 80
gyorgyf@66 81 header_t header = {(int16_t) 0xFEFF, (int32_t) 0x41424344,MAJOR_VERSION_PY,2,0,NULL,NULL,(char)sizeof(float),(char)sizeof(int),{NULL},NULL};
gyorgyf@60 82 char* p_header = reinterpret_cast<char*>(&header);
gyorgyf@60 83
gyorgyf@60 84 if (append)
gyorgyf@60 85 stream = new ofstream(filename.c_str(), fstream::binary | ios_base::out | ios_base::in | ofstream::ate);
gyorgyf@60 86 else
gyorgyf@60 87 stream = new ofstream(filename.c_str(), fstream::binary);
gyorgyf@65 88
gyorgyf@65 89 if (!stream || !stream->is_open()) {
gyorgyf@65 90 if (append) {
gyorgyf@65 91 cerr << endl << "NOTE: Writing new binary output file: " << filename << endl;
gyorgyf@65 92 delete stream;
gyorgyf@65 93 stream = NULL;
gyorgyf@65 94 return open(filename,false);
gyorgyf@65 95 } else {
gyorgyf@65 96 cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): can't open file " << filename << endl;
gyorgyf@65 97 return false;
gyorgyf@65 98 }
gyorgyf@60 99 }
gyorgyf@60 100
gyorgyf@60 101 // verify input file format
gyorgyf@60 102 if (append) {
gyorgyf@60 103 ifstream istream;
gyorgyf@60 104 istream.open(filename.c_str(), fstream::binary | ios::in);
gyorgyf@60 105 if (!istream.is_open()) {
gyorgyf@60 106 cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): Can not verify supplied output stream." << endl;
gyorgyf@60 107 return false;
gyorgyf@60 108 }
gyorgyf@60 109 header_t iheader;
gyorgyf@60 110 istream.read(reinterpret_cast<char*>(&iheader),sizeof(header_t));
gyorgyf@60 111 istream.close();
gyorgyf@60 112 int16_t FEFF = 0xFEFF;
gyorgyf@60 113 if (iheader.BOM16 != FEFF) {
gyorgyf@60 114 if (iheader.BOM16 == (int16_t) 0xFFFE) {
gyorgyf@60 115 cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): This file apperas to have created on a different platform. Can not be appended. "
gyorgyf@60 116 << "Byte order mark: " << iheader.BOM32 << endl;
gyorgyf@60 117 return false;
gyorgyf@60 118 } else {
gyorgyf@60 119 cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): Invalid target file for this writer." << endl;
gyorgyf@60 120 return false;
gyorgyf@60 121 }
gyorgyf@60 122 }
gyorgyf@60 123 if (iheader.major_version != (char) MAJOR_VERSION_PY || iheader.BOM32 != (int32_t) 0x41424344) {
gyorgyf@60 124 cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): This file is not binary compatible with this version of the writer."
gyorgyf@60 125 << "file version: " << iheader.major_version << " required: " << MAJOR_VERSION_PY << endl;
gyorgyf@60 126 return false;
gyorgyf@60 127 }
gyorgyf@60 128 }
gyorgyf@60 129
gyorgyf@60 130 if (append) stream->seekp(0,ios_base::end);
gyorgyf@60 131
gyorgyf@60 132 if (!append) {
gyorgyf@60 133 time_t now = time(0);
gyorgyf@60 134 tm* gmtm = gmtime(&now);
gyorgyf@60 135 QString timestamp;
gyorgyf@60 136 if (gmtm != NULL)
gyorgyf@60 137 timestamp = QString("%1 %2").arg(", Created: ").arg(asctime(gmtm)).trimmed();
gyorgyf@66 138 string info = QString("_SONIC ANNOTATOR v%1 PYTHON BINARY V0.2, Platform: %2-%3bit%4")
gyorgyf@60 139 .arg(RUNNER_VERSION)
gyorgyf@60 140 .arg(platform)
gyorgyf@60 141 .arg(arch)
gyorgyf@60 142 .arg(timestamp).toStdString();
gyorgyf@60 143 strncpy(reinterpret_cast<char*>(&header.info),info.c_str(), info.length() <= 160 ? info.length() : 160);
gyorgyf@60 144 stream->write(p_header,sizeof(header_t));
gyorgyf@60 145 }
gyorgyf@60 146 return true;
gyorgyf@60 147 }
gyorgyf@60 148 };
gyorgyf@60 149
gyorgyf@60 150
gyorgyf@60 151 BinaryFeatureWriter::BinaryFeatureWriter() :
gyorgyf@60 152 outputFile("features")
gyorgyf@60 153 {
gyorgyf@60 154 binary = new OutputStream();
gyorgyf@60 155 }
gyorgyf@60 156
gyorgyf@60 157 BinaryFeatureWriter::~BinaryFeatureWriter()
gyorgyf@60 158 {
gyorgyf@60 159 if (binary) delete binary;
gyorgyf@60 160 }
gyorgyf@60 161
gyorgyf@60 162 BinaryFeatureWriter::ParameterList
gyorgyf@60 163 BinaryFeatureWriter::getSupportedParameters() const
gyorgyf@60 164 {
gyorgyf@60 165 ParameterList pl;
gyorgyf@60 166 Parameter p;
gyorgyf@60 167
gyorgyf@60 168 p.name = outputFileParam;
gyorgyf@60 169 p.description = "Binary output file path";
gyorgyf@60 170 p.hasArg = true;
gyorgyf@60 171 pl.push_back(p);
gyorgyf@60 172
gyorgyf@60 173 return pl;
gyorgyf@60 174 }
gyorgyf@60 175
gyorgyf@60 176 void
gyorgyf@60 177 BinaryFeatureWriter::setParameters(map<string, string> &params)
gyorgyf@60 178 {
gyorgyf@60 179 if (params.find(outputFileParam) != params.end()) {
gyorgyf@60 180 setOutputFile(params[outputFileParam]);
gyorgyf@60 181 params.erase(outputFileParam);
gyorgyf@60 182 }
gyorgyf@60 183 }
gyorgyf@60 184
gyorgyf@60 185 void
gyorgyf@60 186 BinaryFeatureWriter::setOutputFile(const string &file)
gyorgyf@60 187 {
gyorgyf@60 188 outputFile = file;
gyorgyf@60 189 }
gyorgyf@60 190
gyorgyf@60 191 void BinaryFeatureWriter::write(QString trackid,
gyorgyf@60 192 const Transform &transform,
gyorgyf@60 193 const Vamp::Plugin::OutputDescriptor& output,
gyorgyf@60 194 const Vamp::Plugin::FeatureList& featureList,
gyorgyf@60 195 std::string summaryType)
gyorgyf@60 196 {
gyorgyf@60 197 //!!! use summaryType
gyorgyf@60 198 if (summaryType != "") {
gyorgyf@60 199 //!!! IMPLEMENT
gyorgyf@60 200 cerr << "ERROR: BinaryFeatureWriter::write: Writing summaries is not yet implemented!" << endl;
gyorgyf@60 201 exit(1);
gyorgyf@60 202 }
gyorgyf@60 203
gyorgyf@60 204 // TODO: Consider writing out NumPy arrays directly following this documentation:
gyorgyf@60 205 // https://github.com/numpy/numpy/blob/master/doc/neps/npy-format.txt
gyorgyf@60 206 // and using the .npy format
gyorgyf@60 207
gyorgyf@60 208 // return if file could not be opened
gyorgyf@60 209 if(!openBinaryFile()) {
gyorgyf@60 210 cerr << "ERROR: BinaryFeatureWriter::write: Error opening binary output file!" << endl;
gyorgyf@60 211 exit(1);
gyorgyf@60 212 }
gyorgyf@60 213
gyorgyf@60 214 ofstream &ofs = *(binary->stream);
gyorgyf@60 215
gyorgyf@60 216 // The manager does not call finish() after writing different outputs from the same plugin, but we need this behaviour here:
gyorgyf@60 217 if (!binary->newtransform && binary->transform != NULL && binary->transform != &transform) finish();
gyorgyf@60 218
gyorgyf@60 219 // write a python dictionary string containing (some) metadata needed to interpret the results
gyorgyf@60 220 // this can be evaluated in python using the expression : d = eval(f.readline())
gyorgyf@60 221 // given f is an open file, which should yield a valid dictionary.
gyorgyf@60 222 /*
gyorgyf@60 223 enum SampleType {
gyorgyf@60 224
gyorgyf@60 225 /// Results from each process() align with that call's block start
gyorgyf@60 226 0: OneSamplePerStep,
gyorgyf@60 227
gyorgyf@60 228 /// Results are evenly spaced in time
gyorgyf@60 229 1: FixedSampleRate,
gyorgyf@60 230
gyorgyf@60 231 /// Results are unevenly spaced and have individual timestamps
gyorgyf@60 232 2: VariableSampleRate
gyorgyf@60 233 };
gyorgyf@60 234 */
gyorgyf@60 235
gyorgyf@60 236 if (binary->newtransform) {
gyorgyf@60 237 binary->newtransform = false;
gyorgyf@60 238 output_binCount = output.binCount;
gyorgyf@69 239 feature_count = 0;
gyorgyf@69 240 element_count = 0;
gyorgyf@60 241 binary->transform = &transform;
gyorgyf@60 242
gyorgyf@60 243 ofs << endl << "{"
gyorgyf@60 244 << "\"track_id\":\"" << trackid << "\","
gyorgyf@60 245 << "\"transform_id\":\"" << transform.getIdentifier() << "\","
gyorgyf@60 246 << "\"sample_rate\":" << transform.getSampleRate() << ","
gyorgyf@60 247 << "\"step_size\":" << transform.getStepSize() << ","
gyorgyf@60 248 << "\"block_size\":" << transform.getBlockSize() << ","
gyorgyf@60 249 << "\"window_type\":" << transform.getWindowType() << ","
gyorgyf@60 250
gyorgyf@60 251 << "\"features_list\":" << featureList.size() << ","
gyorgyf@60 252 << "\"bin_count\":" << output.binCount << ","
gyorgyf@60 253 // << "\"output_description\":\"" << output.description << "\","
gyorgyf@60 254 << "\"output_sample_type\":" << output.sampleType << ","
gyorgyf@60 255 << "\"output_sample_rate\":" << output.sampleRate << ",";
gyorgyf@60 256
gyorgyf@60 257 // Write start time and duration if the transform is not for the whole file
gyorgyf@60 258 if (transform.getDuration().toString() != "0.000000000") {
gyorgyf@60 259 ofs << "\"start_time\":\"" << transform.getStartTime().toString() << "\",";
gyorgyf@60 260 ofs << "\"duration\":\"" << transform.getDuration().toString() << "\",";
gyorgyf@60 261 }
gyorgyf@60 262 // Write plugin version if known. (NOTE: using RDF transforms, it remains empty for some reason)
gyorgyf@60 263 if (!transform.getPluginVersion().isEmpty())
gyorgyf@60 264 ofs << "\"plugin_version\":\"" << transform.getPluginVersion() << "\",";
gyorgyf@60 265
gyorgyf@60 266 // write transform parameters into a dict: parameters:{"parameter_name":value,...} where value is float
gyorgyf@60 267 ofs << "\"parameters\":{" ;
gyorgyf@60 268 ParameterMap m = transform.getParameters();
gyorgyf@60 269
gyorgyf@60 270 for (ParameterMap::const_iterator i = m.begin(); i != m.end(); ++i)
gyorgyf@60 271 // note last comma is ignored by python
gyorgyf@60 272 if (i == m.begin())
gyorgyf@60 273 ofs << QString("\"%1\":%2").arg(i->first).arg(i->second);
gyorgyf@60 274 else
gyorgyf@60 275 ofs << QString(",\"%1\":%2").arg(i->first).arg(i->second);
gyorgyf@60 276 ofs << "}";
gyorgyf@60 277
gyorgyf@60 278 // write the data size last, and close the line.
gyorgyf@60 279 data_size_pos = ofs.tellp();
gyorgyf@60 280 ofs << " }" << endl;
gyorgyf@60 281 }
gyorgyf@60 282
gyorgyf@68 283 // BUG: File corruption after about 34000 runs due to beat spectrum plugin returning less data than advertised !!!
gyorgyf@68 284
gyorgyf@60 285 // write the feature data
gyorgyf@60 286 feature_count += featureList.size();
gyorgyf@60 287 for (size_t i = 0; i < featureList.size(); ++i) {
gyorgyf@69 288 for (size_t j = 0; j < featureList[i].values.size(); ++j) {
gyorgyf@60 289 ofs.write( (const char*) &featureList[i].values[j], sizeof(featureList[i].values[j]) );
gyorgyf@69 290 element_count++;
gyorgyf@69 291 }
gyorgyf@60 292 }
gyorgyf@60 293 // ofs << endl;
gyorgyf@60 294
gyorgyf@60 295 // // write time stamp data
gyorgyf@60 296 // for (int i = 0; i < featureList.size(); ++i) {
gyorgyf@60 297 // for (int j = 0; j < featureList[i].values.size(); ++j) {
gyorgyf@60 298 // // float sec = (int) featureList[i].timestamp.sec;
gyorgyf@60 299 // // float nsec = (int) featureList[i].timestamp.nsec;
gyorgyf@60 300 // // (*dbfiles[id].ofs).write( (const char*) &sec, sizeof(int));
gyorgyf@60 301 // // (*dbfiles[id].ofs).write( (const char*) &nsec, sizeof(int));
gyorgyf@60 302 // ofs.write( (const char*) &featureList[i].timestamp.sec, sizeof(int));
gyorgyf@60 303 // ofs.write( (const char*) &featureList[i].timestamp.nsec, sizeof(int));
gyorgyf@60 304 //
gyorgyf@60 305 // }
gyorgyf@60 306 //
gyorgyf@60 307 // }
gyorgyf@60 308
gyorgyf@60 309 // -- UNCOMMENT - TO - HERE --
gyorgyf@60 310
gyorgyf@60 311
gyorgyf@60 312
gyorgyf@60 313 }
gyorgyf@60 314
gyorgyf@60 315 bool BinaryFeatureWriter::openBinaryFile()
gyorgyf@60 316 {
gyorgyf@64 317 if (outputFile.rfind(".") == string::npos)
gyorgyf@64 318 return binary->open(outputFile + ".bin");
gyorgyf@64 319 else
gyorgyf@64 320 return binary->open(outputFile);
gyorgyf@60 321 }
gyorgyf@60 322
gyorgyf@60 323 void BinaryFeatureWriter::finish()
gyorgyf@60 324 {
gyorgyf@60 325 ofstream &ofs = *(binary->stream);
gyorgyf@60 326 binary->newtransform = true;
gyorgyf@60 327 // ofs << endl;
gyorgyf@60 328 long t = ofs.tellp();
gyorgyf@60 329 ofs.seekp(data_size_pos);
gyorgyf@60 330 // fill in the missing information in the transform python dict that is required to read the output into an array
gyorgyf@60 331 ofs << ",\"feature_count\":"
gyorgyf@60 332 << feature_count
gyorgyf@60 333 << ",\"data_size\":"
gyorgyf@69 334 // << feature_count * output_binCount * sizeof(float);
gyorgyf@69 335 << element_count * sizeof(float);
gyorgyf@69 336 ofs.seekp(t);
gyorgyf@69 337 if (element_count != feature_count * output_binCount)
gyorgyf@69 338 cerr << "ERROR: The number of feature values written is different from the expected value given by the output bin count and the number of features returned by the plugin. \n"
gyorgyf@69 339 << "Otput bin count: " << output_binCount << ", Feature count: " << feature_count << ", Expected elements: " << feature_count * output_binCount
gyorgyf@69 340 << ", Written elements: " << element_count << ", Resulting float32 data size: " << element_count * sizeof(float) << " bytes."
gyorgyf@69 341 << endl;
gyorgyf@60 342 // FileFeatureWriter::finish();
gyorgyf@60 343 }