changeset 60:da84d2efd7a3

Added NumPy compatible binary feature writer for track-level outputs
author gyorgyf
date Sun, 12 Feb 2012 19:55:06 +0000
parents 52b9d58edb78
children 0eadb9b0ddb8
files runner/AudioDBFeatureWriter.h runner/BinaryFeatureWriter.cpp runner/BinaryFeatureWriter.h runner/FeatureExtractionManager.cpp runner/FeatureWriterFactory.cpp runner/main.cpp
diffstat 6 files changed, 442 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/runner/AudioDBFeatureWriter.h	Mon Jan 09 16:30:44 2012 +0000
+++ b/runner/AudioDBFeatureWriter.h	Sun Feb 12 19:55:06 2012 +0000
@@ -53,7 +53,12 @@
     static string catalogueIdParam;
     static string baseDirParam;
     
-    struct TrackStream;
+    struct TrackStream; // defined in .cpp as follows:
+	// struct AudioDBFeatureWriter::TrackStream
+	// {
+	//     QString trackid;
+	//     ofstream* ofs;
+	// };
     map<string, TrackStream> dbfiles;
     
     bool openDBFile(QString trackid, const string& identifier);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runner/BinaryFeatureWriter.cpp	Sun Feb 12 19:55:06 2012 +0000
@@ -0,0 +1,330 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#include <fstream>
+
+#include <QFileInfo>
+
+#include <ctime>
+
+#include <stdint.h>
+
+#include "BinaryFeatureWriter.h"
+
+#include "base/RealTime.h"
+
+#include "../version.h"
+
+// Platform name embedded in the info text of the file header.
+#if defined(_WIN32)
+#  define platform "Windows"
+#elif defined(__APPLE__)
+#  define platform "MacOS"
+#elif defined(__linux__)
+#  define platform "Linux"
+#else
+#  define platform "Unix"
+#endif
+// Word-size label for the info text: "64" on LP64 targets and 64-bit Windows.
+#if defined(__LP64__) || defined(_WIN64)
+#  define arch "64"
+#else
+#  define arch "32"
+#endif
+// Major version of the binary format, stored in the file header.
+#define MAJOR_VERSION_PY 0
+
+using namespace std;
+using namespace Vamp;
+
+// Parameter names
+// Name of the parameter that selects the output file path.
+string BinaryFeatureWriter::outputFileParam("output");
+
+// Owns the binary output stream and remembers which transform's record is in
+// progress. open() either validates and extends an existing file or creates one.
+struct BinaryFeatureWriter::OutputStream
+{
+    ofstream* stream;           // NULL until open() succeeds
+    bool newtransform;          // true when the next write() must start a new record
+    const Transform *transform; // transform of the record in progress (not owned)
+
+    // Initialisers are listed in declaration order, the order in which they run.
+    OutputStream() : stream(NULL), newtransform(true), transform(NULL) { }
+    ~OutputStream() { discard(); }
+    // Closes and frees the stream so that a later open() starts from scratch.
+    void discard() { if (stream != NULL) { stream->close(); delete stream; stream = NULL; } }
+    // File header, written to disk as raw bytes: its layout is part of the format.
+    struct header_t {
+        int16_t BOM16;          // 16-bit BOM (FEFF as in UTF-16)
+        int32_t BOM32;          // 32-bit BOM (human readable: e.g. ABCD)
+        char major_version;     // check for binary compatibility
+        char minor_version;     // changes in txt parts only
+        char compression;       // use of stream compression (e.g. gzip)
+        char reserved1;         // reserved byte
+        char reserved2;         // reserved byte
+        char float_size;        // size of float
+        char int_size;          // size of int
+        char info[160];         // 160 byte text field
+        char null; };           // NULL
+
+    // Opens filename; returns true on success or when a stream is already open.
+    // With append == true an existing file must carry a compatible header and is
+    // positioned at its end; if it cannot be opened, a new file is created with
+    // a fresh header instead. Every failure releases the stream and returns false.
+    bool open(string filename, bool append = true) {
+        if (stream) return true;
+        if (append)
+            stream = new ofstream(filename.c_str(), fstream::binary | ios_base::out | ios_base::in | ofstream::ate);
+        else
+            stream = new ofstream(filename.c_str(), fstream::binary);
+
+        // new never yields NULL, so the stream state is what has to be checked
+        if (!stream->is_open()) {
+            discard();
+            if (!append) {
+                cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): can't open file " << filename << endl;
+                return false;
+            }
+            // in|out does not create a missing file: retry in create mode
+            cerr << "NOTE: Writing new binary output file: " << filename << endl;
+            return open(filename, false);
+        }
+        if (append) {
+            // verify input file format; a short read must not be treated as a header
+            header_t iheader;
+            ifstream istream(filename.c_str(), fstream::binary | ios::in);
+            if (!istream.is_open() || !istream.read(reinterpret_cast<char*>(&iheader), sizeof(header_t))) {
+                cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): Can not verify supplied output stream." << endl;
+                discard();
+                return false;
+            }
+            if (iheader.BOM16 != static_cast<int16_t>(0xFEFF)) {
+                if (iheader.BOM16 == static_cast<int16_t>(0xFFFE)) {
+                    cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): This file apperas to have created on a different platform. Can not be appended. " 
+                    << "Byte order mark: " << iheader.BOM32 << endl;
+                } else {
+                    cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): Invalid target file for this writer." << endl;
+                }
+                discard();
+                return false;
+            }
+            if (iheader.major_version != static_cast<char>(MAJOR_VERSION_PY) || iheader.BOM32 != static_cast<int32_t>(0x41424344)) {
+                // print the version as a number; a char would be emitted as a raw byte
+                cerr << "ERROR: BinaryFeatureWriter::OutputStream::open(): This file is not binary compatible with this version of the writer." 
+                << "file version: " << static_cast<int>(iheader.major_version) << " required: " << MAJOR_VERSION_PY << endl;
+                discard();
+                return false;
+            }
+            stream->seekp(0, ios_base::end);
+            return true;
+        }
+        // New file: stamp it with writer version, platform and creation time.
+        header_t header = { static_cast<int16_t>(0xFEFF), 0x41424344, MAJOR_VERSION_PY, 1, 0, 0, 0,
+                            static_cast<char>(sizeof(float)), static_cast<char>(sizeof(int)), {0}, 0 };
+        time_t now = time(0);
+        tm* gmtm = gmtime(&now);
+        QString timestamp;
+        if (gmtm != NULL)
+            timestamp = QString("%1 %2").arg(", Created: ").arg(asctime(gmtm)).trimmed();
+        string info = QString(" SONIC ANNOTATOR v%1 PYTHON BINARY V0.1, Platform: %2-%3bit%4")
+            .arg(RUNNER_VERSION).arg(platform).arg(arch).arg(timestamp).toStdString();
+        // copy() adds no terminator; the unused tail of info stays zero-initialised
+        info.copy(header.info, sizeof(header.info));
+        stream->write(reinterpret_cast<const char*>(&header), sizeof(header_t));
+        return true;
+    }
+};
+
+
+// Starts with the default base name "features" and no open file; the record
+// counters are zeroed so they never hold indeterminate values.
+BinaryFeatureWriter::BinaryFeatureWriter() :
+    outputFile("features"), binary(new OutputStream()),
+    feature_count(0), data_size_pos(0), output_binCount(0) { }
+
+BinaryFeatureWriter::~BinaryFeatureWriter()
+{   // deleting a null pointer is a no-op, so no guard is needed
+    delete binary;
+}
+
+// Describes the single parameter this writer accepts: the output file path.
+BinaryFeatureWriter::ParameterList
+BinaryFeatureWriter::getSupportedParameters() const
+{
+    Parameter outputPath;
+    outputPath.hasArg = true;
+    outputPath.description = "Binary output file path";
+    outputPath.name = outputFileParam;
+
+    ParameterList supported;
+    supported.push_back(outputPath);
+    return supported;
+}
+
+// Takes the output-file entry out of params, if present, and applies it.
+void BinaryFeatureWriter::setParameters(map<string, string> &params)
+{
+    map<string, string>::iterator found = params.find(outputFileParam);
+    if (found == params.end()) return;
+    setOutputFile(found->second);
+    params.erase(found);
+}
+
+// Stores the base path of the output file; ".bin" is appended when it is opened.
+void BinaryFeatureWriter::setOutputFile(const string &file)
+{
+    this->outputFile = file;
+}
+
+// Appends the features in featureList to the binary output file.
+//
+// When a new record starts (binary->newtransform is set), a one-line text
+// header is written first: a Python dict literal holding the track id, the
+// transform settings, the output description and the transform parameters,
+// padded with blanks that finish() later overwrites with the feature count
+// and data size. Every call then appends the raw in-memory bytes of each
+// feature value; timestamps are not written.
+//
+// Summaries are not supported: a non-empty summaryType, like a failure to
+// open the output file, prints an error and terminates the process (exit 1).
+void BinaryFeatureWriter::write(QString trackid,
+                                 const Transform &transform,
+                                 const Vamp::Plugin::OutputDescriptor& output, 
+                                 const Vamp::Plugin::FeatureList& featureList,
+                                 std::string summaryType)
+{
+    //!!! use summaryType
+    if (summaryType != "") {
+        //!!! IMPLEMENT
+        cerr << "ERROR: BinaryFeatureWriter::write: Writing summaries is not yet implemented!" << endl;
+        exit(1);
+    }
+
+	// TODO: Consider writing out NumPy arrays directly following this documentation:
+	// https://github.com/numpy/numpy/blob/master/doc/neps/npy-format.txt 
+	// and using the .npy format
+		
+	// terminate if the file could not be opened (open() returns at once when a stream already exists)
+	if(!openBinaryFile()) {
+		cerr << "ERROR: BinaryFeatureWriter::write: Error opening binary output file!" << endl;
+	    exit(1);	        
+	}	
+		
+	ofstream &ofs = *(binary->stream); 
+	
+	// The manager does not call finish() after writing different outputs from the same plugin, but we need this behaviour here:
+	if (!binary->newtransform && binary->transform != NULL && binary->transform != &transform) finish();
+	
+	// write a python dictionary string containing (some) metadata needed to interpret the results
+	// this can be evaluated in python using the expression : d = eval(f.readline())
+	// given f is an open file, which should yield a valid dictionary. 
+/* Numeric values emitted for "output_sample_type", as listed by the author:
+	enum SampleType {
+	
+	    /// Results from each process() align with that call's block start
+	    0: OneSamplePerStep, 
+	
+	    /// Results are evenly spaced in time 
+	    1: FixedSampleRate,
+	
+	    /// Results are unevenly spaced and have individual timestamps
+	    2: VariableSampleRate 
+	};
+*/
+	
+	if (binary->newtransform) {
+		binary->newtransform = false;
+		output_binCount = output.binCount;
+		feature_count = 0;		
+		binary->transform = &transform;
+	
+	ofs << endl << "{"
+	<< "\"track_id\":\"" << trackid << "\"," 
+	<< "\"transform_id\":\"" << transform.getIdentifier() << "\","
+	<< "\"sample_rate\":" << transform.getSampleRate() << ","
+	<< "\"step_size\":" << transform.getStepSize() << ","
+	<< "\"block_size\":" << transform.getBlockSize() << ","
+	<< "\"window_type\":" << transform.getWindowType() << ","
+	
+	<< "\"features_list\":" << featureList.size() << ","
+	<< "\"bin_count\":" << output.binCount << ","
+	// << "\"output_description\":\"" << output.description << "\","
+	<< "\"output_sample_type\":" << output.sampleType << ","
+	<< "\"output_sample_rate\":" << output.sampleRate << ",";
+	
+	// Write start time and duration if the transform is not for the whole file
+	if (transform.getDuration().toString() != "0.000000000") {
+		ofs << "\"start_time\":\"" << transform.getStartTime().toString() << "\",";	
+		ofs << "\"duration\":\"" << transform.getDuration().toString() << "\",";
+	}
+	// Write plugin version if known. (NOTE: using RDF transforms, it remains empty for some reason)
+	if (!transform.getPluginVersion().isEmpty())
+		ofs << "\"plugin_version\":\"" << transform.getPluginVersion() << "\",";
+	
+	// write transform parameters into a dict: parameters:{"parameter_name":value,...} where value is float	
+	ofs << "\"parameters\":{" ;
+	ParameterMap m = transform.getParameters();	
+	
+	for (ParameterMap::const_iterator i = m.begin(); i != m.end(); ++i) 
+		// every entry after the first is prefixed with a comma, so no trailing comma is produced
+		if (i == m.begin())
+			ofs << QString("\"%1\":%2").arg(i->first).arg(i->second);
+		else
+			ofs << QString(",\"%1\":%2").arg(i->first).arg(i->second);
+	ofs << "}"; 
+	
+	// remember where the blank placeholder starts (finish() writes feature_count and data_size there), then close the line.
+	data_size_pos = ofs.tellp();	
+	ofs << "                                                   }" << endl;	
+	}
+	
+	// write the feature data
+	feature_count += featureList.size();	
+	for (size_t i = 0; i < featureList.size(); ++i) {
+		for (size_t j = 0; j < featureList[i].values.size(); ++j)
+			ofs.write( (const char*) &featureList[i].values[j], sizeof(featureList[i].values[j]) );
+	}
+	// No separator follows the values (the trailing "ofs << endl" is disabled).
+	// finish() reports data_size as feature_count * output_binCount *
+	// sizeof(float), i.e. it assumes every feature carries output.binCount
+	// values. NOTE(review): nothing here checks that; confirm with callers.
+	//
+	// Timestamps are not stored. A disabled draft wrote timestamp.sec and
+	// timestamp.nsec (as ints) once per value of each feature; that code
+	// path is not active.
+	
+}
+
+bool BinaryFeatureWriter::openBinaryFile() { // opens "<outputFile>.bin"
+    const string target(outputFile + ".bin"); // open() appends by default
+    return binary->open(target);
+}
+
+// Completes the record started by write(): seeks back to data_size_pos, fills
+// the blank placeholder with feature_count and data_size, then restores the
+// write position so later output is appended after the feature data.
+void BinaryFeatureWriter::finish()
+{
+    // Nothing to patch (and no stream to dereference) unless a record is open.
+    if (binary == NULL || binary->stream == NULL || binary->newtransform) return;
+    ofstream &ofs = *(binary->stream);
+    binary->newtransform = true; // the next write() must emit a fresh record header
+    const ofstream::pos_type resume = ofs.tellp();
+    ofs.seekp(data_size_pos);
+    ofs << ",\"feature_count\":" << feature_count
+        << ",\"data_size\":" << feature_count * output_binCount * sizeof(float);
+    ofs.seekp(resume);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runner/BinaryFeatureWriter.h	Sun Feb 12 19:55:06 2012 +0000
@@ -0,0 +1,74 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Annotator
+    A utility for batch feature extraction from audio files.
+    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
+    Copyright 2007-2008 QMUL.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef _BINARY_FEATURE_WRITER_H_
+#define _BINARY_FEATURE_WRITER_H_
+
+#include <string>
+#include <map>
+#include <fstream>
+
+using std::string;
+using std::map;
+
+#include "transform/FeatureWriter.h"
+
+// FeatureWriter that appends raw feature values to a single binary file
+// ("<output>.bin"), each record preceded by a one-line text header.
+class BinaryFeatureWriter : public FeatureWriter
+{
+public:
+    BinaryFeatureWriter();
+    virtual ~BinaryFeatureWriter();
+
+    // Lists the "output" parameter (path of the binary output file).
+    virtual ParameterList getSupportedParameters() const;
+    // Consumes "output" from params if present; other entries are left alone.
+    virtual void setParameters(map<string, string> &params);
+
+    // Sets the output path; ".bin" is appended when the file is opened.
+    virtual void setOutputFile(const string &);
+
+    // Writes a record header when a new record starts, then the feature values.
+    virtual void write(QString trackid,
+                       const Transform &transform,
+                       const Vamp::Plugin::OutputDescriptor &output,
+                       const Vamp::Plugin::FeatureList &features,
+                       std::string summaryType = "");
+
+    // Completes the current record by filling in its feature count and data size.
+    virtual void finish();
+
+    virtual QString getWriterTag() const { return "binary"; }
+
+private:
+    // The writer owns `binary` through a raw pointer and deletes it in the
+    // destructor, so copying would double-delete; declared but not defined.
+    BinaryFeatureWriter(const BinaryFeatureWriter &);
+    BinaryFeatureWriter &operator=(const BinaryFeatureWriter &);
+
+    string outputFile;              // output path without the ".bin" suffix
+    static string outputFileParam;  // name of the output-path parameter
+    struct OutputStream;            // defined in BinaryFeatureWriter.cpp
+    typedef std::map<QString, float> ParameterMap;
+    OutputStream *binary;           // owned; created in the constructor
+
+    bool openBinaryFile();
+    long feature_count;             // features written to the current record
+    long data_size_pos;             // stream offset of the header placeholder
+    long output_binCount;           // bin count of the current record's output
+};
+
+#endif
--- a/runner/FeatureExtractionManager.cpp	Mon Jan 09 16:30:44 2012 +0000
+++ b/runner/FeatureExtractionManager.cpp	Sun Feb 12 19:55:06 2012 +0000
@@ -474,7 +474,7 @@
             cerr << "Taking default channel count of "
                  << reader->getChannelCount() << " from file" << endl;
         }
-
+		
         if (m_defaultSampleRate == 0) {
             m_defaultSampleRate = reader->getNativeRate();
             cerr << "Taking default sample rate of "
@@ -708,7 +708,8 @@
         int index;
         int fc = (int)frames.size();
 
-        if (m_channels == 1) { // only case in which we can sensibly mix down
+	    if (m_channels == 1) { // only case in which we can sensibly mix down	
+			//cerr << "Note: Mixing down..." << endl;
             for (int j = 0; j < m_blockSize; ++j) {
                 data[0][j] = 0.f;
             }
--- a/runner/FeatureWriterFactory.cpp	Mon Jan 09 16:30:44 2012 +0000
+++ b/runner/FeatureWriterFactory.cpp	Sun Feb 12 19:55:06 2012 +0000
@@ -19,6 +19,7 @@
 #include "DefaultFeatureWriter.h"
 #include "rdf/RDFFeatureWriter.h"
 #include "AudioDBFeatureWriter.h"
+#include "BinaryFeatureWriter.h"
 #include "transform/CSVFeatureWriter.h"
 
 set<string>
@@ -29,6 +30,7 @@
     tags.insert("rdf");
     tags.insert("audiodb");
     tags.insert("csv");
+    tags.insert("binary");
     return tags;
 }
 
@@ -41,6 +43,8 @@
         return new RDFFeatureWriter();
     } else if (tag == "audiodb") {
         return new AudioDBFeatureWriter();
+	} else if (tag == "binary") {
+        return new BinaryFeatureWriter();
     } else if (tag == "csv") {
         return new CSVFeatureWriter();
     }
--- a/runner/main.cpp	Mon Jan 09 16:30:44 2012 +0000
+++ b/runner/main.cpp	Sun Feb 12 19:55:06 2012 +0000
@@ -369,6 +369,8 @@
     bool recursive = false;
     bool list = false;
     bool summaryOnly = false;
+	int channels = 0;
+	int dSampleRate = 0;
     QString skeletonFor = "";
     QString myname = args[0];
     myname = QFileInfo(myname).baseName();
@@ -465,6 +467,26 @@
                 requestedSummaryTypes.insert(summary);
                 continue;
             }
+		} else if (arg == "-c" || arg == "--channels") {
+            if (last || args[i+1].startsWith("-")) {
+                cerr << myname.toStdString() << ": argument expected for \""
+                     << arg.toStdString() << "\" option" << endl;
+                cerr << helpStr.toStdString() << endl;
+                exit(2);
+            } else {
+				channels = args[++i].toInt();
+                continue;
+            }
+		} else if (arg == "-sr" || arg == "--samplingrate") {
+            if (last || args[i+1].startsWith("-")) {
+                cerr << myname.toStdString() << ": argument expected for \""
+                     << arg.toStdString() << "\" option" << endl;
+                cerr << helpStr.toStdString() << endl;
+                exit(2);
+            } else {
+				dSampleRate = args[++i].toInt();
+                continue;
+            }		
         } else if (arg == "--summary-only") {
             summaryOnly = true;
             continue;
@@ -519,7 +541,7 @@
             otherArgs.push_back(args[i]);
         }
     }
-
+	
     if (list) {
         if (!requestedWriterTags.empty() || skeletonFor != "") {
             cerr << helpStr.toStdString() << endl;
@@ -700,6 +722,8 @@
 
     for (QStringList::const_iterator i = sources.begin();
          i != sources.end(); ++i) {
+		 if (channels != 0) manager.setChannels(channels);
+		 if (dSampleRate != 0) manager.setDefaultSampleRate(dSampleRate);
         try {
             manager.addSource(*i);
         } catch (const std::exception &e) {