diff NNLSChroma.cpp @ 0:8aa2e8b3a778 matthiasm-plugin
on the way to dynamic kernel
| author   | matthiasm                       |
|----------|---------------------------------|
| date     | Tue, 18 May 2010 09:29:39 +0000 |
| parents  |                                 |
| children | 2a491d71057d                    |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/NNLSChroma.cpp Tue May 18 09:29:39 2010 +0000 @@ -0,0 +1,1031 @@ + +#include "NNLSChroma.h" +#include <cmath> +#include <list> +#include <iostream> +#include <sstream> +#include <cassert> +#include <cstdio> +// #include "cblas.h" +#include "chorddict.cpp" +using namespace std; + +const float sinvalue = 0.866025404; +const float cosvalue = -0.5; +const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082}; +const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000}; +const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350}; +const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)", +"A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"}; +const vector<float> hw(hammingwind, hammingwind+19); +const int nNote = 256; + +/** Special Convolution +special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the +convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values +as the first (last) valid convolution bin. +**/ + +const bool debug_on = false; + +vector<float> SpecialConvolution(vector<float> convolvee, vector<float> kernel) +{ + float s; + int m, n; + int lenConvolvee = convolvee.size(); + int lenKernel = kernel.size(); + + vector<float> Z(256,0); + assert(lenKernel % 2 != 0); // no exception handling !!! 
+ + for (n = lenKernel - 1; n < lenConvolvee; n++) { + s=0.0; + for (m = 0; m < lenKernel; m++) { + // cerr << "m = " << m << ", n = " << n << ", n-m = " << (n-m) << '\n'; + s += convolvee[n-m] * kernel[m]; + // if (debug_on) cerr << "--> s = " << s << '\n'; + } + // cerr << n - lenKernel/2 << endl; + Z[n -lenKernel/2] = s; + } + + // fill upper and lower pads + for (n = 0; n < lenKernel/2; n++) Z[n] = Z[lenKernel/2]; + for (n = lenConvolvee; n < lenConvolvee +lenKernel/2; n++) Z[n - lenKernel/2] = + Z[lenConvolvee - lenKernel/2 - 1]; + return Z; +} + +// vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize) +// { +// vector<float> freq(binnumbers.size, 0.0); +// for (unsigned i = 0; i < binnumbers.size; ++i) { +// freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize; +// } +// return freq; +// } + +float cospuls(float x, float centre, float width) +{ + float recipwidth = 1.0/width; + if (abs(x - centre) <= 0.5 * width) { + return cos((x-centre)*2*M_PI*recipwidth)*.5+.5; + } + return 0.0; +} + +float pitchCospuls(float x, float centre, int binsperoctave) +{ + float warpedf = -binsperoctave * (log2(centre) - log2(x)); + float out = cospuls(warpedf, 0.0, 2.0); + // now scale to correct for note density + float c = log(2.0)/binsperoctave; + if (x > 0) { + out = out / (c * x); + } else { + out = 0; + } + return out; +} + +bool logFreqMatrix(int fs, int blocksize, float *outmatrix) { + + int binspersemitone = 3; // this must be 3 + int minoctave = 0; // this must be 0 + int maxoctave = 7; // this must be 7 + int oversampling = 20; + + // linear frequency vector + vector<float> fft_f; + for (int i = 0; i < blocksize/2; ++i) { + fft_f.push_back(i * (fs * 1.0 / blocksize)); + } + float fft_width = fs * 2.0 / blocksize; + + // linear oversampled frequency vector + vector<float> oversampled_f; + for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) { + oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling)); + } + + // pitch-spaced frequency vector + int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone! + int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone! + vector<float> cq_f; + float oob = 1.0/binspersemitone; // one over binspersemitone + cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12 + cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69))); + for (int i = minMIDI + 1; i < maxMIDI; ++i) { + for (int k = -1; k < 2; ++k) { + cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69))); + } + } + cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69))); + cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69))); + + // limit the linear vectors to the frequencies used + float maxfreq = *cq_f.end() * pow(2.0,0.083333); + while (*oversampled_f.end() > maxfreq) { + oversampled_f.pop_back(); + } + while (*fft_f.end() > maxfreq) { + fft_f.pop_back(); + } + + int nFFT = fft_f.size(); + + // for (int i = 0; i < 100; i++) { + // cerr << oversampled_f[i] << " " << cospuls(oversampled_f[i],fft_f[1],fft_width) << endl; + // } + + vector<float> fft_activation; + for (int iOS = 0; iOS < 2 * oversampling; ++iOS) { + float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width); + fft_activation.push_back(cosp); + // cerr << cosp << endl; + } + + float cq_activation; + for (int iFFT = 1; iFFT < nFFT; ++iFFT) { + // find frequency stretch where the oversampled vector can be non-zero (i.e. 
in a window of width fft_width around the current frequency) + int curr_start = oversampling * iFFT - oversampling; + int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here + // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl; + for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) { + outmatrix[iFFT + nFFT * iCQ] = 0; + if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width/2 > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width/2 < fft_f[iFFT]) { // within a generous neighbourhood + for (int iOS = curr_start; iOS < curr_end; ++iOS) { + cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12); + // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl; + outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start]; + } + // if (iCQ == 1 || iCQ == 2) { + // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl; + // } + } + } + } + return true; +} + + +NNLSChroma::NNLSChroma(float inputSampleRate) : + Plugin(inputSampleRate), + m_fl(0), + m_blockSize(0), + m_stepSize(0), + m_lengthOfNoteIndex(0), + m_meanTuning0(0), + m_meanTuning1(0), + m_meanTuning2(0), + m_localTuning0(0), + m_localTuning1(0), + m_localTuning2(0), + m_paling(0), + m_localTuning(0), + m_kernelValue(0), + m_kernelFftIndex(0), + m_kernelNoteIndex(0), + m_tuneLocal(false), + m_dictID(0) +{ + if (debug_on) cerr << "--> NNLSChroma" << endl; +} + + +NNLSChroma::~NNLSChroma() +{ + if (debug_on) cerr << "--> ~NNLSChroma" << endl; +} + +string +NNLSChroma::getIdentifier() const +{ + if (debug_on) cerr << "--> getIdentifier" << endl; + return "nnls_chroma"; +} + +string +NNLSChroma::getName() const +{ + if (debug_on) cerr << "--> getName" << endl; + return "NNLS Chroma"; +} + +string +NNLSChroma::getDescription() const +{ + // Return something helpful here! + if (debug_on) cerr << "--> getDescription" << endl; + return ""; +} + +string +NNLSChroma::getMaker() const +{ + if (debug_on) cerr << "--> getMaker" << endl; + // Your name here + return "Matthias Mauch"; +} + +int +NNLSChroma::getPluginVersion() const +{ + if (debug_on) cerr << "--> getPluginVersion" << endl; + // Increment this each time you release a version that behaves + // differently from the previous one + return 1; +} + +string +NNLSChroma::getCopyright() const +{ + if (debug_on) cerr << "--> getCopyright" << endl; + // This function is not ideally named. It does not necessarily + // need to say who made the plugin -- getMaker does that -- but it + // should indicate the terms under which it is distributed. For + // example, "Copyright (year). All Rights Reserved", or "GPL" + return "Copyright (2010). 
All rights reserved."; +} + +NNLSChroma::InputDomain +NNLSChroma::getInputDomain() const +{ + if (debug_on) cerr << "--> getInputDomain" << endl; + return FrequencyDomain; +} + +size_t +NNLSChroma::getPreferredBlockSize() const +{ + if (debug_on) cerr << "--> getPreferredBlockSize" << endl; + return 16384; // 0 means "I can handle any block size" +} + +size_t +NNLSChroma::getPreferredStepSize() const +{ + if (debug_on) cerr << "--> getPreferredStepSize" << endl; + return 2048; // 0 means "anything sensible"; in practice this + // means the same as the block size for TimeDomain + // plugins, or half of it for FrequencyDomain plugins +} + +size_t +NNLSChroma::getMinChannelCount() const +{ + if (debug_on) cerr << "--> getMinChannelCount" << endl; + return 1; +} + +size_t +NNLSChroma::getMaxChannelCount() const +{ + if (debug_on) cerr << "--> getMaxChannelCount" << endl; + return 1; +} + +NNLSChroma::ParameterList +NNLSChroma::getParameterDescriptors() const +{ + if (debug_on) cerr << "--> getParameterDescriptors" << endl; + ParameterList list; + + ParameterDescriptor d0; + d0.identifier = "notedict"; + d0.name = "note dictionary"; + d0.description = "Notes in different note dictionaries differ by their spectral shapes."; + d0.unit = ""; + d0.minValue = 0; + d0.maxValue = 2; + d0.defaultValue = 0; + d0.isQuantized = true; + d0.valueNames.push_back("s = 0.6"); + d0.valueNames.push_back("s = 0.9"); + d0.valueNames.push_back("s linearly spaced"); + d0.valueNames.push_back("no NNLS"); + d0.quantizeStep = 1.0; + list.push_back(d0); + + ParameterDescriptor d1; + d1.identifier = "tuningmode"; + d1.name = "tuning mode"; + d1.description = "Tuning can be performed locally or on the whole extraction area."; + d1.unit = ""; + d1.minValue = 0; + d1.maxValue = 1; + d1.defaultValue = 1; + d1.isQuantized = true; + d1.valueNames.push_back("global tuning"); + d1.valueNames.push_back("local tuning"); + d1.quantizeStep = 1.0; + list.push_back(d1); + + ParameterDescriptor d2; + d2.identifier = "paling"; + d2.name = "spectral paling"; + d2.description = "Spectral paling: no paling - 0; whitening - 1."; + d2.unit = ""; + d2.minValue = 0; + d2.maxValue = 1; + d2.defaultValue = 0.5; + d2.isQuantized = false; + // d1.valueNames.push_back("global tuning"); + // d1.valueNames.push_back("local tuning"); + // d1.quantizeStep = 0.1; + list.push_back(d2); + + return list; +} + +float +NNLSChroma::getParameter(string identifier) const +{ + if (debug_on) cerr << "--> getParameter" << endl; + if (identifier == "notedict") { + return m_dictID; + } + + if (identifier == "paling") { + return m_paling; + } + + if (identifier == "tuningmode") { + if (m_tuneLocal) { + return 1.0; + } else { + return 0.0; + } + } + + return 0; + +} + +void +NNLSChroma::setParameter(string identifier, float value) +{ + if (debug_on) cerr << "--> setParameter" << endl; + if (identifier == "notedict") { + m_dictID = (int) value; + } + + if (identifier == "paling") { + m_paling = value; + } + + if (identifier == "tuningmode") { + m_tuneLocal = (value > 0) ? 
true : false; + // cerr << "m_tuneLocal :" << m_tuneLocal << endl; + } +} + +NNLSChroma::ProgramList +NNLSChroma::getPrograms() const +{ + if (debug_on) cerr << "--> getPrograms" << endl; + ProgramList list; + + // If you have no programs, return an empty list (or simply don't + // implement this function or getCurrentProgram/selectProgram) + + return list; +} + +string +NNLSChroma::getCurrentProgram() const +{ + if (debug_on) cerr << "--> getCurrentProgram" << endl; + return ""; // no programs +} + +void +NNLSChroma::selectProgram(string name) +{ + if (debug_on) cerr << "--> selectProgram" << endl; +} + + +NNLSChroma::OutputList +NNLSChroma::getOutputDescriptors() const +{ + if (debug_on) cerr << "--> getOutputDescriptors" << endl; + OutputList list; + + // Make chroma names for the binNames property + vector<string> chromanames; + vector<string> bothchromanames; + for (int iNote = 0; iNote < 24; iNote++) { + bothchromanames.push_back(notenames[iNote]); + if (iNote < 12) { + chromanames.push_back(notenames[iNote]); + } + } + + int nNote = 84; + + // See OutputDescriptor documentation for the possibilities here. + // Every plugin must have at least one output. + + OutputDescriptor d0; + d0.identifier = "tuning"; + d0.name = "Tuning"; + d0.description = "The concert pitch."; + d0.unit = "Hz"; + d0.hasFixedBinCount = true; + d0.binCount = 0; + d0.hasKnownExtents = true; + d0.minValue = 427.47; + d0.maxValue = 452.89; + d0.isQuantized = false; + d0.sampleType = OutputDescriptor::VariableSampleRate; + d0.hasDuration = false; + list.push_back(d0); + + OutputDescriptor d1; + d1.identifier = "logfreqspec"; + d1.name = "Log-Frequency Spectrum"; + d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping."; + d1.unit = ""; + d1.hasFixedBinCount = true; + d1.binCount = nNote; + d1.hasKnownExtents = false; + d1.isQuantized = false; + d1.sampleType = OutputDescriptor::FixedSampleRate; + d1.hasDuration = false; + d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d1); + + OutputDescriptor d2; + d2.identifier = "tunedlogfreqspec"; + d2.name = "Tuned Log-Frequency Spectrum"; + d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency."; + d2.unit = ""; + d2.hasFixedBinCount = true; + d2.binCount = 256; + d2.hasKnownExtents = false; + d2.isQuantized = false; + d2.sampleType = OutputDescriptor::FixedSampleRate; + d2.hasDuration = false; + d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d2); + + OutputDescriptor d3; + d3.identifier = "semitonespectrum"; + d3.name = "Semitone Spectrum"; + d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum."; + d3.unit = ""; + d3.hasFixedBinCount = true; + d3.binCount = 84; + d3.hasKnownExtents = false; + d3.isQuantized = false; + d3.sampleType = OutputDescriptor::FixedSampleRate; + d3.hasDuration = false; + d3.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d3); + + OutputDescriptor d4; + d4.identifier = "chroma"; + d4.name = "Chromagram"; + d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range."; + d4.unit = ""; + d4.hasFixedBinCount = true; + d4.binCount = 12; + d4.binNames = chromanames; + d4.hasKnownExtents = false; + d4.isQuantized = false; + d4.sampleType = OutputDescriptor::FixedSampleRate; + d4.hasDuration = false; + d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d4); + + OutputDescriptor d5; + d5.identifier = "basschroma"; + d5.name = "Bass Chromagram"; + d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range."; + d5.unit = ""; + d5.hasFixedBinCount = true; + d5.binCount = 12; + d5.binNames = chromanames; + d5.hasKnownExtents = false; + d5.isQuantized = false; + d5.sampleType = OutputDescriptor::FixedSampleRate; + d5.hasDuration = false; + d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d5); + + OutputDescriptor d6; + d6.identifier = "bothchroma"; + d6.name = "Chromagram and Bass Chromagram"; + d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription."; + d6.unit = ""; + d6.hasFixedBinCount = true; + d6.binCount = 24; + d6.binNames = bothchromanames; + d6.hasKnownExtents = false; + d6.isQuantized = false; + d6.sampleType = OutputDescriptor::FixedSampleRate; + d6.hasDuration = false; + d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d6); + + OutputDescriptor d7; + d7.identifier = "simplechord"; + d7.name = "Simple Chord Estimate"; + d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma."; + d7.unit = ""; + d7.hasFixedBinCount = true; + d7.binCount = 0; + d7.hasKnownExtents = false; + d7.isQuantized = false; + d7.sampleType = OutputDescriptor::VariableSampleRate; + d7.hasDuration = false; + d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d7); + + OutputDescriptor d8; + d8.identifier = "inconsistency"; + d8.name = "Harmonic inconsistency value"; + d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high."; + d8.unit = ""; + d8.hasFixedBinCount = true; + d8.binCount = 1; + d8.hasKnownExtents = false; + d8.isQuantized = false; + d8.sampleType = OutputDescriptor::FixedSampleRate; + d8.hasDuration = false; + d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d8); + + OutputDescriptor d9; + d9.identifier = "inconsistencysegment"; + d9.name = "Harmonic inconsistency segmenter"; + d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music."; + d9.unit = ""; + d9.hasFixedBinCount = true; + d9.binCount = 0; + d9.hasKnownExtents = true; + d9.minValue = 0.1; + d9.maxValue = 0.9; + d9.isQuantized = false; + d9.sampleType = OutputDescriptor::VariableSampleRate; + d9.hasDuration = false; + d9.sampleRate = (m_stepSize == 0) ? 
m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d9); + + OutputDescriptor d10; + d10.identifier = "localtuning"; + d10.name = "Local tuning"; + d10.description = ""; + d10.unit = "Hz"; + d10.hasFixedBinCount = true; + d10.binCount = 1; + d10.hasKnownExtents = true; + d10.minValue = 427.47; + d10.maxValue = 452.89; + d10.isQuantized = false; + d10.sampleType = OutputDescriptor::OneSamplePerStep; + d10.hasDuration = false; + d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize; + list.push_back(d10); + + return list; +} + + +bool +NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (debug_on) cerr << "--> initialise"; + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + m_blockSize = blockSize; + m_stepSize = stepSize; + frameCount = 0; + int tempn = 256 * m_blockSize/2; + cerr << tempn; + float *tempkernel = new float[tempn]; + + // vector<float> m_kernelValue; + // vector<int> m_kernelFftIndex; + // vector<int> m_kernelNoteIndex; + + + logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel); + + for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix + for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) { + if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { + m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); + m_kernelFftIndex.push_back(iFFT); + m_kernelNoteIndex.push_back(iNote); + } + } + } + delete tempkernel; + // int count = 0; + // for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { + // cerr << m_kernelFftIndex[count] << " -- " << m_kernelNoteIndex[count] << " -- " << m_kernelValue[count] << endl; + // count++; + // } + return true; +} + +void +NNLSChroma::reset() +{ + if (debug_on) cerr << "--> reset"; + // Clear buffers, reset stored values, etc + frameCount = 0; + m_dictID = 0; +} + +NNLSChroma::FeatureSet +NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp) +{ + if (debug_on) cerr << "--> process" << endl; + // int nNote = 84; // TODO: this should be globally set and/or depend on the kernel matrix + + frameCount++; + float *magnitude = new float[m_blockSize/2]; + + Feature f10; // local tuning + + const float *fbuf = inputBuffers[0]; + + // make magnitude + for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) { + magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] + + fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]); + } + + + // note magnitude mapping using pre-calculated matrix + float *nm = new float[nNote]; // note magnitude + for (size_t iNote = 0; iNote < nNote; iNote++) { + nm[iNote] = 0; // initialise as 0 + } + int binCount = 0; + for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) { + // cerr << "."; + nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount]; + binCount++; + } + cerr << nm[20]; + cerr << endl; + + + float one_over_N = 1.0/frameCount; + // update means of complex tuning variables + m_meanTuning0 *= float(frameCount-1)*one_over_N; + m_meanTuning1 *= float(frameCount-1)*one_over_N; + m_meanTuning2 *= float(frameCount-1)*one_over_N; + + for (int iTone = 0; iTone < 160; iTone = iTone + 3) { + m_meanTuning0 += nm[iTone + 0]*one_over_N; + m_meanTuning1 += nm[iTone + 1]*one_over_N; + m_meanTuning2 += nm[iTone + 2]*one_over_N; + m_localTuning0 *= 0.99994; m_localTuning0 += nm[iTone + 0]; + m_localTuning1 *= 0.99994; 
m_localTuning1 += nm[iTone + 1]; + m_localTuning2 *= 0.99994; m_localTuning2 += nm[iTone + 2]; + } + + // if (m_tuneLocal) { + // local tuning + float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2; + float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2; + float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); + m_localTuning.push_back(normalisedtuning); + float tuning440 = 440 * pow(2,normalisedtuning/12); + f10.values.push_back(tuning440); + // } + + Feature f1; // logfreqspec + f1.hasTimestamp = true; + f1.timestamp = timestamp; + for (size_t iNote = 0; iNote < nNote; iNote++) { + f1.values.push_back(nm[iNote]); + } + + FeatureSet fs; + fs[1].push_back(f1); + fs[10].push_back(f10); + + // deletes + delete[] magnitude; + delete[] nm; + + m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures + return fs; +} + +NNLSChroma::FeatureSet +NNLSChroma::getRemainingFeatures() +{ + if (debug_on) cerr << "--> getRemainingFeatures" << endl; + FeatureSet fsOut; + // + // /** Calculate Tuning + // calculate tuning from (using the angle of the complex number defined by the + // cumulative mean real and imag values) + // **/ + // float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2; + // float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2; + // float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); + // float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); + // int intShift = floor(normalisedtuning * 3); + // float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this + // + // char buffer0 [50]; + // + // sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); + // + // // cerr << "normalisedtuning: " << normalisedtuning << '\n'; + // + // // push tuning to FeatureSet fsOut + // Feature f0; // tuning + // f0.hasTimestamp = true; + // f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));; + // f0.label = buffer0; + // fsOut[0].push_back(f0); + // + // /** Tune Log-Frequency Spectrogram + // calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to + // perform linear interpolation on the existing log-frequency spectrogram (kinda f1). + // **/ + // + // float tempValue = 0; + // float dbThreshold = 0; // relative to the background spectrum + // float thresh = pow(10,dbThreshold/20); + // // cerr << "tune local ? 
" << m_tuneLocal << endl; + // int count = 0; + // + // for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) { + // Feature f1 = *i; + // Feature f2; // tuned log-frequency spectrum + // f2.hasTimestamp = true; + // f2.timestamp = f1.timestamp; + // f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero + // + // if (m_tuneLocal) { + // intShift = floor(m_localTuning[count] * 3); + // intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this + // } + // + // // cerr << intShift << " " << intFactor << endl; + // + // for (int k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins + // tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor; + // f2.values.push_back(tempValue); + // } + // + // f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge + // vector<float> runningmean = SpecialConvolution(f2.values,hw); + // vector<float> runningstd; + // for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance) + // runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); + // } + // runningstd = SpecialConvolution(runningstd,hw); // second step convolve + // for (int i = 0; i < 256; i++) { + // runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std + // if (runningstd[i] > 0) { + // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ? + // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0; + // } + // if (f2.values[i] < 0) { + // cerr << "ERROR: negative value in logfreq spectrum" << endl; + // } + // } + // fsOut[2].push_back(f2); + // count++; + // } + // + // /** Semitone spectrum and chromagrams + // Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum + // is inferred using a non-negative least squares algorithm. + // Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means + // bass and treble stacked onto each other). 
+ // **/ + // // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n); + // + // vector<vector<float> > chordogram; + // vector<float> oldchroma = vector<float>(12,0); + // vector<float> oldbasschroma = vector<float>(12,0); + // count = 0; + // + // for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) { + // Feature f2 = *it; // logfreq spectrum + // Feature f3; // semitone spectrum + // Feature f4; // treble chromagram + // Feature f5; // bass chromagram + // Feature f6; // treble and bass chromagram + // + // f3.hasTimestamp = true; + // f3.timestamp = f2.timestamp; + // + // f4.hasTimestamp = true; + // f4.timestamp = f2.timestamp; + // + // f5.hasTimestamp = true; + // f5.timestamp = f2.timestamp; + // + // f6.hasTimestamp = true; + // f6.timestamp = f2.timestamp; + // + // float b[256]; + // + // bool some_b_greater_zero = false; + // for (int i = 0; i < 256; i++) { + // b[i] = f2.values[i]; + // if (b[i] > 0) { + // some_b_greater_zero = true; + // } + // } + // + // // here's where the non-negative least squares algorithm calculates the note activation x + // + // vector<float> chroma = vector<float>(12, 0); + // vector<float> basschroma = vector<float>(12, 0); + // if (some_b_greater_zero) { + // } + // + // f4.values = chroma; + // f5.values = basschroma; + // chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas + // f6.values = chroma; + // + // // local chord estimation + // vector<float> currentChordSalience; + // float tempchordvalue = 0; + // float sumchordvalue = 0; + // int nChord = nChorddict / 24; + // for (int iChord = 0; iChord < nChord; iChord++) { + // tempchordvalue = 0; + // for (int iBin = 0; iBin < 12; iBin++) { + // tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; + // } + // for (int iBin = 12; iBin < 24; iBin++) { + // tempchordvalue += chorddict[24 * iChord + iBin] * chroma[iBin]; + // } + // sumchordvalue+=tempchordvalue; + // currentChordSalience.push_back(tempchordvalue); + // } + // for (int iChord = 0; iChord < nChord; iChord++) { + // currentChordSalience[iChord] /= sumchordvalue; + // } + // chordogram.push_back(currentChordSalience); + // + // fsOut[3].push_back(f3); + // fsOut[4].push_back(f4); + // fsOut[5].push_back(f5); + // fsOut[6].push_back(f6); + // // if (x) free(x); + // // delete[] b; + // count++; + // } + // // cerr << m_stepSize << endl<< endl; + // count = 0; + // int kernelwidth = (49 * 2048) / m_stepSize; + // int nChord = nChorddict / 24; + // int musicitykernelwidth = (50 * 2048) / m_stepSize; + // + // /* Simple chord estimation + // I just take the local chord estimates ("currentChordSalience") and average them over time, then + // take the maximum. Very simple, don't do this at home... 
+ // */ + // vector<int> chordSequence; + // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { + // + // int startIndex = max(count - kernelwidth/2 + 1,0); + // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1 + 1); + // vector<float> temp = vector<float>(nChord,0); + // for (int iChord = 0; iChord < nChord; iChord++) { + // float val = 0; + // for (int i = startIndex; i < endIndex; i++) { + // val += chordogram[i][iChord] * + // (kernelwidth - abs(i - startIndex - kernelwidth * 0.5)); // weigthed sum (triangular window) + // } + // temp[iChord] = val; // sum + // } + // + // // get maximum for "chord estimate" + // + // float bestChordValue = 0; + // int bestChordIndex = nChord-1; // "no chord" is default + // for (int iChord = 0; iChord < nChord; iChord++) { + // if (temp[iChord] > bestChordValue) { + // bestChordValue = temp[iChord]; + // bestChordIndex = iChord; + // } + // } + // // cerr << bestChordIndex << endl; + // chordSequence.push_back(bestChordIndex); + // count++; + // } + // // mode filter on chordSequence + // count = 0; + // int oldChordIndex = -1; + // for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { + // Feature f6 = *it; + // Feature f7; // chord estimate + // + // f7.hasTimestamp = true; + // f7.timestamp = f6.timestamp; + // + // vector<int> chordCount = vector<int>(121,0); + // + // int maxChordCount = 0; + // int maxChordIndex = 120; + // int startIndex = max(count - kernelwidth/2,0); + // int endIndex = min(int(chordogram.size()), startIndex + kernelwidth - 1); + // for (int i = startIndex; i < endIndex; i++) { + // chordCount[chordSequence[i]]++; + // if (chordCount[chordSequence[i]] > maxChordCount) { + // maxChordCount++; + // maxChordIndex = chordSequence[i]; + // } + // } + // if (oldChordIndex != maxChordIndex) { + // oldChordIndex = maxChordIndex; + // + // char buffer1 [50]; + // if (maxChordIndex < nChord - 1) { + // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]); + // } else { + // sprintf(buffer1, "N"); + // } + // f7.label = buffer1; + // fsOut[7].push_back(f7); + // } + // count++; + // } + // // musicity + // count = 0; + // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 + // vector<float> musicityValue; + // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { + // Feature f4 = *it; + // + // int startIndex = max(count - musicitykernelwidth/2,0); + // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); + // float chromasum = 0; + // float diffsum = 0; + // for (int k = 0; k < 12; k++) { + // for (int i = startIndex + 1; i < endIndex; i++) { + // chromasum += pow(fsOut[4][i].values[k],2); + // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]); + // } + // } + // diffsum /= chromasum; + // musicityValue.push_back(diffsum); + // count++; + // } + // + // float musicityThreshold = 0.44; + // if (m_stepSize == 4096) { + // musicityThreshold = 0.74; + // } + // if (m_stepSize == 4410) { + // musicityThreshold = 0.77; + // } + // + // count = 0; + // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { + // Feature f4 = *it; + // Feature f8; // musicity + // Feature f9; // musicity segmenter + // + // f8.hasTimestamp = true; + // f8.timestamp = f4.timestamp; + // f9.hasTimestamp = true; + // f9.timestamp = f4.timestamp; + // + // int startIndex = max(count - musicitykernelwidth/2,0); + // int endIndex = 
min(int(chordogram.size()), startIndex + musicitykernelwidth - 1); + // int musicityCount = 0; + // for (int i = startIndex; i <= endIndex; i++) { + // if (musicityValue[i] > musicityThreshold) musicityCount++; + // } + // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1); + // + // if (isSpeech) { + // if (oldlabeltype != 2) { + // f9.label = "Speech"; + // fsOut[9].push_back(f9); + // oldlabeltype = 2; + // } + // } else { + // if (oldlabeltype != 1) { + // f9.label = "Music"; + // fsOut[9].push_back(f9); + // oldlabeltype = 1; + // } + // } + // f8.values.push_back(musicityValue[count]); + // fsOut[8].push_back(f8); + // count++; + // } + return fsOut; + +} +
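
Note: the SpecialConvolution comment near the top of the file describes an output that is as long as the convolvee, with the valid core holding ordinary convolution values and the leading/trailing pads repeating the first/last valid bin. The following is a minimal standalone sketch of that padding behaviour, not part of the plugin source: the function name is hypothetical and the output length follows the input rather than being fixed at 256 bins.

// Standalone sketch of the "special convolution" padding idea: the output
// is as long as the convolvee, the valid core holds ordinary convolution
// values, and the pads repeat the first/last valid convolution bin.
#include <cassert>
#include <cstdio>
#include <vector>

std::vector<float> specialConvolutionSketch(const std::vector<float>& convolvee,
                                            const std::vector<float>& kernel)
{
    const int lenC = static_cast<int>(convolvee.size());
    const int lenK = static_cast<int>(kernel.size());
    assert(lenK % 2 != 0); // odd-length kernel, as in the plugin

    std::vector<float> out(lenC, 0.0f);

    // valid core: ordinary convolution, written back centred on the kernel
    for (int n = lenK - 1; n < lenC; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenK; ++m) s += convolvee[n - m] * kernel[m];
        out[n - lenK / 2] = s;
    }

    // pads: repeat the first / last valid convolution bin
    for (int n = 0; n < lenK / 2; ++n) out[n] = out[lenK / 2];
    for (int n = lenC - lenK / 2; n < lenC; ++n) out[n] = out[lenC - lenK / 2 - 1];

    return out;
}

int main()
{
    std::vector<float> x(10, 1.0f);              // constant input signal
    std::vector<float> k = {0.25f, 0.5f, 0.25f}; // normalised 3-tap kernel
    std::vector<float> y = specialConvolutionSketch(x, k);
    for (float v : y) std::printf("%.2f ", v);   // prints 1.00 for every bin
    std::printf("\n");
    return 0;
}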
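
Similarly, the tuning update in process() accumulates the energy of the three log-frequency bins per semitone and reads the tuning offset from the angle of the resulting complex vector (sinvalue and cosvalue are sin and cos of 2*pi/3). Below is a minimal sketch of that calculation with a hypothetical helper name and made-up input values; it illustrates the arithmetic only, not the plugin's bin bookkeeping.

// Standalone illustration of the phase-vector tuning estimate: the three
// sub-semitone bin energies are projected onto a complex exponential and
// the angle of that vector gives the tuning offset in semitones.
#include <cmath>
#include <cstdio>

float tuningFromBinEnergies(float b0, float b1, float b2) // hypothetical helper
{
    const float sinvalue = 0.866025404f; // sin(2*pi/3)
    const float cosvalue = -0.5f;        // cos(2*pi/3)

    // project onto exp(i*2*pi*k/3), k = 0, 1, 2 (sin(0) = 0, sin(4*pi/3) = -sin(2*pi/3))
    float imag = sinvalue * b1 - sinvalue * b2;
    float real = b0 + cosvalue * b1 + cosvalue * b2;

    float offsetSemitones = std::atan2(imag, real) / (2 * M_PI); // in (-0.5, 0.5]
    return 440.0f * std::pow(2.0f, offsetSemitones / 12.0f);     // concert pitch in Hz
}

int main()
{
    // energy dominated by the first bin of each triple -> zero offset -> 440.00 Hz
    std::printf("%.2f Hz\n", tuningFromBinEnergies(1.0f, 0.1f, 0.1f));
    // energy shifted towards the +1/3-semitone bin -> estimate above 440 Hz
    std::printf("%.2f Hz\n", tuningFromBinEnergies(0.5f, 1.0f, 0.1f));
    return 0;
}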