annotate NNLSChroma.cpp @ 9:0f270f3d2131 matthiasm-plugin

working with new optional chord dictionary in the file chord.dict
author matthiasm
date Thu, 03 Jun 2010 09:10:40 +0000
parents 84db8ce38fd3
children a02d56ac1464
rev   line source
matthiasm@0 1
matthiasm@0 2 #include "NNLSChroma.h"
matthiasm@0 3 #include <cmath>
matthiasm@0 4 #include <list>
matthiasm@0 5 #include <iostream>
matthiasm@3 6 #include <fstream>
matthiasm@0 7 #include <sstream>
matthiasm@0 8 #include <cassert>
matthiasm@7 9 #include <cstdlib>
matthiasm@0 10 #include <cstdio>
matthiasm@7 11 #include <boost/tokenizer.hpp>
matthiasm@7 12 #include <boost/iostreams/device/file.hpp>
matthiasm@7 13 #include <boost/iostreams/stream.hpp>
matthiasm@7 14 #include <boost/lexical_cast.hpp>
matthiasm@1 15 #include "nnls.h"
matthiasm@0 16 #include "chorddict.cpp"
matthiasm@9 17
matthiasm@9 18 #include <omp.h>
matthiasm@9 19 #define N 1000
matthiasm@9 20 #define CHUNKSIZE 100
matthiasm@9 21
matthiasm@9 22
matthiasm@0 23 using namespace std;
matthiasm@7 24 using namespace boost;
matthiasm@0 25
// Numerically these are sin(2*pi/3) and cos(2*pi/3), i.e. the sine and cosine
// of 120 degrees, stored in single precision.
// NOTE(review): presumably used for the three-bins-per-semitone tuning
// estimate further down the file -- confirm against the users of these names.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;
matthiasm@0 28 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
matthiasm@0 29 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
matthiasm@0 30 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
// Display names of the 24 chroma bins: entries 0..11 are the bass pitch
// classes, entries 12..23 the plain pitch classes; both runs start at A.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// bassnames[root][interval]: spelling of the note `interval` semitones above
// the chord root `root` (roots in the same A..Ab order as notenames).
// Intervals 1, 6 and 8 carry an empty string in every row.
const char* bassnames[12][12] = {
    /* A  */ {"A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#"},
    /* Bb */ {"Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"},
    /* B  */ {"B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#"},
    /* C  */ {"C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"},
    /* C# */ {"C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#"},
    /* D  */ {"D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#"},
    /* Eb */ {"Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"},
    /* E  */ {"E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#"},
    /* F  */ {"F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"},
    /* F# */ {"F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#"},
    /* G  */ {"G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#"},
    /* Ab */ {"Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"}
};
matthiasm@0 48 const vector<float> hw(hammingwind, hammingwind+19);
matthiasm@0 49 const int nNote = 256;
matthiasm@0 50
matthiasm@0 51 /** Special Convolution
matthiasm@0 52 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
matthiasm@0 53 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
matthiasm@0 54 as the first (last) valid convolution bin.
matthiasm@0 55 **/
matthiasm@0 56
// Compile-time switch for the "--> functionName" trace lines written to cerr.
const bool debug_on = false;
matthiasm@0 58
/**
 * Convolve `convolvee` with `kernel` and return a result of the same length
 * as `convolvee`.
 *
 * The central ("valid") part holds the ordinary convolution values. The
 * kernel.size()/2 entries at either end, where the kernel would overhang the
 * input, are filled with the first / last valid value respectively.
 *
 * @param convolvee  signal to be smoothed
 * @param kernel     convolution kernel; its length must be odd (asserted)
 * @return vector of convolvee.size() entries; all zeros when the input is
 *         shorter than the kernel, because no valid value exists then
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());

    assert(lenKernel % 2 != 0); // no exception handling !!!

    // Size the output from the input instead of assuming 256 bins.
    std::vector<float> Z(lenConvolvee, 0.0f);
    if (lenKernel > lenConvolvee) {
        return Z; // kernel never fits: nothing valid to compute or to pad with
    }

    const int half = lenKernel / 2;

    // valid core
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - half] = s;
    }

    // lower and upper pads repeat the nearest valid value
    for (int n = 0; n < half; ++n) {
        Z[n] = Z[half];
    }
    for (int n = lenConvolvee - half; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - half - 1];
    }
    return Z;
}
matthiasm@0 86
matthiasm@0 87 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0 88 // {
matthiasm@0 89 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0 90 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0 91 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0 92 // }
matthiasm@0 93 // return freq;
matthiasm@0 94 // }
matthiasm@0 95
/**
 * Raised-cosine pulse.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the peak (value 1)
 * @param width   total width of the pulse
 * @return 0.5 + 0.5 * cos(2*pi*(x - centre)/width) while |x - centre| is at
 *         most width/2, and 0 elsewhere. A width that is not positive yields
 *         0 everywhere.
 */
float cospuls(float x, float centre, float width)
{
    // A pulse without extent covers nothing; bailing out here also keeps the
    // reciprocal below from dividing by zero (which produced NaN at x == centre).
    if (!(width > 0)) {
        return 0.0;
    }

    float recipwidth = 1.0/width;
    // std::abs guarantees the floating-point overload; an unqualified abs can
    // silently resolve to the integer version from <cstdlib>.
    if (std::abs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*M_PI*recipwidth)*.5+.5;
    }
    return 0.0;
}
matthiasm@0 104
matthiasm@0 105 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0 106 {
matthiasm@0 107 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0 108 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0 109 // now scale to correct for note density
matthiasm@0 110 float c = log(2.0)/binsperoctave;
matthiasm@0 111 if (x > 0) {
matthiasm@0 112 out = out / (c * x);
matthiasm@0 113 } else {
matthiasm@0 114 out = 0;
matthiasm@0 115 }
matthiasm@0 116 return out;
matthiasm@0 117 }
matthiasm@0 118
matthiasm@0 119 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0 120
matthiasm@0 121 int binspersemitone = 3; // this must be 3
matthiasm@0 122 int minoctave = 0; // this must be 0
matthiasm@0 123 int maxoctave = 7; // this must be 7
matthiasm@1 124 int oversampling = 80;
matthiasm@0 125
matthiasm@0 126 // linear frequency vector
matthiasm@0 127 vector<float> fft_f;
matthiasm@0 128 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0 129 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0 130 }
matthiasm@0 131 float fft_width = fs * 2.0 / blocksize;
matthiasm@0 132
matthiasm@0 133 // linear oversampled frequency vector
matthiasm@0 134 vector<float> oversampled_f;
matthiasm@0 135 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0 136 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0 137 }
matthiasm@0 138
matthiasm@0 139 // pitch-spaced frequency vector
matthiasm@0 140 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0 141 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0 142 vector<float> cq_f;
matthiasm@0 143 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0 144 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0 145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0 146 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0 147 for (int k = -1; k < 2; ++k) {
matthiasm@0 148 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0 149 }
matthiasm@0 150 }
matthiasm@0 151 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0 152 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0 153
matthiasm@0 154 int nFFT = fft_f.size();
matthiasm@0 155
matthiasm@0 156 vector<float> fft_activation;
matthiasm@0 157 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0 158 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0 159 fft_activation.push_back(cosp);
matthiasm@0 160 // cerr << cosp << endl;
matthiasm@0 161 }
matthiasm@0 162
matthiasm@0 163 float cq_activation;
matthiasm@0 164 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0 165 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0 166 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0 167 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0 168 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0 169 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0 170 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1 171 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0 172 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0 173 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0 174 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0 175 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0 176 }
matthiasm@0 177 // if (iCQ == 1 || iCQ == 2) {
matthiasm@0 178 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0 179 // }
matthiasm@0 180 }
matthiasm@0 181 }
matthiasm@0 182 }
matthiasm@0 183 return true;
matthiasm@0 184 }
matthiasm@0 185
matthiasm@3 186 bool dictionaryMatrix(float* dm) {
matthiasm@1 187 int binspersemitone = 3; // this must be 3
matthiasm@1 188 int minoctave = 0; // this must be 0
matthiasm@1 189 int maxoctave = 7; // this must be 7
matthiasm@4 190 float s_param = 0.7;
matthiasm@1 191
matthiasm@1 192 // pitch-spaced frequency vector
matthiasm@1 193 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1 194 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1 195 vector<float> cq_f;
matthiasm@1 196 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1 197 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1 198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1 199 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1 200 for (int k = -1; k < 2; ++k) {
matthiasm@1 201 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1 202 }
matthiasm@1 203 }
matthiasm@1 204 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1 205 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1 206
matthiasm@1 207 float curr_f;
matthiasm@1 208 float floatbin;
matthiasm@1 209 float curr_amp;
matthiasm@1 210 // now for every combination calculate the matrix element
matthiasm@1 211 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@3 212 // cerr << iOut << endl;
matthiasm@1 213 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1 214 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
matthiasm@3 215 // if (curr_f > cq_f[nNote-1]) break;
matthiasm@3 216 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@3 217 // cerr << floatbin << endl;
matthiasm@1 218 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@3 219 // cerr << "curramp" << curr_amp << endl;
matthiasm@1 220 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@3 221 if (abs(iNote+1.0-floatbin)<2) {
matthiasm@3 222 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
matthiasm@3 223 // dm[iNote + nNote * iOut] += 1 * curr_amp;
matthiasm@3 224 }
matthiasm@1 225 }
matthiasm@3 226 }
matthiasm@1 227 }
matthiasm@3 228
matthiasm@3 229
matthiasm@1 230 }
matthiasm@1 231
/**
 * Look up an environment variable.
 *
 * @param key  name of the variable
 * @return its value, or an empty string when the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * const value = getenv( key.c_str() );
    return (value == NULL) ? std::string() : std::string(value);
}
matthiasm@7 241
matthiasm@7 242
matthiasm@9 243 vector<string> chordDictionary(vector<float> *mchorddict) {
matthiasm@7 244 // ifstream chordDictFile;
matthiasm@7 245 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
matthiasm@7 246 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
matthiasm@7 247 typedef tokenizer<char_separator<char> > Tok;
matthiasm@7 248 // char_separator<char> sep; // default constructed
matthiasm@7 249 char_separator<char> sep(",; ",":");
matthiasm@7 250 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
matthiasm@7 251 string line;
matthiasm@7 252 int iElement = 0;
matthiasm@7 253 int nChord = 0;
matthiasm@7 254
matthiasm@7 255 vector<string> loadedChordNames;
matthiasm@7 256 vector<float> loadedChordDict;
matthiasm@7 257 if (chordDictFile.is_open()) {
matthiasm@7 258 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
matthiasm@7 259 // first, get the chord definition
matthiasm@7 260 string chordType;
matthiasm@7 261 vector<float> tempPCVector;
matthiasm@7 262 // cerr << line << endl;
matthiasm@7 263 if (!line.empty() && line.substr(0,1) != "#") {
matthiasm@7 264 Tok tok(line, sep);
matthiasm@7 265 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
matthiasm@7 266 string tempString = *tok_iter;
matthiasm@7 267 // cerr << tempString << endl;
matthiasm@7 268 if (tok_iter == tok.begin()) { // either the chord name or a colon
matthiasm@7 269 if (tempString == ":") {
matthiasm@7 270 chordType = "";
matthiasm@7 271 } else {
matthiasm@7 272 chordType = tempString;
matthiasm@7 273 tok_iter++; // is this cheating ? :)
matthiasm@7 274 }
matthiasm@7 275 } else {
matthiasm@7 276 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
matthiasm@7 277 }
matthiasm@7 278 }
matthiasm@7 279
matthiasm@7 280 // now make all 12 chords of every type
matthiasm@7 281 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
matthiasm@7 282 // add bass slash notation
matthiasm@7 283 string slashNotation = "";
matthiasm@7 284 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
matthiasm@7 285 if (tempPCVector[(kSemitone) % 12] > 0.99) {
matthiasm@7 286 slashNotation = bassnames[iSemitone][kSemitone];
matthiasm@7 287 }
matthiasm@7 288 }
matthiasm@7 289 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
matthiasm@9 290 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
matthiasm@9 291 float bassValue = 0;
matthiasm@9 292 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
matthiasm@9 293 bassValue = 1;
matthiasm@9 294 } else {
matthiasm@9 295 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.2;
matthiasm@9 296 }
matthiasm@9 297 loadedChordDict.push_back(bassValue);
matthiasm@7 298 }
matthiasm@7 299 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
matthiasm@7 300 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
matthiasm@7 301 }
matthiasm@7 302 ostringstream os;
matthiasm@7 303 if (slashNotation.empty()) {
matthiasm@7 304 os << notenames[12+iSemitone] << chordType;
matthiasm@7 305 } else {
matthiasm@7 306 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
matthiasm@7 307 }
matthiasm@7 308
matthiasm@7 309 loadedChordNames.push_back(os.str());
matthiasm@7 310 }
matthiasm@7 311 }
matthiasm@7 312 }
matthiasm@7 313 // N type
matthiasm@7 314 loadedChordNames.push_back("N");
matthiasm@7 315 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
matthiasm@7 316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
matthiasm@7 317
matthiasm@7 318 // normalise
matthiasm@7 319 float sum = 0;
matthiasm@7 320 for (int i = 0; i < loadedChordDict.size(); i++) {
matthiasm@7 321 sum += pow(loadedChordDict[i],2);
matthiasm@7 322 if (i % 24 == 23) {
matthiasm@7 323 float invertedsum = 1.0/sqrt(sum);
matthiasm@7 324 for (int k = 0; k < 24; k++) {
matthiasm@7 325 loadedChordDict[i-k] *= invertedsum;
matthiasm@7 326 }
matthiasm@7 327 sum = 0;
matthiasm@7 328 }
matthiasm@7 329
matthiasm@7 330 }
matthiasm@7 331
matthiasm@7 332
matthiasm@7 333 nChord = 0;
matthiasm@7 334 for (int i = 0; i < loadedChordNames.size(); i++) {
matthiasm@7 335 nChord++;
matthiasm@7 336 }
matthiasm@7 337 chordDictFile.close();
matthiasm@7 338
matthiasm@7 339
matthiasm@9 340 // mchorddict = new float[nChord*24];
matthiasm@7 341 for (int i = 0; i < nChord*24; i++) {
matthiasm@9 342 mchorddict->push_back(loadedChordDict[i]);
matthiasm@7 343 }
matthiasm@9 344
matthiasm@7 345 } else {// use default from chorddict.cpp
matthiasm@9 346 // mchorddict = new float[nChorddict];
matthiasm@7 347 for (int i = 0; i < nChorddict; i++) {
matthiasm@9 348 mchorddict->push_back(chorddict[i]);
matthiasm@7 349 }
matthiasm@7 350
matthiasm@7 351 nChord = nChorddict/24;
matthiasm@7 352 // mchordnames = new string[nChorddict/24];
matthiasm@7 353 char buffer1 [50];
matthiasm@7 354 for (int i = 0; i < nChorddict/24; i++) {
matthiasm@7 355 if (i < nChorddict/24 - 1) {
matthiasm@7 356 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
matthiasm@7 357 } else {
matthiasm@7 358 sprintf(buffer1, "N");
matthiasm@7 359 }
matthiasm@7 360 ostringstream os;
matthiasm@7 361 os << buffer1;
matthiasm@9 362 loadedChordNames.push_back(os.str());
matthiasm@9 363
matthiasm@7 364 }
matthiasm@7 365
matthiasm@7 366 }
matthiasm@9 367 // cerr << "before leaving" << chordnames[1] << endl;
matthiasm@9 368 return loadedChordNames;
matthiasm@7 369 }
matthiasm@0 370
matthiasm@0 371 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0 372 Plugin(inputSampleRate),
matthiasm@0 373 m_fl(0),
matthiasm@0 374 m_blockSize(0),
matthiasm@0 375 m_stepSize(0),
matthiasm@0 376 m_lengthOfNoteIndex(0),
matthiasm@0 377 m_meanTuning0(0),
matthiasm@0 378 m_meanTuning1(0),
matthiasm@0 379 m_meanTuning2(0),
matthiasm@0 380 m_localTuning0(0),
matthiasm@0 381 m_localTuning1(0),
matthiasm@0 382 m_localTuning2(0),
matthiasm@4 383 m_paling(1.0),
matthiasm@3 384 m_preset(0.0),
matthiasm@0 385 m_localTuning(0),
matthiasm@0 386 m_kernelValue(0),
matthiasm@0 387 m_kernelFftIndex(0),
matthiasm@0 388 m_kernelNoteIndex(0),
matthiasm@1 389 m_dict(0),
matthiasm@0 390 m_tuneLocal(false),
matthiasm@7 391 m_dictID(0),
matthiasm@7 392 m_chorddict(0),
matthiasm@7 393 m_chordnames(0)
matthiasm@0 394 {
matthiasm@0 395 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@7 396
matthiasm@7 397 // make the *note* dictionary matrix
matthiasm@3 398 m_dict = new float[nNote * 84];
matthiasm@3 399 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
matthiasm@1 400 dictionaryMatrix(m_dict);
matthiasm@7 401
matthiasm@7 402 // get the *chord* dictionary from file (if the file exists)
matthiasm@9 403 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 404 }
matthiasm@0 405
matthiasm@0 406
matthiasm@0 407 NNLSChroma::~NNLSChroma()
matthiasm@0 408 {
matthiasm@0 409 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1 410 delete [] m_dict;
matthiasm@9 411 // delete [] m_chorddict;
matthiasm@7 412 // delete m_chordnames;
matthiasm@0 413 }
matthiasm@0 414
matthiasm@0 415 string
matthiasm@0 416 NNLSChroma::getIdentifier() const
matthiasm@0 417 {
matthiasm@0 418 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 419 return "nnls_chroma";
matthiasm@0 420 }
matthiasm@0 421
matthiasm@0 422 string
matthiasm@0 423 NNLSChroma::getName() const
matthiasm@0 424 {
matthiasm@0 425 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 426 return "NNLS Chroma";
matthiasm@0 427 }
matthiasm@0 428
matthiasm@0 429 string
matthiasm@0 430 NNLSChroma::getDescription() const
matthiasm@0 431 {
matthiasm@0 432 // Return something helpful here!
matthiasm@0 433 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@4 434 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
matthiasm@0 435 }
matthiasm@0 436
matthiasm@0 437 string
matthiasm@0 438 NNLSChroma::getMaker() const
matthiasm@0 439 {
matthiasm@0 440 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 441 // Your name here
matthiasm@0 442 return "Matthias Mauch";
matthiasm@0 443 }
matthiasm@0 444
matthiasm@0 445 int
matthiasm@0 446 NNLSChroma::getPluginVersion() const
matthiasm@0 447 {
matthiasm@0 448 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 449 // Increment this each time you release a version that behaves
matthiasm@0 450 // differently from the previous one
matthiasm@0 451 return 1;
matthiasm@0 452 }
matthiasm@0 453
matthiasm@0 454 string
matthiasm@0 455 NNLSChroma::getCopyright() const
matthiasm@0 456 {
matthiasm@0 457 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 458 // This function is not ideally named. It does not necessarily
matthiasm@0 459 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 460 // should indicate the terms under which it is distributed. For
matthiasm@0 461 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 462 return "Copyright (2010). All rights reserved.";
matthiasm@0 463 }
matthiasm@0 464
matthiasm@0 465 NNLSChroma::InputDomain
matthiasm@0 466 NNLSChroma::getInputDomain() const
matthiasm@0 467 {
matthiasm@0 468 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 469 return FrequencyDomain;
matthiasm@0 470 }
matthiasm@0 471
matthiasm@0 472 size_t
matthiasm@0 473 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 474 {
matthiasm@0 475 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 476 return 16384; // 0 means "I can handle any block size"
matthiasm@0 477 }
matthiasm@0 478
matthiasm@0 479 size_t
matthiasm@0 480 NNLSChroma::getPreferredStepSize() const
matthiasm@0 481 {
matthiasm@0 482 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 483 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0 484 // means the same as the block size for TimeDomain
matthiasm@0 485 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 486 }
matthiasm@0 487
matthiasm@0 488 size_t
matthiasm@0 489 NNLSChroma::getMinChannelCount() const
matthiasm@0 490 {
matthiasm@0 491 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 492 return 1;
matthiasm@0 493 }
matthiasm@0 494
matthiasm@0 495 size_t
matthiasm@0 496 NNLSChroma::getMaxChannelCount() const
matthiasm@0 497 {
matthiasm@0 498 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 499 return 1;
matthiasm@0 500 }
matthiasm@0 501
matthiasm@0 502 NNLSChroma::ParameterList
matthiasm@0 503 NNLSChroma::getParameterDescriptors() const
matthiasm@0 504 {
matthiasm@0 505 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 506 ParameterList list;
matthiasm@0 507
matthiasm@3 508 ParameterDescriptor d3;
matthiasm@3 509 d3.identifier = "preset";
matthiasm@3 510 d3.name = "preset";
matthiasm@3 511 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 512 d3.unit = "";
matthiasm@3 513 d3.isQuantized = true;
matthiasm@3 514 d3.quantizeStep = 1;
matthiasm@3 515 d3.minValue = 0.0;
matthiasm@4 516 d3.maxValue = 3.0;
matthiasm@3 517 d3.defaultValue = 0.0;
matthiasm@3 518 d3.valueNames.push_back("polyphonic pop");
matthiasm@3 519 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 520 d3.valueNames.push_back("solo keyboard");
matthiasm@3 521 d3.valueNames.push_back("manual");
matthiasm@3 522 list.push_back(d3);
matthiasm@4 523
matthiasm@4 524 // ParameterDescriptor d0;
matthiasm@4 525 // d0.identifier = "notedict";
matthiasm@4 526 // d0.name = "note dictionary";
matthiasm@4 527 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 528 // d0.unit = "";
matthiasm@4 529 // d0.minValue = 0;
matthiasm@4 530 // d0.maxValue = 1;
matthiasm@4 531 // d0.defaultValue = 0;
matthiasm@4 532 // d0.isQuantized = true;
matthiasm@4 533 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 534 // d0.valueNames.push_back("no NNLS");
matthiasm@4 535 // d0.quantizeStep = 1.0;
matthiasm@4 536 // list.push_back(d0);
matthiasm@4 537
matthiasm@4 538 ParameterDescriptor d1;
matthiasm@4 539 d1.identifier = "tuningmode";
matthiasm@4 540 d1.name = "tuning mode";
matthiasm@4 541 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 542 d1.unit = "";
matthiasm@4 543 d1.minValue = 0;
matthiasm@4 544 d1.maxValue = 1;
matthiasm@4 545 d1.defaultValue = 0;
matthiasm@4 546 d1.isQuantized = true;
matthiasm@4 547 d1.valueNames.push_back("global tuning");
matthiasm@4 548 d1.valueNames.push_back("local tuning");
matthiasm@4 549 d1.quantizeStep = 1.0;
matthiasm@4 550 list.push_back(d1);
matthiasm@4 551
matthiasm@4 552 // ParameterDescriptor d2;
matthiasm@4 553 // d2.identifier = "paling";
matthiasm@4 554 // d2.name = "spectral paling";
matthiasm@4 555 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@4 556 // d2.unit = "";
matthiasm@4 557 // d2.isQuantized = true;
matthiasm@4 558 // // d2.quantizeStep = 0.1;
matthiasm@4 559 // d2.minValue = 0.0;
matthiasm@4 560 // d2.maxValue = 1.0;
matthiasm@4 561 // d2.defaultValue = 1.0;
matthiasm@4 562 // d2.isQuantized = false;
matthiasm@4 563 // list.push_back(d2);
matthiasm@4 564
matthiasm@0 565 return list;
matthiasm@0 566 }
matthiasm@0 567
matthiasm@0 568 float
matthiasm@0 569 NNLSChroma::getParameter(string identifier) const
matthiasm@0 570 {
matthiasm@3 571 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 572 if (identifier == "notedict") {
matthiasm@0 573 return m_dictID;
matthiasm@0 574 }
matthiasm@0 575
matthiasm@0 576 if (identifier == "paling") {
matthiasm@0 577 return m_paling;
matthiasm@0 578 }
matthiasm@0 579
matthiasm@0 580 if (identifier == "tuningmode") {
matthiasm@0 581 if (m_tuneLocal) {
matthiasm@0 582 return 1.0;
matthiasm@0 583 } else {
matthiasm@0 584 return 0.0;
matthiasm@0 585 }
matthiasm@0 586 }
matthiasm@3 587 if (identifier == "preset") {
matthiasm@3 588 return m_preset;
matthiasm@3 589 }
matthiasm@0 590 return 0;
matthiasm@0 591
matthiasm@0 592 }
matthiasm@0 593
matthiasm@0 594 void
matthiasm@0 595 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 596 {
matthiasm@3 597 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 598 if (identifier == "notedict") {
matthiasm@0 599 m_dictID = (int) value;
matthiasm@0 600 }
matthiasm@0 601
matthiasm@0 602 if (identifier == "paling") {
matthiasm@0 603 m_paling = value;
matthiasm@0 604 }
matthiasm@0 605
matthiasm@0 606 if (identifier == "tuningmode") {
matthiasm@0 607 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 608 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 609 }
matthiasm@3 610 if (identifier == "preset") {
matthiasm@3 611 m_preset = value;
matthiasm@3 612 if (m_preset == 0.0) {
matthiasm@3 613 m_tuneLocal = false;
matthiasm@3 614 m_paling = 1.0;
matthiasm@3 615 m_dictID = 0.0;
matthiasm@3 616 }
matthiasm@3 617 if (m_preset == 1.0) {
matthiasm@3 618 m_tuneLocal = false;
matthiasm@3 619 m_paling = 1.0;
matthiasm@3 620 m_dictID = 1.0;
matthiasm@3 621 }
matthiasm@3 622 if (m_preset == 2.0) {
matthiasm@3 623 m_tuneLocal = false;
matthiasm@3 624 m_paling = 0.7;
matthiasm@3 625 m_dictID = 0.0;
matthiasm@3 626 }
matthiasm@3 627 }
matthiasm@0 628 }
matthiasm@0 629
matthiasm@0 630 NNLSChroma::ProgramList
matthiasm@0 631 NNLSChroma::getPrograms() const
matthiasm@0 632 {
matthiasm@0 633 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 634 ProgramList list;
matthiasm@0 635
matthiasm@0 636 // If you have no programs, return an empty list (or simply don't
matthiasm@0 637 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 638
matthiasm@0 639 return list;
matthiasm@0 640 }
matthiasm@0 641
matthiasm@0 642 string
matthiasm@0 643 NNLSChroma::getCurrentProgram() const
matthiasm@0 644 {
matthiasm@0 645 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 646 return ""; // no programs
matthiasm@0 647 }
matthiasm@0 648
matthiasm@0 649 void
matthiasm@0 650 NNLSChroma::selectProgram(string name)
matthiasm@0 651 {
matthiasm@0 652 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 653 }
matthiasm@0 654
matthiasm@0 655
matthiasm@0 656 NNLSChroma::OutputList
matthiasm@0 657 NNLSChroma::getOutputDescriptors() const
matthiasm@0 658 {
matthiasm@0 659 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 660 OutputList list;
matthiasm@0 661
matthiasm@0 662 // Make chroma names for the binNames property
matthiasm@0 663 vector<string> chromanames;
matthiasm@0 664 vector<string> bothchromanames;
matthiasm@0 665 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 666 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 667 if (iNote < 12) {
matthiasm@0 668 chromanames.push_back(notenames[iNote]);
matthiasm@0 669 }
matthiasm@0 670 }
matthiasm@0 671
matthiasm@1 672 // int nNote = 84;
matthiasm@0 673
matthiasm@0 674 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 675 // Every plugin must have at least one output.
matthiasm@0 676
matthiasm@0 677 OutputDescriptor d0;
matthiasm@0 678 d0.identifier = "tuning";
matthiasm@0 679 d0.name = "Tuning";
matthiasm@0 680 d0.description = "The concert pitch.";
matthiasm@0 681 d0.unit = "Hz";
matthiasm@0 682 d0.hasFixedBinCount = true;
matthiasm@0 683 d0.binCount = 0;
matthiasm@0 684 d0.hasKnownExtents = true;
matthiasm@0 685 d0.minValue = 427.47;
matthiasm@0 686 d0.maxValue = 452.89;
matthiasm@0 687 d0.isQuantized = false;
matthiasm@0 688 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 689 d0.hasDuration = false;
matthiasm@0 690 list.push_back(d0);
matthiasm@0 691
matthiasm@0 692 OutputDescriptor d1;
matthiasm@0 693 d1.identifier = "logfreqspec";
matthiasm@0 694 d1.name = "Log-Frequency Spectrum";
matthiasm@0 695 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 696 d1.unit = "";
matthiasm@0 697 d1.hasFixedBinCount = true;
matthiasm@0 698 d1.binCount = nNote;
matthiasm@0 699 d1.hasKnownExtents = false;
matthiasm@0 700 d1.isQuantized = false;
matthiasm@0 701 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 702 d1.hasDuration = false;
matthiasm@0 703 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 704 list.push_back(d1);
matthiasm@0 705
matthiasm@0 706 OutputDescriptor d2;
matthiasm@0 707 d2.identifier = "tunedlogfreqspec";
matthiasm@0 708 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 709 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 710 d2.unit = "";
matthiasm@0 711 d2.hasFixedBinCount = true;
matthiasm@0 712 d2.binCount = 256;
matthiasm@0 713 d2.hasKnownExtents = false;
matthiasm@0 714 d2.isQuantized = false;
matthiasm@0 715 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 716 d2.hasDuration = false;
matthiasm@0 717 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 718 list.push_back(d2);
matthiasm@0 719
matthiasm@0 720 OutputDescriptor d3;
matthiasm@0 721 d3.identifier = "semitonespectrum";
matthiasm@0 722 d3.name = "Semitone Spectrum";
matthiasm@0 723 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 724 d3.unit = "";
matthiasm@0 725 d3.hasFixedBinCount = true;
matthiasm@0 726 d3.binCount = 84;
matthiasm@0 727 d3.hasKnownExtents = false;
matthiasm@0 728 d3.isQuantized = false;
matthiasm@0 729 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 730 d3.hasDuration = false;
matthiasm@0 731 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 732 list.push_back(d3);
matthiasm@0 733
matthiasm@0 734 OutputDescriptor d4;
matthiasm@0 735 d4.identifier = "chroma";
matthiasm@0 736 d4.name = "Chromagram";
matthiasm@0 737 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 738 d4.unit = "";
matthiasm@0 739 d4.hasFixedBinCount = true;
matthiasm@0 740 d4.binCount = 12;
matthiasm@0 741 d4.binNames = chromanames;
matthiasm@0 742 d4.hasKnownExtents = false;
matthiasm@0 743 d4.isQuantized = false;
matthiasm@0 744 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 745 d4.hasDuration = false;
matthiasm@0 746 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 747 list.push_back(d4);
matthiasm@0 748
matthiasm@0 749 OutputDescriptor d5;
matthiasm@0 750 d5.identifier = "basschroma";
matthiasm@0 751 d5.name = "Bass Chromagram";
matthiasm@0 752 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 753 d5.unit = "";
matthiasm@0 754 d5.hasFixedBinCount = true;
matthiasm@0 755 d5.binCount = 12;
matthiasm@0 756 d5.binNames = chromanames;
matthiasm@0 757 d5.hasKnownExtents = false;
matthiasm@0 758 d5.isQuantized = false;
matthiasm@0 759 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 760 d5.hasDuration = false;
matthiasm@0 761 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 762 list.push_back(d5);
matthiasm@0 763
matthiasm@0 764 OutputDescriptor d6;
matthiasm@0 765 d6.identifier = "bothchroma";
matthiasm@0 766 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 767 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 768 d6.unit = "";
matthiasm@0 769 d6.hasFixedBinCount = true;
matthiasm@0 770 d6.binCount = 24;
matthiasm@0 771 d6.binNames = bothchromanames;
matthiasm@0 772 d6.hasKnownExtents = false;
matthiasm@0 773 d6.isQuantized = false;
matthiasm@0 774 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 775 d6.hasDuration = false;
matthiasm@0 776 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 777 list.push_back(d6);
matthiasm@0 778
matthiasm@0 779 OutputDescriptor d7;
matthiasm@0 780 d7.identifier = "simplechord";
matthiasm@0 781 d7.name = "Simple Chord Estimate";
matthiasm@0 782 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 783 d7.unit = "";
matthiasm@0 784 d7.hasFixedBinCount = true;
matthiasm@0 785 d7.binCount = 0;
matthiasm@0 786 d7.hasKnownExtents = false;
matthiasm@0 787 d7.isQuantized = false;
matthiasm@0 788 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 789 d7.hasDuration = false;
matthiasm@0 790 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 791 list.push_back(d7);
matthiasm@0 792
matthiasm@1 793 // OutputDescriptor d8;
matthiasm@1 794 // d8.identifier = "inconsistency";
matthiasm@1 795 // d8.name = "Harmonic inconsistency value";
matthiasm@1 796 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1 797 // d8.unit = "";
matthiasm@1 798 // d8.hasFixedBinCount = true;
matthiasm@1 799 // d8.binCount = 1;
matthiasm@1 800 // d8.hasKnownExtents = false;
matthiasm@1 801 // d8.isQuantized = false;
matthiasm@1 802 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 803 // d8.hasDuration = false;
matthiasm@1 804 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 805 // list.push_back(d8);
matthiasm@1 806 //
matthiasm@1 807 // OutputDescriptor d9;
matthiasm@1 808 // d9.identifier = "inconsistencysegment";
matthiasm@1 809 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1 810 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1 811 // d9.unit = "";
matthiasm@1 812 // d9.hasFixedBinCount = true;
matthiasm@1 813 // d9.binCount = 0;
matthiasm@1 814 // d9.hasKnownExtents = true;
matthiasm@1 815 // d9.minValue = 0.1;
matthiasm@1 816 // d9.maxValue = 0.9;
matthiasm@1 817 // d9.isQuantized = false;
matthiasm@1 818 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1 819 // d9.hasDuration = false;
matthiasm@1 820 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 821 // list.push_back(d9);
matthiasm@1 822 //
matthiasm@1 823 OutputDescriptor d10;
matthiasm@1 824 d10.identifier = "localtuning";
matthiasm@1 825 d10.name = "Local tuning";
matthiasm@4 826 d10.description = "Tuning based on the history up to this timestamp.";
matthiasm@1 827 d10.unit = "Hz";
matthiasm@1 828 d10.hasFixedBinCount = true;
matthiasm@1 829 d10.binCount = 1;
matthiasm@1 830 d10.hasKnownExtents = true;
matthiasm@1 831 d10.minValue = 427.47;
matthiasm@1 832 d10.maxValue = 452.89;
matthiasm@1 833 d10.isQuantized = false;
matthiasm@3 834 d10.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 835 d10.hasDuration = false;
matthiasm@3 836 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 837 list.push_back(d10);
matthiasm@1 838
matthiasm@0 839 return list;
matthiasm@0 840 }
matthiasm@0 841
matthiasm@0 842
matthiasm@0 843 bool
matthiasm@0 844 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 845 {
matthiasm@1 846 if (debug_on) {
matthiasm@1 847 cerr << "--> initialise";
matthiasm@1 848 }
matthiasm@1 849
matthiasm@0 850 if (channels < getMinChannelCount() ||
matthiasm@0 851 channels > getMaxChannelCount()) return false;
matthiasm@0 852 m_blockSize = blockSize;
matthiasm@0 853 m_stepSize = stepSize;
matthiasm@0 854 frameCount = 0;
matthiasm@0 855 int tempn = 256 * m_blockSize/2;
matthiasm@4 856 // cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1 857 float *tempkernel;
matthiasm@1 858
matthiasm@1 859 tempkernel = new float[tempn];
matthiasm@1 860
matthiasm@0 861 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1 862 m_kernelValue.clear();
matthiasm@1 863 m_kernelFftIndex.clear();
matthiasm@1 864 m_kernelNoteIndex.clear();
matthiasm@1 865 int countNonzero = 0;
matthiasm@0 866 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1 867 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1 868 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 869 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0 870 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 871 countNonzero++;
matthiasm@0 872 }
matthiasm@1 873 m_kernelFftIndex.push_back(iFFT);
matthiasm@1 874 m_kernelNoteIndex.push_back(iNote);
matthiasm@0 875 }
matthiasm@0 876 }
matthiasm@1 877 }
matthiasm@4 878 // cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1 879 delete [] tempkernel;
matthiasm@3 880 ofstream myfile;
matthiasm@3 881 myfile.open ("matrix.txt");
matthiasm@3 882 // myfile << "Writing this to a file.\n";
matthiasm@3 883 for (int i = 0; i < nNote * 84; ++i) {
matthiasm@3 884 myfile << m_dict[i] << endl;
matthiasm@3 885 }
matthiasm@3 886 myfile.close();
matthiasm@0 887 return true;
matthiasm@0 888 }
matthiasm@0 889
matthiasm@0 890 void
matthiasm@0 891 NNLSChroma::reset()
matthiasm@0 892 {
matthiasm@4 893 if (debug_on) cerr << "--> reset";
matthiasm@4 894
matthiasm@0 895 // Clear buffers, reset stored values, etc
matthiasm@4 896 frameCount = 0;
matthiasm@4 897 m_dictID = 0;
matthiasm@4 898 m_fl.clear();
matthiasm@4 899 m_meanTuning0 = 0;
matthiasm@4 900 m_meanTuning1 = 0;
matthiasm@4 901 m_meanTuning2 = 0;
matthiasm@4 902 m_localTuning0 = 0;
matthiasm@4 903 m_localTuning1 = 0;
matthiasm@4 904 m_localTuning2 = 0;
matthiasm@4 905 m_localTuning.clear();
matthiasm@0 906 }
matthiasm@0 907
matthiasm@0 908 NNLSChroma::FeatureSet
matthiasm@0 909 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 910 {
matthiasm@4 911 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 912 frameCount++;
matthiasm@0 913 float *magnitude = new float[m_blockSize/2];
matthiasm@0 914
matthiasm@0 915 Feature f10; // local tuning
matthiasm@3 916 f10.hasTimestamp = true;
matthiasm@4 917 f10.timestamp = timestamp;
matthiasm@0 918 const float *fbuf = inputBuffers[0];
matthiasm@0 919
matthiasm@0 920 // make magnitude
matthiasm@0 921 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0 922 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0 923 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@0 924 }
matthiasm@4 925
matthiasm@0 926 // note magnitude mapping using pre-calculated matrix
matthiasm@0 927 float *nm = new float[nNote]; // note magnitude
matthiasm@0 928 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 929 nm[iNote] = 0; // initialise as 0
matthiasm@0 930 }
matthiasm@0 931 int binCount = 0;
matthiasm@0 932 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0 933 // cerr << ".";
matthiasm@1 934 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1 935 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0 936 binCount++;
matthiasm@0 937 }
matthiasm@1 938 // cerr << nm[20];
matthiasm@1 939 // cerr << endl;
matthiasm@0 940
matthiasm@0 941
matthiasm@0 942 float one_over_N = 1.0/frameCount;
matthiasm@0 943 // update means of complex tuning variables
matthiasm@0 944 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 945 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 946 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 947
matthiasm@0 948 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 949 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 950 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 951 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@3 952 float ratioOld = 0.997;
matthiasm@3 953 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 954 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 955 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 956 }
matthiasm@0 957
matthiasm@0 958 // if (m_tuneLocal) {
matthiasm@0 959 // local tuning
matthiasm@0 960 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0 961 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0 962 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0 963 m_localTuning.push_back(normalisedtuning);
matthiasm@0 964 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0 965 f10.values.push_back(tuning440);
matthiasm@3 966 // cerr << tuning440 << endl;
matthiasm@0 967 // }
matthiasm@0 968
matthiasm@0 969 Feature f1; // logfreqspec
matthiasm@0 970 f1.hasTimestamp = true;
matthiasm@0 971 f1.timestamp = timestamp;
matthiasm@0 972 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 973 f1.values.push_back(nm[iNote]);
matthiasm@0 974 }
matthiasm@0 975
matthiasm@0 976 FeatureSet fs;
matthiasm@0 977 fs[1].push_back(f1);
matthiasm@3 978 fs[8].push_back(f10);
matthiasm@0 979
matthiasm@0 980 // deletes
matthiasm@0 981 delete[] magnitude;
matthiasm@0 982 delete[] nm;
matthiasm@0 983
matthiasm@0 984 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@7 985 char * pPath;
matthiasm@7 986 pPath = getenv ("VAMP_PATH");
matthiasm@7 987
matthiasm@7 988
matthiasm@0 989 return fs;
matthiasm@0 990 }
matthiasm@0 991
matthiasm@0 992 NNLSChroma::FeatureSet
matthiasm@0 993 NNLSChroma::getRemainingFeatures()
matthiasm@0 994 {
matthiasm@4 995 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@4 996 FeatureSet fsOut;
matthiasm@4 997 if (m_fl.size() == 0) return fsOut;
matthiasm@9 998 int nChord = m_chordnames.size();
matthiasm@0 999 //
matthiasm@1 1000 /** Calculate Tuning
matthiasm@1 1001 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1 1002 cumulative mean real and imag values)
matthiasm@1 1003 **/
matthiasm@1 1004 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1 1005 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1 1006 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1 1007 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1 1008 int intShift = floor(normalisedtuning * 3);
matthiasm@1 1009 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1010
matthiasm@1 1011 char buffer0 [50];
matthiasm@1 1012
matthiasm@1 1013 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 1014
matthiasm@1 1015 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 1016
matthiasm@1 1017 // push tuning to FeatureSet fsOut
matthiasm@1 1018 Feature f0; // tuning
matthiasm@1 1019 f0.hasTimestamp = true;
matthiasm@1 1020 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1 1021 f0.label = buffer0;
matthiasm@1 1022 fsOut[0].push_back(f0);
matthiasm@1 1023
matthiasm@1 1024 /** Tune Log-Frequency Spectrogram
matthiasm@1 1025 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1 1026 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@1 1027 **/
matthiasm@1 1028
matthiasm@1 1029 float tempValue = 0;
matthiasm@1 1030 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1 1031 float thresh = pow(10,dbThreshold/20);
matthiasm@1 1032 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1 1033 int count = 0;
matthiasm@1 1034
matthiasm@1 1035 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1 1036 Feature f1 = *i;
matthiasm@1 1037 Feature f2; // tuned log-frequency spectrum
matthiasm@1 1038 f2.hasTimestamp = true;
matthiasm@1 1039 f2.timestamp = f1.timestamp;
matthiasm@1 1040 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 1041
matthiasm@1 1042 if (m_tuneLocal) {
matthiasm@1 1043 intShift = floor(m_localTuning[count] * 3);
matthiasm@1 1044 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1045 }
matthiasm@1 1046
matthiasm@1 1047 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 1048
matthiasm@4 1049 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1 1050 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1 1051 f2.values.push_back(tempValue);
matthiasm@1 1052 }
matthiasm@1 1053
matthiasm@1 1054 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1 1055 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1 1056 vector<float> runningstd;
matthiasm@1 1057 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1 1058 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1 1059 }
matthiasm@1 1060 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1 1061 for (int i = 0; i < 256; i++) {
matthiasm@1 1062 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1 1063 if (runningstd[i] > 0) {
matthiasm@1 1064 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1 1065 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1066 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1 1067 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1068 }
matthiasm@1 1069 if (f2.values[i] < 0) {
matthiasm@1 1070 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1 1071 }
matthiasm@1 1072 }
matthiasm@1 1073 fsOut[2].push_back(f2);
matthiasm@1 1074 count++;
matthiasm@1 1075 }
matthiasm@1 1076
matthiasm@1 1077 /** Semitone spectrum and chromagrams
matthiasm@1 1078 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1 1079 is inferred using a non-negative least squares algorithm.
matthiasm@1 1080 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1 1081 bass and treble stacked onto each other).
matthiasm@1 1082 **/
matthiasm@1 1083 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
matthiasm@1 1084
matthiasm@1 1085 vector<vector<float> > chordogram;
matthiasm@3 1086 vector<vector<int> > scoreChordogram;
matthiasm@1 1087 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1 1088 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1 1089 count = 0;
matthiasm@9 1090
matthiasm@1 1091 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1 1092 Feature f2 = *it; // logfreq spectrum
matthiasm@1 1093 Feature f3; // semitone spectrum
matthiasm@1 1094 Feature f4; // treble chromagram
matthiasm@1 1095 Feature f5; // bass chromagram
matthiasm@1 1096 Feature f6; // treble and bass chromagram
matthiasm@1 1097
matthiasm@1 1098 f3.hasTimestamp = true;
matthiasm@1 1099 f3.timestamp = f2.timestamp;
matthiasm@1 1100
matthiasm@1 1101 f4.hasTimestamp = true;
matthiasm@1 1102 f4.timestamp = f2.timestamp;
matthiasm@1 1103
matthiasm@1 1104 f5.hasTimestamp = true;
matthiasm@1 1105 f5.timestamp = f2.timestamp;
matthiasm@1 1106
matthiasm@1 1107 f6.hasTimestamp = true;
matthiasm@1 1108 f6.timestamp = f2.timestamp;
matthiasm@1 1109
matthiasm@3 1110 float b[256];
matthiasm@1 1111
matthiasm@1 1112 bool some_b_greater_zero = false;
matthiasm@3 1113 float sumb = 0;
matthiasm@1 1114 for (int i = 0; i < 256; i++) {
matthiasm@3 1115 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@3 1116 b[i] = f2.values[i];
matthiasm@3 1117 sumb += b[i];
matthiasm@1 1118 if (b[i] > 0) {
matthiasm@1 1119 some_b_greater_zero = true;
matthiasm@1 1120 }
matthiasm@1 1121 }
matthiasm@1 1122
matthiasm@1 1123 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 1124
matthiasm@1 1125 vector<float> chroma = vector<float>(12, 0);
matthiasm@1 1126 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1 1127 float currval;
matthiasm@1 1128 unsigned iSemitone = 0;
matthiasm@1 1129
matthiasm@1 1130 if (some_b_greater_zero) {
matthiasm@3 1131 if (m_dictID == 1) {
matthiasm@1 1132 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1 1133 currval = 0;
matthiasm@3 1134 currval += b[iNote + 1 + -1] * 0.5;
matthiasm@3 1135 currval += b[iNote + 1 + 0] * 1.0;
matthiasm@3 1136 currval += b[iNote + 1 + 1] * 0.5;
matthiasm@1 1137 f3.values.push_back(currval);
matthiasm@1 1138 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1 1139 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1 1140 iSemitone++;
matthiasm@1 1141 }
matthiasm@1 1142
matthiasm@1 1143 } else {
matthiasm@3 1144 float x[84+1000];
matthiasm@3 1145 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
matthiasm@3 1146 // for (int i = 0; i < 84; ++i) {
matthiasm@3 1147 // x[i] = b[3*i+3];
matthiasm@3 1148 // }
matthiasm@3 1149 float rnorm;
matthiasm@3 1150 float w[84+1000];
matthiasm@3 1151 float zz[84+1000];
matthiasm@3 1152 int indx[84+1000];
matthiasm@1 1153 int mode;
matthiasm@3 1154 float curr_dict[256*84];
matthiasm@3 1155 for (unsigned i = 0; i < 256 * 84; ++i) {
matthiasm@3 1156 curr_dict[i] = 1.0 * m_dict[i];
matthiasm@3 1157 }
matthiasm@3 1158 nnls(curr_dict, nNote, nNote, 84, b, x, &rnorm, w, zz, indx, &mode);
matthiasm@3 1159 for (unsigned iNote = 0; iNote < 84; ++iNote) {
matthiasm@3 1160 // for (unsigned kNote = 0; kNote < 256; ++kNote) {
matthiasm@3 1161 // x[iNote] += m_dict[kNote + nNote * iNote] * b[kNote];
matthiasm@3 1162 // }
matthiasm@3 1163 f3.values.push_back(x[iNote]);
matthiasm@3 1164 // cerr << mode << endl;
matthiasm@3 1165 chroma[iNote % 12] += x[iNote] * treblewindow[iNote];
matthiasm@3 1166 basschroma[iNote % 12] += x[iNote] * basswindow[iNote];
matthiasm@3 1167 // iSemitone++;
matthiasm@3 1168 }
matthiasm@1 1169 }
matthiasm@1 1170 }
matthiasm@1 1171
matthiasm@1 1172 f4.values = chroma;
matthiasm@1 1173 f5.values = basschroma;
matthiasm@1 1174 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1 1175 f6.values = chroma;
matthiasm@1 1176
matthiasm@1 1177 // local chord estimation
matthiasm@1 1178 vector<float> currentChordSalience;
matthiasm@1 1179 float tempchordvalue = 0;
matthiasm@1 1180 float sumchordvalue = 0;
matthiasm@9 1181
matthiasm@1 1182 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1183 tempchordvalue = 0;
matthiasm@1 1184 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@9 1185 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1186 }
matthiasm@1 1187 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@9 1188 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1189 }
matthiasm@1 1190 sumchordvalue+=tempchordvalue;
matthiasm@1 1191 currentChordSalience.push_back(tempchordvalue);
matthiasm@1 1192 }
matthiasm@1 1193 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1194 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1 1195 }
matthiasm@1 1196 chordogram.push_back(currentChordSalience);
matthiasm@1 1197
matthiasm@1 1198 fsOut[3].push_back(f3);
matthiasm@1 1199 fsOut[4].push_back(f4);
matthiasm@1 1200 fsOut[5].push_back(f5);
matthiasm@1 1201 fsOut[6].push_back(f6);
matthiasm@1 1202 count++;
matthiasm@1 1203 }
matthiasm@0 1204 // int musicitykernelwidth = (50 * 2048) / m_stepSize;
matthiasm@0 1205 //
matthiasm@3 1206 /* Simple chord estimation
matthiasm@3 1207 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@3 1208 take the maximum. Very simple, don't do this at home...
matthiasm@3 1209 */
matthiasm@3 1210 count = 0;
matthiasm@3 1211 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@3 1212 vector<int> chordSequence;
matthiasm@3 1213 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
matthiasm@3 1214 vector<int> temp = vector<int>(nChord,0);
matthiasm@3 1215 scoreChordogram.push_back(temp);
matthiasm@3 1216 }
matthiasm@4 1217 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
matthiasm@3 1218 int startIndex = count + 1;
matthiasm@3 1219 int endIndex = count + 2 * halfwindowlength;
matthiasm@3 1220 vector<float> temp = vector<float>(nChord,0);
matthiasm@3 1221 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@4 1222 float maxindex = 0; //... and the index thereof
matthiasm@3 1223 unsigned bestchordL = 0; // index of the best "left" chord
matthiasm@3 1224 unsigned bestchordR = 0; // index of the best "right" chord
matthiasm@4 1225 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@3 1226 // now find the max values on both sides of iWF
matthiasm@3 1227 // left side:
matthiasm@3 1228 float maxL = 0;
matthiasm@3 1229 unsigned maxindL = nChord-1;
matthiasm@3 1230 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3 1231 float currsum = 0;
matthiasm@3 1232 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@3 1233 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1234 }
matthiasm@3 1235 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1236 if (currsum > maxL) {
matthiasm@3 1237 maxL = currsum;
matthiasm@3 1238 maxindL = iChord;
matthiasm@3 1239 }
matthiasm@3 1240 }
matthiasm@3 1241 // right side:
matthiasm@3 1242 float maxR = 0;
matthiasm@3 1243 unsigned maxindR = nChord-1;
matthiasm@3 1244 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3 1245 float currsum = 0;
matthiasm@3 1246 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1247 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1248 }
matthiasm@3 1249 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1250 if (currsum > maxR) {
matthiasm@3 1251 maxR = currsum;
matthiasm@3 1252 maxindR = iChord;
matthiasm@3 1253 }
matthiasm@3 1254 }
matthiasm@3 1255 if (maxL+maxR > maxval) {
matthiasm@3 1256 maxval = maxL+maxR;
matthiasm@3 1257 maxindex = iWF;
matthiasm@3 1258 bestchordL = maxindL;
matthiasm@3 1259 bestchordR = maxindR;
matthiasm@3 1260 }
matthiasm@3 1261
matthiasm@3 1262 }
matthiasm@3 1263 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@3 1264 // add a score to every chord-frame-point that was part of a maximum
matthiasm@3 1265 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@3 1266 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@3 1267 }
matthiasm@3 1268 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1269 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@3 1270 }
matthiasm@3 1271 count++;
matthiasm@3 1272 }
matthiasm@3 1273
matthiasm@3 1274 count = 0;
matthiasm@3 1275 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1276 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3 1277 float maxindex = 0; //... and the index thereof
matthiasm@3 1278 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3 1279 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@3 1280 maxval = scoreChordogram[count][iChord];
matthiasm@3 1281 maxindex = iChord;
matthiasm@4 1282 // cerr << iChord << endl;
matthiasm@3 1283 }
matthiasm@3 1284 }
matthiasm@3 1285 chordSequence.push_back(maxindex);
matthiasm@4 1286 // cerr << "before modefilter, maxindex: " << maxindex << endl;
matthiasm@3 1287 count++;
matthiasm@3 1288 }
matthiasm@3 1289
matthiasm@3 1290
matthiasm@3 1291 // mode filter on chordSequence
matthiasm@3 1292 count = 0;
matthiasm@3 1293 int oldChordIndex = -1;
matthiasm@3 1294 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1295 Feature f6 = *it;
matthiasm@3 1296 Feature f7; // chord estimate
matthiasm@3 1297 f7.hasTimestamp = true;
matthiasm@3 1298 f7.timestamp = f6.timestamp;
matthiasm@3 1299 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@3 1300 int maxChordCount = 0;
matthiasm@3 1301 int maxChordIndex = nChord-1;
matthiasm@4 1302 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@4 1303 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@4 1304 for (int i = startIndex; i < endIndex; i++) {
matthiasm@4 1305 chordCount[chordSequence[i]]++;
matthiasm@4 1306 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@7 1307 // cerr << "start index " << startIndex << endl;
matthiasm@4 1308 maxChordCount++;
matthiasm@4 1309 maxChordIndex = chordSequence[i];
matthiasm@4 1310 }
matthiasm@4 1311 }
matthiasm@4 1312 // chordSequence[count] = maxChordIndex;
matthiasm@7 1313 // cerr << maxChordIndex << endl;
matthiasm@3 1314 if (oldChordIndex != maxChordIndex) {
matthiasm@3 1315 oldChordIndex = maxChordIndex;
matthiasm@3 1316
matthiasm@9 1317 // char buffer1 [50];
matthiasm@9 1318 // if (maxChordIndex < nChord - 1) {
matthiasm@9 1319 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@9 1320 // } else {
matthiasm@9 1321 // sprintf(buffer1, "N");
matthiasm@9 1322 // }
matthiasm@9 1323 // f7.label = buffer1;
matthiasm@9 1324 f7.label = m_chordnames[maxChordIndex];
matthiasm@3 1325 fsOut[7].push_back(f7);
matthiasm@3 1326 }
matthiasm@3 1327 count++;
matthiasm@3 1328 }
matthiasm@0 1329 // // musicity
matthiasm@0 1330 // count = 0;
matthiasm@0 1331 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0 1332 // vector<float> musicityValue;
matthiasm@0 1333 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1334 // Feature f4 = *it;
matthiasm@0 1335 //
matthiasm@0 1336 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1337 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1338 // float chromasum = 0;
matthiasm@0 1339 // float diffsum = 0;
matthiasm@0 1340 // for (int k = 0; k < 12; k++) {
matthiasm@0 1341 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0 1342 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0 1343 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0 1344 // }
matthiasm@0 1345 // }
matthiasm@0 1346 // diffsum /= chromasum;
matthiasm@0 1347 // musicityValue.push_back(diffsum);
matthiasm@0 1348 // count++;
matthiasm@0 1349 // }
matthiasm@0 1350 //
matthiasm@0 1351 // float musicityThreshold = 0.44;
matthiasm@0 1352 // if (m_stepSize == 4096) {
matthiasm@0 1353 // musicityThreshold = 0.74;
matthiasm@0 1354 // }
matthiasm@0 1355 // if (m_stepSize == 4410) {
matthiasm@0 1356 // musicityThreshold = 0.77;
matthiasm@0 1357 // }
matthiasm@0 1358 //
matthiasm@0 1359 // count = 0;
matthiasm@0 1360 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1361 // Feature f4 = *it;
matthiasm@0 1362 // Feature f8; // musicity
matthiasm@0 1363 // Feature f9; // musicity segmenter
matthiasm@0 1364 //
matthiasm@0 1365 // f8.hasTimestamp = true;
matthiasm@0 1366 // f8.timestamp = f4.timestamp;
matthiasm@0 1367 // f9.hasTimestamp = true;
matthiasm@0 1368 // f9.timestamp = f4.timestamp;
matthiasm@0 1369 //
matthiasm@0 1370 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1371 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1372 // int musicityCount = 0;
matthiasm@0 1373 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0 1374 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0 1375 // }
matthiasm@0 1376 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0 1377 //
matthiasm@0 1378 // if (isSpeech) {
matthiasm@0 1379 // if (oldlabeltype != 2) {
matthiasm@0 1380 // f9.label = "Speech";
matthiasm@0 1381 // fsOut[9].push_back(f9);
matthiasm@0 1382 // oldlabeltype = 2;
matthiasm@0 1383 // }
matthiasm@0 1384 // } else {
matthiasm@0 1385 // if (oldlabeltype != 1) {
matthiasm@0 1386 // f9.label = "Music";
matthiasm@0 1387 // fsOut[9].push_back(f9);
matthiasm@0 1388 // oldlabeltype = 1;
matthiasm@0 1389 // }
matthiasm@0 1390 // }
matthiasm@0 1391 // f8.values.push_back(musicityValue[count]);
matthiasm@0 1392 // fsOut[8].push_back(f8);
matthiasm@0 1393 // count++;
matthiasm@0 1394 // }
matthiasm@0 1395 return fsOut;
matthiasm@0 1396
matthiasm@0 1397 }
matthiasm@0 1398