annotate NNLSChroma.cpp @ 12:54f28d8ac098 matthiasm-plugin

consecutive chords with the same label are merged
author matthiasm
date Wed, 09 Jun 2010 03:33:36 +0000
parents a02d56ac1464
children 9ae90fa5fa74
rev   line source
matthiasm@0 1
matthiasm@0 2 #include "NNLSChroma.h"
matthiasm@0 3 #include <cmath>
matthiasm@10 4 // #include <omp.h>
matthiasm@0 5 #include <list>
matthiasm@0 6 #include <iostream>
matthiasm@3 7 #include <fstream>
matthiasm@0 8 #include <sstream>
matthiasm@0 9 #include <cassert>
matthiasm@7 10 #include <cstdlib>
matthiasm@0 11 #include <cstdio>
matthiasm@7 12 #include <boost/tokenizer.hpp>
matthiasm@7 13 #include <boost/iostreams/device/file.hpp>
matthiasm@7 14 #include <boost/iostreams/stream.hpp>
matthiasm@7 15 #include <boost/lexical_cast.hpp>
matthiasm@1 16 #include "nnls.h"
matthiasm@0 17 #include "chorddict.cpp"
matthiasm@9 18
matthiasm@10 19 // #include <omp.h>
matthiasm@10 20 // #define N 1000
matthiasm@10 21 // #define CHUNKSIZE 100
matthiasm@9 22
matthiasm@9 23
matthiasm@0 24 using namespace std;
matthiasm@7 25 using namespace boost;
matthiasm@0 26
// Numerically these are sin(2*pi/3) and cos(2*pi/3).
// NOTE(review): their use is not visible in this part of the file - confirm before changing.
matthiasm@0 27 const float sinvalue = 0.866025404;
matthiasm@0 28 const float cosvalue = -0.5;
// 19 symmetric smoothing coefficients (centre tap 0.1020); copied into the vector `hw` below.
// NOTE(review): presumably a normalised Hamming window, judging by the name - confirm.
matthiasm@0 29 const float hammingwind[19] = {0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992, 0.1020, 0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082};
matthiasm@0 30 const float basswindow[] = {0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498, 0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112, 0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612, 0.004909, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000};
// Symmetric weighting profile: rises from 0.000350 to 0.999650 at the two centre entries and mirrors back down.
// NOTE(review): presumably one weight per semitone bin for the treble chroma - the consumer is not visible here.
matthiasm@0 31 const float treblewindow[] = {0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719, 0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856, 0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590, 0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956, 0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910, 0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058, 0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650, 0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350};
// Bin labels: entries 0-11 are the bass pitch classes, entries 12-23 the chord pitch classes,
// both starting at A. chordDictionary() reads entries 12-23 to build chord root names.
matthiasm@0 32 const char* notenames[24] = {"A (bass)","Bb (bass)","B (bass)","C (bass)","C# (bass)","D (bass)","Eb (bass)","E (bass)","F (bass)","F# (bass)","G (bass)","Ab (bass)",
matthiasm@0 33 "A","Bb","B","C","C#","D","Eb","E","F","F#","G","Ab"};
matthiasm@7 34
// Spelling of the bass note used in slash-chord names, indexed as
// bassnames[chord root][interval above the root in semitones]. Rows follow the root order
// A, Bb, B, ..., Ab. Columns 1, 6 and 8 are empty strings (no spelling is provided for them).
matthiasm@7 35 const char* bassnames[12][12] ={
matthiasm@7 36 {"A","","B","C","C#","D","","E","","F#","G","G#"},
matthiasm@7 37 {"Bb","","C","Db","D","Eb","","F","","G","Ab","A"},
matthiasm@7 38 {"B","","C#","D","D#","E","","F#","","G#","A","A#"},
matthiasm@7 39 {"C","","D","Eb","E","F","","G","","A","Bb","B"},
matthiasm@7 40 {"C#","","D#","E","E#","F#","","G#","","A#","B","B#"},
matthiasm@7 41 {"D","","E","F","F#","G","","A","","B","C","C#"},
matthiasm@7 42 {"Eb","","F","Gb","G","Ab","","Bb","","C","Db","D"},
matthiasm@7 43 {"E","","F#","G","G#","A","","B","","C#","D","D#"},
matthiasm@7 44 {"F","","G","Ab","A","Bb","","C","","D","Eb","E"},
matthiasm@7 45 {"F#","","G#","A","A#","B","","C#","","D#","E","E#"},
matthiasm@7 46 {"G","","A","Bb","B","C","","D","","E","F","F#"},
matthiasm@7 47 {"Ab","","Bb","Cb","C","Db","","Eb","","F","Gb","G"}
matthiasm@7 48 };
// The 19 hammingwind coefficients as a vector.
matthiasm@0 49 const vector<float> hw(hammingwind, hammingwind+19);
// Number of log-frequency bins; also the row length of the note dictionary filled by dictionaryMatrix().
matthiasm@0 50 const int nNote = 256;
matthiasm@0 51
matthiasm@0 52 /** Special Convolution
matthiasm@0 53 special convolution is as long as the convolvee, i.e. the first argument. in the valid core part of the
matthiasm@0 54 convolution it contains the usual convolution values, but the pads at the beginning (ending) have the same values
matthiasm@0 55 as the first (last) valid convolution bin.
matthiasm@0 56 **/
matthiasm@0 57
// Compile-time switch: when true, the plugin methods print "--> <method name>" trace lines to cerr.
matthiasm@0 58 const bool debug_on = false;
matthiasm@0 59
/**
 * "Special" convolution of convolvee with an odd-length kernel.
 *
 * The valid core of the result holds the ordinary convolution values, centred on the
 * kernel. The lenKernel/2 positions at either end, where the kernel does not fit,
 * repeat the first (last) valid value.
 *
 * @param convolvee  signal to smooth
 * @param kernel     convolution kernel; its length must be odd (checked by assert only)
 * @return           vector of max(256, convolvee.size()) floats. Entries beyond
 *                   convolvee.size() are 0. If convolvee is shorter than the kernel
 *                   there is no valid core and the result is all zeros.
 *
 * The 256-entry minimum is the historic fixed output size and is kept so that existing
 * callers see the same shape. Inputs longer than 256 used to write past the end of the
 * buffer; the buffer now grows with the input instead.
 */
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int kMinOutputLength = 256;

    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    std::vector<float> Z(lenConvolvee > kMinOutputLength ? lenConvolvee : kMinOutputLength, 0.0f);

    // Without at least one position where the whole kernel fits there is nothing to
    // compute, and the padding below would index before the start of Z.
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core
    for (int n = lenKernel - 1; n < lenConvolvee; ++n) {
        float s = 0.0f;
        for (int m = 0; m < lenKernel; ++m) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    for (int n = 0; n < halfKernel; ++n) {
        Z[n] = Z[halfKernel];
    }
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; ++n) {
        Z[n] = Z[lenConvolvee - halfKernel - 1];
    }
    return Z;
}
matthiasm@0 87
matthiasm@0 88 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0 89 // {
matthiasm@0 90 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0 91 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0 92 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0 93 // }
matthiasm@0 94 // return freq;
matthiasm@0 95 // }
matthiasm@0 96
/**
 * Raised-cosine pulse of total width `width` centred at `centre`.
 *
 * @param x       position at which the pulse is evaluated
 * @param centre  position of the peak (value 1)
 * @param width   full width of the pulse; it is 0 at centre +/- width/2 and beyond
 * @return        cos(2*pi*(x-centre)/width)*0.5 + 0.5 inside the pulse, 0 outside.
 *                A width that is not positive gives 0 everywhere (it used to give NaN
 *                at x == centre when width was 0).
 */
float cospuls(float x, float centre, float width)
{
    // Same value as M_PI, spelled out because M_PI is not part of standard C++.
    const double kPi = 3.14159265358979323846;

    if (!(width > 0)) {
        return 0.0;
    }
    // std::abs picks the floating-point overload; a bare abs may bind to the int one
    if (std::abs(x - centre) <= 0.5 * width) {
        const float recipwidth = 1.0 / width;
        return std::cos((x - centre) * 2 * kPi * recipwidth) * .5 + .5;
    }
    return 0.0;
}
matthiasm@0 105
matthiasm@0 106 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0 107 {
matthiasm@0 108 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0 109 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0 110 // now scale to correct for note density
matthiasm@0 111 float c = log(2.0)/binsperoctave;
matthiasm@0 112 if (x > 0) {
matthiasm@0 113 out = out / (c * x);
matthiasm@0 114 } else {
matthiasm@0 115 out = 0;
matthiasm@0 116 }
matthiasm@0 117 return out;
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0 121
matthiasm@0 122 int binspersemitone = 3; // this must be 3
matthiasm@0 123 int minoctave = 0; // this must be 0
matthiasm@0 124 int maxoctave = 7; // this must be 7
matthiasm@1 125 int oversampling = 80;
matthiasm@0 126
matthiasm@0 127 // linear frequency vector
matthiasm@0 128 vector<float> fft_f;
matthiasm@0 129 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0 130 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0 131 }
matthiasm@0 132 float fft_width = fs * 2.0 / blocksize;
matthiasm@0 133
matthiasm@0 134 // linear oversampled frequency vector
matthiasm@0 135 vector<float> oversampled_f;
matthiasm@0 136 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0 137 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0 138 }
matthiasm@0 139
matthiasm@0 140 // pitch-spaced frequency vector
matthiasm@0 141 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0 142 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0 143 vector<float> cq_f;
matthiasm@0 144 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0 145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0 146 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0 147 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0 148 for (int k = -1; k < 2; ++k) {
matthiasm@0 149 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0 150 }
matthiasm@0 151 }
matthiasm@0 152 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0 153 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0 154
matthiasm@0 155 int nFFT = fft_f.size();
matthiasm@0 156
matthiasm@0 157 vector<float> fft_activation;
matthiasm@0 158 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0 159 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0 160 fft_activation.push_back(cosp);
matthiasm@0 161 // cerr << cosp << endl;
matthiasm@0 162 }
matthiasm@0 163
matthiasm@0 164 float cq_activation;
matthiasm@0 165 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0 166 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0 167 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0 168 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0 169 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0 170 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0 171 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1 172 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0 173 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0 174 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0 175 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0 176 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0 177 }
matthiasm@0 178 // if (iCQ == 1 || iCQ == 2) {
matthiasm@0 179 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0 180 // }
matthiasm@0 181 }
matthiasm@0 182 }
matthiasm@0 183 }
matthiasm@0 184 return true;
matthiasm@0 185 }
matthiasm@0 186
matthiasm@3 187 bool dictionaryMatrix(float* dm) {
matthiasm@1 188 int binspersemitone = 3; // this must be 3
matthiasm@1 189 int minoctave = 0; // this must be 0
matthiasm@1 190 int maxoctave = 7; // this must be 7
matthiasm@4 191 float s_param = 0.7;
matthiasm@1 192
matthiasm@1 193 // pitch-spaced frequency vector
matthiasm@1 194 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1 195 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1 196 vector<float> cq_f;
matthiasm@1 197 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1 198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1 199 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1 200 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1 201 for (int k = -1; k < 2; ++k) {
matthiasm@1 202 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1 203 }
matthiasm@1 204 }
matthiasm@1 205 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1 206 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1 207
matthiasm@1 208 float curr_f;
matthiasm@1 209 float floatbin;
matthiasm@1 210 float curr_amp;
matthiasm@1 211 // now for every combination calculate the matrix element
matthiasm@1 212 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@3 213 // cerr << iOut << endl;
matthiasm@1 214 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1 215 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
matthiasm@3 216 // if (curr_f > cq_f[nNote-1]) break;
matthiasm@3 217 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@3 218 // cerr << floatbin << endl;
matthiasm@1 219 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@3 220 // cerr << "curramp" << curr_amp << endl;
matthiasm@1 221 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@3 222 if (abs(iNote+1.0-floatbin)<2) {
matthiasm@3 223 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
matthiasm@3 224 // dm[iNote + nNote * iOut] += 1 * curr_amp;
matthiasm@3 225 }
matthiasm@1 226 }
matthiasm@3 227 }
matthiasm@1 228 }
matthiasm@3 229
matthiasm@3 230
matthiasm@1 231 }
matthiasm@1 232
/**
 * Look up an environment variable.
 *
 * @param key  name of the variable
 * @return     its value, or an empty string when the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char * const raw = getenv( key.c_str() );
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
matthiasm@7 242
matthiasm@7 243
matthiasm@9 244 vector<string> chordDictionary(vector<float> *mchorddict) {
matthiasm@7 245 // ifstream chordDictFile;
matthiasm@7 246 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
matthiasm@7 247 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
matthiasm@7 248 typedef tokenizer<char_separator<char> > Tok;
matthiasm@7 249 // char_separator<char> sep; // default constructed
matthiasm@7 250 char_separator<char> sep(",; ",":");
matthiasm@7 251 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
matthiasm@7 252 string line;
matthiasm@7 253 int iElement = 0;
matthiasm@7 254 int nChord = 0;
matthiasm@7 255
matthiasm@7 256 vector<string> loadedChordNames;
matthiasm@7 257 vector<float> loadedChordDict;
matthiasm@7 258 if (chordDictFile.is_open()) {
matthiasm@7 259 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
matthiasm@7 260 // first, get the chord definition
matthiasm@7 261 string chordType;
matthiasm@7 262 vector<float> tempPCVector;
matthiasm@7 263 // cerr << line << endl;
matthiasm@7 264 if (!line.empty() && line.substr(0,1) != "#") {
matthiasm@7 265 Tok tok(line, sep);
matthiasm@7 266 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
matthiasm@7 267 string tempString = *tok_iter;
matthiasm@7 268 // cerr << tempString << endl;
matthiasm@7 269 if (tok_iter == tok.begin()) { // either the chord name or a colon
matthiasm@7 270 if (tempString == ":") {
matthiasm@7 271 chordType = "";
matthiasm@7 272 } else {
matthiasm@7 273 chordType = tempString;
matthiasm@7 274 tok_iter++; // is this cheating ? :)
matthiasm@7 275 }
matthiasm@7 276 } else {
matthiasm@7 277 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
matthiasm@7 278 }
matthiasm@7 279 }
matthiasm@7 280
matthiasm@7 281 // now make all 12 chords of every type
matthiasm@7 282 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
matthiasm@7 283 // add bass slash notation
matthiasm@7 284 string slashNotation = "";
matthiasm@7 285 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
matthiasm@7 286 if (tempPCVector[(kSemitone) % 12] > 0.99) {
matthiasm@7 287 slashNotation = bassnames[iSemitone][kSemitone];
matthiasm@7 288 }
matthiasm@7 289 }
matthiasm@7 290 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
matthiasm@9 291 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
matthiasm@9 292 float bassValue = 0;
matthiasm@9 293 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
matthiasm@9 294 bassValue = 1;
matthiasm@9 295 } else {
matthiasm@10 296 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
matthiasm@9 297 }
matthiasm@9 298 loadedChordDict.push_back(bassValue);
matthiasm@7 299 }
matthiasm@7 300 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
matthiasm@7 301 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
matthiasm@7 302 }
matthiasm@7 303 ostringstream os;
matthiasm@7 304 if (slashNotation.empty()) {
matthiasm@7 305 os << notenames[12+iSemitone] << chordType;
matthiasm@7 306 } else {
matthiasm@7 307 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
matthiasm@7 308 }
matthiasm@7 309
matthiasm@7 310 loadedChordNames.push_back(os.str());
matthiasm@7 311 }
matthiasm@7 312 }
matthiasm@7 313 }
matthiasm@7 314 // N type
matthiasm@7 315 loadedChordNames.push_back("N");
matthiasm@7 316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
matthiasm@7 317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
matthiasm@7 318
matthiasm@7 319 // normalise
matthiasm@7 320 float sum = 0;
matthiasm@7 321 for (int i = 0; i < loadedChordDict.size(); i++) {
matthiasm@7 322 sum += pow(loadedChordDict[i],2);
matthiasm@7 323 if (i % 24 == 23) {
matthiasm@7 324 float invertedsum = 1.0/sqrt(sum);
matthiasm@7 325 for (int k = 0; k < 24; k++) {
matthiasm@7 326 loadedChordDict[i-k] *= invertedsum;
matthiasm@7 327 }
matthiasm@7 328 sum = 0;
matthiasm@7 329 }
matthiasm@7 330
matthiasm@7 331 }
matthiasm@7 332
matthiasm@7 333
matthiasm@7 334 nChord = 0;
matthiasm@7 335 for (int i = 0; i < loadedChordNames.size(); i++) {
matthiasm@7 336 nChord++;
matthiasm@7 337 }
matthiasm@7 338 chordDictFile.close();
matthiasm@7 339
matthiasm@7 340
matthiasm@9 341 // mchorddict = new float[nChord*24];
matthiasm@7 342 for (int i = 0; i < nChord*24; i++) {
matthiasm@9 343 mchorddict->push_back(loadedChordDict[i]);
matthiasm@7 344 }
matthiasm@9 345
matthiasm@7 346 } else {// use default from chorddict.cpp
matthiasm@9 347 // mchorddict = new float[nChorddict];
matthiasm@7 348 for (int i = 0; i < nChorddict; i++) {
matthiasm@9 349 mchorddict->push_back(chorddict[i]);
matthiasm@7 350 }
matthiasm@7 351
matthiasm@7 352 nChord = nChorddict/24;
matthiasm@7 353 // mchordnames = new string[nChorddict/24];
matthiasm@7 354 char buffer1 [50];
matthiasm@7 355 for (int i = 0; i < nChorddict/24; i++) {
matthiasm@7 356 if (i < nChorddict/24 - 1) {
matthiasm@7 357 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
matthiasm@7 358 } else {
matthiasm@7 359 sprintf(buffer1, "N");
matthiasm@7 360 }
matthiasm@7 361 ostringstream os;
matthiasm@7 362 os << buffer1;
matthiasm@9 363 loadedChordNames.push_back(os.str());
matthiasm@9 364
matthiasm@7 365 }
matthiasm@7 366
matthiasm@7 367 }
matthiasm@9 368 // cerr << "before leaving" << chordnames[1] << endl;
matthiasm@9 369 return loadedChordNames;
matthiasm@7 370 }
matthiasm@0 371
matthiasm@0 372 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0 373 Plugin(inputSampleRate),
matthiasm@0 374 m_fl(0),
matthiasm@0 375 m_blockSize(0),
matthiasm@0 376 m_stepSize(0),
matthiasm@0 377 m_lengthOfNoteIndex(0),
matthiasm@0 378 m_meanTuning0(0),
matthiasm@0 379 m_meanTuning1(0),
matthiasm@0 380 m_meanTuning2(0),
matthiasm@0 381 m_localTuning0(0),
matthiasm@0 382 m_localTuning1(0),
matthiasm@0 383 m_localTuning2(0),
matthiasm@4 384 m_paling(1.0),
matthiasm@3 385 m_preset(0.0),
matthiasm@0 386 m_localTuning(0),
matthiasm@0 387 m_kernelValue(0),
matthiasm@0 388 m_kernelFftIndex(0),
matthiasm@0 389 m_kernelNoteIndex(0),
matthiasm@1 390 m_dict(0),
matthiasm@0 391 m_tuneLocal(false),
matthiasm@7 392 m_dictID(0),
matthiasm@7 393 m_chorddict(0),
matthiasm@12 394 m_chordnames(0),
matthiasm@12 395 m_doNormalizeChroma(0)
matthiasm@0 396 {
matthiasm@0 397 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@7 398
matthiasm@7 399 // make the *note* dictionary matrix
matthiasm@3 400 m_dict = new float[nNote * 84];
matthiasm@3 401 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
matthiasm@1 402 dictionaryMatrix(m_dict);
matthiasm@7 403
matthiasm@7 404 // get the *chord* dictionary from file (if the file exists)
matthiasm@9 405 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 406 }
matthiasm@0 407
matthiasm@0 408
matthiasm@0 409 NNLSChroma::~NNLSChroma()
matthiasm@0 410 {
matthiasm@0 411 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1 412 delete [] m_dict;
matthiasm@9 413 // delete [] m_chorddict;
matthiasm@7 414 // delete m_chordnames;
matthiasm@0 415 }
matthiasm@0 416
matthiasm@0 417 string
matthiasm@0 418 NNLSChroma::getIdentifier() const
matthiasm@0 419 {
matthiasm@0 420 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 421 return "nnls_chroma";
matthiasm@0 422 }
matthiasm@0 423
matthiasm@0 424 string
matthiasm@0 425 NNLSChroma::getName() const
matthiasm@0 426 {
matthiasm@0 427 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 428 return "NNLS Chroma";
matthiasm@0 429 }
matthiasm@0 430
matthiasm@0 431 string
matthiasm@0 432 NNLSChroma::getDescription() const
matthiasm@0 433 {
matthiasm@0 434 // Return something helpful here!
matthiasm@0 435 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@4 436 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription.";
matthiasm@0 437 }
matthiasm@0 438
matthiasm@0 439 string
matthiasm@0 440 NNLSChroma::getMaker() const
matthiasm@0 441 {
matthiasm@0 442 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 443 // Your name here
matthiasm@0 444 return "Matthias Mauch";
matthiasm@0 445 }
matthiasm@0 446
matthiasm@0 447 int
matthiasm@0 448 NNLSChroma::getPluginVersion() const
matthiasm@0 449 {
matthiasm@0 450 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 451 // Increment this each time you release a version that behaves
matthiasm@0 452 // differently from the previous one
matthiasm@0 453 return 1;
matthiasm@0 454 }
matthiasm@0 455
matthiasm@0 456 string
matthiasm@0 457 NNLSChroma::getCopyright() const
matthiasm@0 458 {
matthiasm@0 459 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 460 // This function is not ideally named. It does not necessarily
matthiasm@0 461 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 462 // should indicate the terms under which it is distributed. For
matthiasm@0 463 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 464 return "Copyright (2010). All rights reserved.";
matthiasm@0 465 }
matthiasm@0 466
matthiasm@0 467 NNLSChroma::InputDomain
matthiasm@0 468 NNLSChroma::getInputDomain() const
matthiasm@0 469 {
matthiasm@0 470 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 471 return FrequencyDomain;
matthiasm@0 472 }
matthiasm@0 473
matthiasm@0 474 size_t
matthiasm@0 475 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 476 {
matthiasm@0 477 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 478 return 16384; // 0 means "I can handle any block size"
matthiasm@0 479 }
matthiasm@0 480
matthiasm@0 481 size_t
matthiasm@0 482 NNLSChroma::getPreferredStepSize() const
matthiasm@0 483 {
matthiasm@0 484 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 485 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0 486 // means the same as the block size for TimeDomain
matthiasm@0 487 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 488 }
matthiasm@0 489
matthiasm@0 490 size_t
matthiasm@0 491 NNLSChroma::getMinChannelCount() const
matthiasm@0 492 {
matthiasm@0 493 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 494 return 1;
matthiasm@0 495 }
matthiasm@0 496
matthiasm@0 497 size_t
matthiasm@0 498 NNLSChroma::getMaxChannelCount() const
matthiasm@0 499 {
matthiasm@0 500 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 501 return 1;
matthiasm@0 502 }
matthiasm@0 503
matthiasm@0 504 NNLSChroma::ParameterList
matthiasm@0 505 NNLSChroma::getParameterDescriptors() const
matthiasm@0 506 {
matthiasm@0 507 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 508 ParameterList list;
matthiasm@0 509
matthiasm@3 510 ParameterDescriptor d3;
matthiasm@3 511 d3.identifier = "preset";
matthiasm@3 512 d3.name = "preset";
matthiasm@3 513 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 514 d3.unit = "";
matthiasm@3 515 d3.isQuantized = true;
matthiasm@3 516 d3.quantizeStep = 1;
matthiasm@3 517 d3.minValue = 0.0;
matthiasm@4 518 d3.maxValue = 3.0;
matthiasm@3 519 d3.defaultValue = 0.0;
matthiasm@3 520 d3.valueNames.push_back("polyphonic pop");
matthiasm@3 521 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 522 d3.valueNames.push_back("solo keyboard");
matthiasm@3 523 d3.valueNames.push_back("manual");
matthiasm@3 524 list.push_back(d3);
matthiasm@4 525
matthiasm@4 526 // ParameterDescriptor d0;
matthiasm@4 527 // d0.identifier = "notedict";
matthiasm@4 528 // d0.name = "note dictionary";
matthiasm@4 529 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 530 // d0.unit = "";
matthiasm@4 531 // d0.minValue = 0;
matthiasm@4 532 // d0.maxValue = 1;
matthiasm@4 533 // d0.defaultValue = 0;
matthiasm@4 534 // d0.isQuantized = true;
matthiasm@4 535 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 536 // d0.valueNames.push_back("no NNLS");
matthiasm@4 537 // d0.quantizeStep = 1.0;
matthiasm@4 538 // list.push_back(d0);
matthiasm@4 539
matthiasm@4 540 ParameterDescriptor d1;
matthiasm@4 541 d1.identifier = "tuningmode";
matthiasm@4 542 d1.name = "tuning mode";
matthiasm@4 543 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 544 d1.unit = "";
matthiasm@4 545 d1.minValue = 0;
matthiasm@4 546 d1.maxValue = 1;
matthiasm@4 547 d1.defaultValue = 0;
matthiasm@4 548 d1.isQuantized = true;
matthiasm@4 549 d1.valueNames.push_back("global tuning");
matthiasm@4 550 d1.valueNames.push_back("local tuning");
matthiasm@4 551 d1.quantizeStep = 1.0;
matthiasm@4 552 list.push_back(d1);
matthiasm@4 553
matthiasm@4 554 // ParameterDescriptor d2;
matthiasm@4 555 // d2.identifier = "paling";
matthiasm@4 556 // d2.name = "spectral paling";
matthiasm@4 557 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@4 558 // d2.unit = "";
matthiasm@4 559 // d2.isQuantized = true;
matthiasm@4 560 // // d2.quantizeStep = 0.1;
matthiasm@4 561 // d2.minValue = 0.0;
matthiasm@4 562 // d2.maxValue = 1.0;
matthiasm@4 563 // d2.defaultValue = 1.0;
matthiasm@4 564 // d2.isQuantized = false;
matthiasm@4 565 // list.push_back(d2);
matthiasm@12 566 ParameterDescriptor d4;
matthiasm@12 567 d4.identifier = "chromanormalize";
matthiasm@12 568 d4.name = "chroma normalization";
matthiasm@12 569 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 570 d4.unit = "";
matthiasm@12 571 d4.minValue = 0;
matthiasm@12 572 d4.maxValue = 1;
matthiasm@12 573 d4.defaultValue = 0;
matthiasm@12 574 d4.isQuantized = true;
matthiasm@12 575 d4.valueNames.push_back("no normalization");
matthiasm@12 576 d4.valueNames.push_back("maximum normalization");
matthiasm@12 577 d4.quantizeStep = 1.0;
matthiasm@12 578 list.push_back(d4);
matthiasm@4 579
matthiasm@0 580 return list;
matthiasm@0 581 }
matthiasm@0 582
matthiasm@0 583 float
matthiasm@0 584 NNLSChroma::getParameter(string identifier) const
matthiasm@0 585 {
matthiasm@3 586 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 587 if (identifier == "notedict") {
matthiasm@0 588 return m_dictID;
matthiasm@0 589 }
matthiasm@0 590
matthiasm@0 591 if (identifier == "paling") {
matthiasm@0 592 return m_paling;
matthiasm@0 593 }
matthiasm@0 594
matthiasm@0 595 if (identifier == "tuningmode") {
matthiasm@0 596 if (m_tuneLocal) {
matthiasm@0 597 return 1.0;
matthiasm@0 598 } else {
matthiasm@0 599 return 0.0;
matthiasm@0 600 }
matthiasm@0 601 }
matthiasm@3 602 if (identifier == "preset") {
matthiasm@3 603 return m_preset;
matthiasm@3 604 }
matthiasm@12 605 if (identifier == "chromanormalize") {
matthiasm@12 606 return m_doNormalizeChroma;
matthiasm@12 607 }
matthiasm@0 608 return 0;
matthiasm@0 609
matthiasm@0 610 }
matthiasm@0 611
matthiasm@0 612 void
matthiasm@0 613 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 614 {
matthiasm@3 615 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 616 if (identifier == "notedict") {
matthiasm@0 617 m_dictID = (int) value;
matthiasm@0 618 }
matthiasm@0 619
matthiasm@0 620 if (identifier == "paling") {
matthiasm@0 621 m_paling = value;
matthiasm@0 622 }
matthiasm@0 623
matthiasm@0 624 if (identifier == "tuningmode") {
matthiasm@0 625 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 626 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 627 }
matthiasm@3 628 if (identifier == "preset") {
matthiasm@3 629 m_preset = value;
matthiasm@3 630 if (m_preset == 0.0) {
matthiasm@3 631 m_tuneLocal = false;
matthiasm@3 632 m_paling = 1.0;
matthiasm@3 633 m_dictID = 0.0;
matthiasm@3 634 }
matthiasm@3 635 if (m_preset == 1.0) {
matthiasm@3 636 m_tuneLocal = false;
matthiasm@3 637 m_paling = 1.0;
matthiasm@3 638 m_dictID = 1.0;
matthiasm@3 639 }
matthiasm@3 640 if (m_preset == 2.0) {
matthiasm@3 641 m_tuneLocal = false;
matthiasm@3 642 m_paling = 0.7;
matthiasm@3 643 m_dictID = 0.0;
matthiasm@3 644 }
matthiasm@3 645 }
matthiasm@12 646 if (identifier == "chromanormalize") {
matthiasm@12 647 m_doNormalizeChroma = value;
matthiasm@12 648 }
matthiasm@0 649 }
matthiasm@0 650
matthiasm@0 651 NNLSChroma::ProgramList
matthiasm@0 652 NNLSChroma::getPrograms() const
matthiasm@0 653 {
matthiasm@0 654 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 655 ProgramList list;
matthiasm@0 656
matthiasm@0 657 // If you have no programs, return an empty list (or simply don't
matthiasm@0 658 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 659
matthiasm@0 660 return list;
matthiasm@0 661 }
matthiasm@0 662
matthiasm@0 663 string
matthiasm@0 664 NNLSChroma::getCurrentProgram() const
matthiasm@0 665 {
matthiasm@0 666 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 667 return ""; // no programs
matthiasm@0 668 }
matthiasm@0 669
matthiasm@0 670 void
matthiasm@0 671 NNLSChroma::selectProgram(string name)
matthiasm@0 672 {
matthiasm@0 673 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 674 }
matthiasm@0 675
matthiasm@0 676
matthiasm@0 677 NNLSChroma::OutputList
matthiasm@0 678 NNLSChroma::getOutputDescriptors() const
matthiasm@0 679 {
matthiasm@0 680 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 681 OutputList list;
matthiasm@0 682
matthiasm@0 683 // Make chroma names for the binNames property
matthiasm@0 684 vector<string> chromanames;
matthiasm@0 685 vector<string> bothchromanames;
matthiasm@0 686 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 687 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 688 if (iNote < 12) {
matthiasm@0 689 chromanames.push_back(notenames[iNote]);
matthiasm@0 690 }
matthiasm@0 691 }
matthiasm@0 692
matthiasm@1 693 // int nNote = 84;
matthiasm@0 694
matthiasm@0 695 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 696 // Every plugin must have at least one output.
matthiasm@0 697
matthiasm@0 698 OutputDescriptor d0;
matthiasm@0 699 d0.identifier = "tuning";
matthiasm@0 700 d0.name = "Tuning";
matthiasm@0 701 d0.description = "The concert pitch.";
matthiasm@0 702 d0.unit = "Hz";
matthiasm@0 703 d0.hasFixedBinCount = true;
matthiasm@0 704 d0.binCount = 0;
matthiasm@0 705 d0.hasKnownExtents = true;
matthiasm@0 706 d0.minValue = 427.47;
matthiasm@0 707 d0.maxValue = 452.89;
matthiasm@0 708 d0.isQuantized = false;
matthiasm@0 709 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 710 d0.hasDuration = false;
matthiasm@0 711 list.push_back(d0);
matthiasm@0 712
matthiasm@0 713 OutputDescriptor d1;
matthiasm@0 714 d1.identifier = "logfreqspec";
matthiasm@0 715 d1.name = "Log-Frequency Spectrum";
matthiasm@0 716 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 717 d1.unit = "";
matthiasm@0 718 d1.hasFixedBinCount = true;
matthiasm@0 719 d1.binCount = nNote;
matthiasm@0 720 d1.hasKnownExtents = false;
matthiasm@0 721 d1.isQuantized = false;
matthiasm@0 722 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 723 d1.hasDuration = false;
matthiasm@0 724 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 725 list.push_back(d1);
matthiasm@0 726
matthiasm@0 727 OutputDescriptor d2;
matthiasm@0 728 d2.identifier = "tunedlogfreqspec";
matthiasm@0 729 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 730 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 731 d2.unit = "";
matthiasm@0 732 d2.hasFixedBinCount = true;
matthiasm@0 733 d2.binCount = 256;
matthiasm@0 734 d2.hasKnownExtents = false;
matthiasm@0 735 d2.isQuantized = false;
matthiasm@0 736 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 737 d2.hasDuration = false;
matthiasm@0 738 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 739 list.push_back(d2);
matthiasm@0 740
matthiasm@0 741 OutputDescriptor d3;
matthiasm@0 742 d3.identifier = "semitonespectrum";
matthiasm@0 743 d3.name = "Semitone Spectrum";
matthiasm@0 744 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 745 d3.unit = "";
matthiasm@0 746 d3.hasFixedBinCount = true;
matthiasm@0 747 d3.binCount = 84;
matthiasm@0 748 d3.hasKnownExtents = false;
matthiasm@0 749 d3.isQuantized = false;
matthiasm@0 750 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 751 d3.hasDuration = false;
matthiasm@0 752 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 753 list.push_back(d3);
matthiasm@0 754
matthiasm@0 755 OutputDescriptor d4;
matthiasm@0 756 d4.identifier = "chroma";
matthiasm@0 757 d4.name = "Chromagram";
matthiasm@0 758 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 759 d4.unit = "";
matthiasm@0 760 d4.hasFixedBinCount = true;
matthiasm@0 761 d4.binCount = 12;
matthiasm@0 762 d4.binNames = chromanames;
matthiasm@0 763 d4.hasKnownExtents = false;
matthiasm@0 764 d4.isQuantized = false;
matthiasm@0 765 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 766 d4.hasDuration = false;
matthiasm@0 767 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 768 list.push_back(d4);
matthiasm@0 769
matthiasm@0 770 OutputDescriptor d5;
matthiasm@0 771 d5.identifier = "basschroma";
matthiasm@0 772 d5.name = "Bass Chromagram";
matthiasm@0 773 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 774 d5.unit = "";
matthiasm@0 775 d5.hasFixedBinCount = true;
matthiasm@0 776 d5.binCount = 12;
matthiasm@0 777 d5.binNames = chromanames;
matthiasm@0 778 d5.hasKnownExtents = false;
matthiasm@0 779 d5.isQuantized = false;
matthiasm@0 780 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 781 d5.hasDuration = false;
matthiasm@0 782 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 783 list.push_back(d5);
matthiasm@0 784
matthiasm@0 785 OutputDescriptor d6;
matthiasm@0 786 d6.identifier = "bothchroma";
matthiasm@0 787 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 788 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 789 d6.unit = "";
matthiasm@0 790 d6.hasFixedBinCount = true;
matthiasm@0 791 d6.binCount = 24;
matthiasm@0 792 d6.binNames = bothchromanames;
matthiasm@0 793 d6.hasKnownExtents = false;
matthiasm@0 794 d6.isQuantized = false;
matthiasm@0 795 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 796 d6.hasDuration = false;
matthiasm@0 797 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 798 list.push_back(d6);
matthiasm@0 799
matthiasm@0 800 OutputDescriptor d7;
matthiasm@0 801 d7.identifier = "simplechord";
matthiasm@0 802 d7.name = "Simple Chord Estimate";
matthiasm@0 803 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 804 d7.unit = "";
matthiasm@0 805 d7.hasFixedBinCount = true;
matthiasm@0 806 d7.binCount = 0;
matthiasm@0 807 d7.hasKnownExtents = false;
matthiasm@0 808 d7.isQuantized = false;
matthiasm@0 809 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 810 d7.hasDuration = false;
matthiasm@0 811 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 812 list.push_back(d7);
matthiasm@0 813
matthiasm@1 814 // OutputDescriptor d8;
matthiasm@1 815 // d8.identifier = "inconsistency";
matthiasm@1 816 // d8.name = "Harmonic inconsistency value";
matthiasm@1 817 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1 818 // d8.unit = "";
matthiasm@1 819 // d8.hasFixedBinCount = true;
matthiasm@1 820 // d8.binCount = 1;
matthiasm@1 821 // d8.hasKnownExtents = false;
matthiasm@1 822 // d8.isQuantized = false;
matthiasm@1 823 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 824 // d8.hasDuration = false;
matthiasm@1 825 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 826 // list.push_back(d8);
matthiasm@1 827 //
matthiasm@1 828 // OutputDescriptor d9;
matthiasm@1 829 // d9.identifier = "inconsistencysegment";
matthiasm@1 830 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1 831 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1 832 // d9.unit = "";
matthiasm@1 833 // d9.hasFixedBinCount = true;
matthiasm@1 834 // d9.binCount = 0;
matthiasm@1 835 // d9.hasKnownExtents = true;
matthiasm@1 836 // d9.minValue = 0.1;
matthiasm@1 837 // d9.maxValue = 0.9;
matthiasm@1 838 // d9.isQuantized = false;
matthiasm@1 839 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1 840 // d9.hasDuration = false;
matthiasm@1 841 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 842 // list.push_back(d9);
matthiasm@1 843 //
matthiasm@1 844 OutputDescriptor d10;
matthiasm@1 845 d10.identifier = "localtuning";
matthiasm@1 846 d10.name = "Local tuning";
matthiasm@4 847 d10.description = "Tuning based on the history up to this timestamp.";
matthiasm@1 848 d10.unit = "Hz";
matthiasm@1 849 d10.hasFixedBinCount = true;
matthiasm@1 850 d10.binCount = 1;
matthiasm@1 851 d10.hasKnownExtents = true;
matthiasm@1 852 d10.minValue = 427.47;
matthiasm@1 853 d10.maxValue = 452.89;
matthiasm@1 854 d10.isQuantized = false;
matthiasm@3 855 d10.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 856 d10.hasDuration = false;
matthiasm@3 857 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 858 list.push_back(d10);
matthiasm@1 859
matthiasm@0 860 return list;
matthiasm@0 861 }
matthiasm@0 862
matthiasm@0 863
matthiasm@0 864 bool
matthiasm@0 865 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 866 {
matthiasm@1 867 if (debug_on) {
matthiasm@1 868 cerr << "--> initialise";
matthiasm@1 869 }
matthiasm@1 870
matthiasm@0 871 if (channels < getMinChannelCount() ||
matthiasm@0 872 channels > getMaxChannelCount()) return false;
matthiasm@0 873 m_blockSize = blockSize;
matthiasm@0 874 m_stepSize = stepSize;
matthiasm@0 875 frameCount = 0;
matthiasm@0 876 int tempn = 256 * m_blockSize/2;
matthiasm@4 877 // cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1 878 float *tempkernel;
matthiasm@1 879
matthiasm@1 880 tempkernel = new float[tempn];
matthiasm@1 881
matthiasm@0 882 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1 883 m_kernelValue.clear();
matthiasm@1 884 m_kernelFftIndex.clear();
matthiasm@1 885 m_kernelNoteIndex.clear();
matthiasm@1 886 int countNonzero = 0;
matthiasm@0 887 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1 888 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1 889 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 890 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0 891 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 892 countNonzero++;
matthiasm@0 893 }
matthiasm@1 894 m_kernelFftIndex.push_back(iFFT);
matthiasm@1 895 m_kernelNoteIndex.push_back(iNote);
matthiasm@0 896 }
matthiasm@0 897 }
matthiasm@1 898 }
matthiasm@4 899 // cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1 900 delete [] tempkernel;
matthiasm@3 901 ofstream myfile;
matthiasm@3 902 myfile.open ("matrix.txt");
matthiasm@3 903 // myfile << "Writing this to a file.\n";
matthiasm@3 904 for (int i = 0; i < nNote * 84; ++i) {
matthiasm@3 905 myfile << m_dict[i] << endl;
matthiasm@3 906 }
matthiasm@3 907 myfile.close();
matthiasm@0 908 return true;
matthiasm@0 909 }
matthiasm@0 910
matthiasm@0 911 void
matthiasm@0 912 NNLSChroma::reset()
matthiasm@0 913 {
matthiasm@4 914 if (debug_on) cerr << "--> reset";
matthiasm@4 915
matthiasm@0 916 // Clear buffers, reset stored values, etc
matthiasm@4 917 frameCount = 0;
matthiasm@4 918 m_dictID = 0;
matthiasm@4 919 m_fl.clear();
matthiasm@4 920 m_meanTuning0 = 0;
matthiasm@4 921 m_meanTuning1 = 0;
matthiasm@4 922 m_meanTuning2 = 0;
matthiasm@4 923 m_localTuning0 = 0;
matthiasm@4 924 m_localTuning1 = 0;
matthiasm@4 925 m_localTuning2 = 0;
matthiasm@4 926 m_localTuning.clear();
matthiasm@0 927 }
matthiasm@0 928
matthiasm@0 929 NNLSChroma::FeatureSet
matthiasm@0 930 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 931 {
matthiasm@4 932 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 933 frameCount++;
matthiasm@0 934 float *magnitude = new float[m_blockSize/2];
matthiasm@0 935
matthiasm@0 936 Feature f10; // local tuning
matthiasm@3 937 f10.hasTimestamp = true;
matthiasm@4 938 f10.timestamp = timestamp;
matthiasm@0 939 const float *fbuf = inputBuffers[0];
matthiasm@0 940
matthiasm@0 941 // make magnitude
matthiasm@0 942 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0 943 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0 944 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@0 945 }
matthiasm@4 946
matthiasm@0 947 // note magnitude mapping using pre-calculated matrix
matthiasm@0 948 float *nm = new float[nNote]; // note magnitude
matthiasm@0 949 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 950 nm[iNote] = 0; // initialise as 0
matthiasm@0 951 }
matthiasm@0 952 int binCount = 0;
matthiasm@0 953 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0 954 // cerr << ".";
matthiasm@1 955 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1 956 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0 957 binCount++;
matthiasm@0 958 }
matthiasm@1 959 // cerr << nm[20];
matthiasm@1 960 // cerr << endl;
matthiasm@0 961
matthiasm@0 962
matthiasm@0 963 float one_over_N = 1.0/frameCount;
matthiasm@0 964 // update means of complex tuning variables
matthiasm@0 965 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 966 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 967 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 968
matthiasm@0 969 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 970 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 971 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 972 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@3 973 float ratioOld = 0.997;
matthiasm@3 974 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 975 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 976 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 977 }
matthiasm@0 978
matthiasm@0 979 // if (m_tuneLocal) {
matthiasm@0 980 // local tuning
matthiasm@0 981 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0 982 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0 983 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0 984 m_localTuning.push_back(normalisedtuning);
matthiasm@0 985 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0 986 f10.values.push_back(tuning440);
matthiasm@3 987 // cerr << tuning440 << endl;
matthiasm@0 988 // }
matthiasm@0 989
matthiasm@0 990 Feature f1; // logfreqspec
matthiasm@0 991 f1.hasTimestamp = true;
matthiasm@0 992 f1.timestamp = timestamp;
matthiasm@0 993 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 994 f1.values.push_back(nm[iNote]);
matthiasm@0 995 }
matthiasm@0 996
matthiasm@0 997 FeatureSet fs;
matthiasm@0 998 fs[1].push_back(f1);
matthiasm@3 999 fs[8].push_back(f10);
matthiasm@0 1000
matthiasm@0 1001 // deletes
matthiasm@0 1002 delete[] magnitude;
matthiasm@0 1003 delete[] nm;
matthiasm@0 1004
matthiasm@0 1005 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@7 1006 char * pPath;
matthiasm@7 1007 pPath = getenv ("VAMP_PATH");
matthiasm@7 1008
matthiasm@7 1009
matthiasm@0 1010 return fs;
matthiasm@0 1011 }
matthiasm@0 1012
matthiasm@0 1013 NNLSChroma::FeatureSet
matthiasm@0 1014 NNLSChroma::getRemainingFeatures()
matthiasm@0 1015 {
matthiasm@4 1016 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@4 1017 FeatureSet fsOut;
matthiasm@4 1018 if (m_fl.size() == 0) return fsOut;
matthiasm@9 1019 int nChord = m_chordnames.size();
matthiasm@0 1020 //
matthiasm@1 1021 /** Calculate Tuning
matthiasm@1 1022 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1 1023 cumulative mean real and imag values)
matthiasm@1 1024 **/
matthiasm@1 1025 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1 1026 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1 1027 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1 1028 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1 1029 int intShift = floor(normalisedtuning * 3);
matthiasm@1 1030 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1031
matthiasm@1 1032 char buffer0 [50];
matthiasm@1 1033
matthiasm@1 1034 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 1035
matthiasm@1 1036 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 1037
matthiasm@1 1038 // push tuning to FeatureSet fsOut
matthiasm@1 1039 Feature f0; // tuning
matthiasm@1 1040 f0.hasTimestamp = true;
matthiasm@1 1041 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1 1042 f0.label = buffer0;
matthiasm@1 1043 fsOut[0].push_back(f0);
matthiasm@1 1044
matthiasm@1 1045 /** Tune Log-Frequency Spectrogram
matthiasm@1 1046 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1 1047 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@1 1048 **/
matthiasm@1 1049
matthiasm@1 1050 float tempValue = 0;
matthiasm@1 1051 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1 1052 float thresh = pow(10,dbThreshold/20);
matthiasm@1 1053 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1 1054 int count = 0;
matthiasm@1 1055
matthiasm@1 1056 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1 1057 Feature f1 = *i;
matthiasm@1 1058 Feature f2; // tuned log-frequency spectrum
matthiasm@1 1059 f2.hasTimestamp = true;
matthiasm@1 1060 f2.timestamp = f1.timestamp;
matthiasm@1 1061 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 1062
matthiasm@1 1063 if (m_tuneLocal) {
matthiasm@1 1064 intShift = floor(m_localTuning[count] * 3);
matthiasm@1 1065 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1066 }
matthiasm@1 1067
matthiasm@1 1068 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 1069
matthiasm@4 1070 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1 1071 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1 1072 f2.values.push_back(tempValue);
matthiasm@1 1073 }
matthiasm@1 1074
matthiasm@1 1075 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1 1076 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1 1077 vector<float> runningstd;
matthiasm@1 1078 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1 1079 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1 1080 }
matthiasm@1 1081 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1 1082 for (int i = 0; i < 256; i++) {
matthiasm@1 1083 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1 1084 if (runningstd[i] > 0) {
matthiasm@1 1085 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1 1086 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1087 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1 1088 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1089 }
matthiasm@1 1090 if (f2.values[i] < 0) {
matthiasm@1 1091 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1 1092 }
matthiasm@1 1093 }
matthiasm@1 1094 fsOut[2].push_back(f2);
matthiasm@1 1095 count++;
matthiasm@1 1096 }
matthiasm@1 1097
matthiasm@1 1098 /** Semitone spectrum and chromagrams
matthiasm@1 1099 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1 1100 is inferred using a non-negative least squares algorithm.
matthiasm@1 1101 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1 1102 bass and treble stacked onto each other).
matthiasm@1 1103 **/
matthiasm@1 1104 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n);
matthiasm@1 1105
matthiasm@1 1106 vector<vector<float> > chordogram;
matthiasm@3 1107 vector<vector<int> > scoreChordogram;
matthiasm@1 1108 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1 1109 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1 1110 count = 0;
matthiasm@9 1111
matthiasm@1 1112 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1 1113 Feature f2 = *it; // logfreq spectrum
matthiasm@1 1114 Feature f3; // semitone spectrum
matthiasm@1 1115 Feature f4; // treble chromagram
matthiasm@1 1116 Feature f5; // bass chromagram
matthiasm@1 1117 Feature f6; // treble and bass chromagram
matthiasm@1 1118
matthiasm@1 1119 f3.hasTimestamp = true;
matthiasm@1 1120 f3.timestamp = f2.timestamp;
matthiasm@1 1121
matthiasm@1 1122 f4.hasTimestamp = true;
matthiasm@1 1123 f4.timestamp = f2.timestamp;
matthiasm@1 1124
matthiasm@1 1125 f5.hasTimestamp = true;
matthiasm@1 1126 f5.timestamp = f2.timestamp;
matthiasm@1 1127
matthiasm@1 1128 f6.hasTimestamp = true;
matthiasm@1 1129 f6.timestamp = f2.timestamp;
matthiasm@1 1130
matthiasm@3 1131 float b[256];
matthiasm@1 1132
matthiasm@1 1133 bool some_b_greater_zero = false;
matthiasm@3 1134 float sumb = 0;
matthiasm@1 1135 for (int i = 0; i < 256; i++) {
matthiasm@3 1136 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@3 1137 b[i] = f2.values[i];
matthiasm@3 1138 sumb += b[i];
matthiasm@1 1139 if (b[i] > 0) {
matthiasm@1 1140 some_b_greater_zero = true;
matthiasm@1 1141 }
matthiasm@1 1142 }
matthiasm@1 1143
matthiasm@1 1144 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 1145
matthiasm@1 1146 vector<float> chroma = vector<float>(12, 0);
matthiasm@1 1147 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1 1148 float currval;
matthiasm@1 1149 unsigned iSemitone = 0;
matthiasm@1 1150
matthiasm@1 1151 if (some_b_greater_zero) {
matthiasm@3 1152 if (m_dictID == 1) {
matthiasm@1 1153 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1 1154 currval = 0;
matthiasm@3 1155 currval += b[iNote + 1 + -1] * 0.5;
matthiasm@3 1156 currval += b[iNote + 1 + 0] * 1.0;
matthiasm@3 1157 currval += b[iNote + 1 + 1] * 0.5;
matthiasm@1 1158 f3.values.push_back(currval);
matthiasm@1 1159 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1 1160 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1 1161 iSemitone++;
matthiasm@1 1162 }
matthiasm@1 1163
matthiasm@1 1164 } else {
matthiasm@3 1165 float x[84+1000];
matthiasm@3 1166 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
matthiasm@10 1167 vector<int> signifIndex;
matthiasm@10 1168 int index=0;
matthiasm@10 1169 sumb /= 84.0;
matthiasm@10 1170 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@10 1171 float currval = 0;
matthiasm@10 1172 currval += b[iNote + 1 + -1];
matthiasm@10 1173 currval += b[iNote + 1 + 0];
matthiasm@10 1174 currval += b[iNote + 1 + 1];
matthiasm@10 1175 if (currval > 0) signifIndex.push_back(index);
matthiasm@10 1176 f3.values.push_back(0); // fill the values, change later
matthiasm@10 1177 index++;
matthiasm@10 1178 }
matthiasm@3 1179 float rnorm;
matthiasm@3 1180 float w[84+1000];
matthiasm@3 1181 float zz[84+1000];
matthiasm@3 1182 int indx[84+1000];
matthiasm@1 1183 int mode;
matthiasm@10 1184 int dictsize = 256*signifIndex.size();
matthiasm@10 1185 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
matthiasm@10 1186 float *curr_dict = new float[dictsize];
matthiasm@10 1187 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
matthiasm@10 1188 for (unsigned iBin = 0; iBin < 256; iBin++) {
matthiasm@10 1189 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
matthiasm@10 1190 }
matthiasm@3 1191 }
matthiasm@10 1192 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
matthiasm@10 1193 delete [] curr_dict;
matthiasm@10 1194 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
matthiasm@10 1195 f3.values[signifIndex[iNote]] = x[iNote];
matthiasm@3 1196 // cerr << mode << endl;
matthiasm@10 1197 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
matthiasm@10 1198 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
matthiasm@3 1199 }
matthiasm@1 1200 }
matthiasm@1 1201 }
matthiasm@10 1202
matthiasm@12 1203
matthiasm@12 1204 if (m_doNormalizeChroma > 0) {
matthiasm@12 1205 float chromamax = *max_element(chroma.begin(), chroma.end());
matthiasm@12 1206 for (int i = 0; i < chroma.size(); i++) {
matthiasm@12 1207 chroma[i] /= chromamax;
matthiasm@12 1208 }
matthiasm@12 1209 }
matthiasm@12 1210 f4.values = chroma;
matthiasm@1 1211 f5.values = basschroma;
matthiasm@1 1212 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1 1213 f6.values = chroma;
matthiasm@1 1214
matthiasm@1 1215 // local chord estimation
matthiasm@1 1216 vector<float> currentChordSalience;
matthiasm@1 1217 float tempchordvalue = 0;
matthiasm@1 1218 float sumchordvalue = 0;
matthiasm@9 1219
matthiasm@1 1220 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1221 tempchordvalue = 0;
matthiasm@1 1222 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@9 1223 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1224 }
matthiasm@1 1225 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@9 1226 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1227 }
matthiasm@1 1228 sumchordvalue+=tempchordvalue;
matthiasm@1 1229 currentChordSalience.push_back(tempchordvalue);
matthiasm@1 1230 }
matthiasm@1 1231 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1232 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1 1233 }
matthiasm@1 1234 chordogram.push_back(currentChordSalience);
matthiasm@1 1235
matthiasm@1 1236 fsOut[3].push_back(f3);
matthiasm@1 1237 fsOut[4].push_back(f4);
matthiasm@1 1238 fsOut[5].push_back(f5);
matthiasm@1 1239 fsOut[6].push_back(f6);
matthiasm@1 1240 count++;
matthiasm@1 1241 }
matthiasm@10 1242 cerr << "******* NNLS done *******" << endl;
matthiasm@10 1243
matthiasm@3 1244 /* Simple chord estimation
matthiasm@3 1245 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@3 1246 take the maximum. Very simple, don't do this at home...
matthiasm@3 1247 */
matthiasm@3 1248 count = 0;
matthiasm@3 1249 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@3 1250 vector<int> chordSequence;
matthiasm@3 1251 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
matthiasm@3 1252 vector<int> temp = vector<int>(nChord,0);
matthiasm@3 1253 scoreChordogram.push_back(temp);
matthiasm@3 1254 }
matthiasm@4 1255 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
matthiasm@3 1256 int startIndex = count + 1;
matthiasm@3 1257 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 1258
matthiasm@10 1259 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 1260
matthiasm@10 1261 vector<int> chordCandidates;
matthiasm@10 1262 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@10 1263 // float currsum = 0;
matthiasm@10 1264 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@10 1265 // currsum += chordogram[iFrame][iChord];
matthiasm@10 1266 // }
matthiasm@10 1267 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@10 1268 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@10 1269 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@10 1270 chordCandidates.push_back(iChord);
matthiasm@10 1271 break;
matthiasm@10 1272 }
matthiasm@10 1273 }
matthiasm@10 1274 }
matthiasm@10 1275 chordCandidates.push_back(nChord-1);
matthiasm@10 1276 // cerr << chordCandidates.size() << endl;
matthiasm@10 1277
matthiasm@10 1278 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@4 1279 float maxindex = 0; //... and the index thereof
matthiasm@10 1280 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@10 1281 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@10 1282
matthiasm@4 1283 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@3 1284 // now find the max values on both sides of iWF
matthiasm@3 1285 // left side:
matthiasm@3 1286 float maxL = 0;
matthiasm@3 1287 unsigned maxindL = nChord-1;
matthiasm@10 1288 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@10 1289 unsigned iChord = chordCandidates[kChord];
matthiasm@3 1290 float currsum = 0;
matthiasm@3 1291 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@3 1292 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1293 }
matthiasm@3 1294 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1295 if (currsum > maxL) {
matthiasm@3 1296 maxL = currsum;
matthiasm@3 1297 maxindL = iChord;
matthiasm@3 1298 }
matthiasm@3 1299 }
matthiasm@3 1300 // right side:
matthiasm@3 1301 float maxR = 0;
matthiasm@3 1302 unsigned maxindR = nChord-1;
matthiasm@10 1303 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@10 1304 unsigned iChord = chordCandidates[kChord];
matthiasm@3 1305 float currsum = 0;
matthiasm@3 1306 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1307 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1308 }
matthiasm@3 1309 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1310 if (currsum > maxR) {
matthiasm@3 1311 maxR = currsum;
matthiasm@3 1312 maxindR = iChord;
matthiasm@3 1313 }
matthiasm@3 1314 }
matthiasm@3 1315 if (maxL+maxR > maxval) {
matthiasm@3 1316 maxval = maxL+maxR;
matthiasm@3 1317 maxindex = iWF;
matthiasm@3 1318 bestchordL = maxindL;
matthiasm@3 1319 bestchordR = maxindR;
matthiasm@3 1320 }
matthiasm@3 1321
matthiasm@3 1322 }
matthiasm@3 1323 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@3 1324 // add a score to every chord-frame-point that was part of a maximum
matthiasm@3 1325 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@3 1326 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@3 1327 }
matthiasm@3 1328 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1329 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@3 1330 }
matthiasm@3 1331 count++;
matthiasm@3 1332 }
matthiasm@10 1333 cerr << "******* agent finished *******" << endl;
matthiasm@3 1334 count = 0;
matthiasm@3 1335 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1336 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3 1337 float maxindex = 0; //... and the index thereof
matthiasm@3 1338 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3 1339 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@3 1340 maxval = scoreChordogram[count][iChord];
matthiasm@3 1341 maxindex = iChord;
matthiasm@4 1342 // cerr << iChord << endl;
matthiasm@3 1343 }
matthiasm@3 1344 }
matthiasm@3 1345 chordSequence.push_back(maxindex);
matthiasm@4 1346 // cerr << "before modefilter, maxindex: " << maxindex << endl;
matthiasm@3 1347 count++;
matthiasm@3 1348 }
matthiasm@10 1349 cerr << "******* mode filter done *******" << endl;
matthiasm@10 1350
matthiasm@3 1351
matthiasm@3 1352 // mode filter on chordSequence
matthiasm@3 1353 count = 0;
matthiasm@12 1354 string oldChord = "";
matthiasm@3 1355 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1356 Feature f6 = *it;
matthiasm@3 1357 Feature f7; // chord estimate
matthiasm@3 1358 f7.hasTimestamp = true;
matthiasm@3 1359 f7.timestamp = f6.timestamp;
matthiasm@3 1360 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@3 1361 int maxChordCount = 0;
matthiasm@3 1362 int maxChordIndex = nChord-1;
matthiasm@12 1363 string maxChord;
matthiasm@4 1364 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@4 1365 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@4 1366 for (int i = startIndex; i < endIndex; i++) {
matthiasm@4 1367 chordCount[chordSequence[i]]++;
matthiasm@4 1368 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@7 1369 // cerr << "start index " << startIndex << endl;
matthiasm@4 1370 maxChordCount++;
matthiasm@4 1371 maxChordIndex = chordSequence[i];
matthiasm@12 1372 maxChord = m_chordnames[maxChordIndex];
matthiasm@4 1373 }
matthiasm@4 1374 }
matthiasm@4 1375 // chordSequence[count] = maxChordIndex;
matthiasm@7 1376 // cerr << maxChordIndex << endl;
matthiasm@12 1377 if (oldChord != maxChord) {
matthiasm@12 1378 oldChord = maxChord;
matthiasm@3 1379
matthiasm@9 1380 // char buffer1 [50];
matthiasm@9 1381 // if (maxChordIndex < nChord - 1) {
matthiasm@9 1382 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@9 1383 // } else {
matthiasm@9 1384 // sprintf(buffer1, "N");
matthiasm@9 1385 // }
matthiasm@9 1386 // f7.label = buffer1;
matthiasm@9 1387 f7.label = m_chordnames[maxChordIndex];
matthiasm@3 1388 fsOut[7].push_back(f7);
matthiasm@3 1389 }
matthiasm@3 1390 count++;
matthiasm@3 1391 }
matthiasm@0 1392 // // musicity
matthiasm@0 1393 // count = 0;
matthiasm@0 1394 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0 1395 // vector<float> musicityValue;
matthiasm@0 1396 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1397 // Feature f4 = *it;
matthiasm@0 1398 //
matthiasm@0 1399 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1400 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1401 // float chromasum = 0;
matthiasm@0 1402 // float diffsum = 0;
matthiasm@0 1403 // for (int k = 0; k < 12; k++) {
matthiasm@0 1404 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0 1405 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0 1406 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0 1407 // }
matthiasm@0 1408 // }
matthiasm@0 1409 // diffsum /= chromasum;
matthiasm@0 1410 // musicityValue.push_back(diffsum);
matthiasm@0 1411 // count++;
matthiasm@0 1412 // }
matthiasm@0 1413 //
matthiasm@0 1414 // float musicityThreshold = 0.44;
matthiasm@0 1415 // if (m_stepSize == 4096) {
matthiasm@0 1416 // musicityThreshold = 0.74;
matthiasm@0 1417 // }
matthiasm@0 1418 // if (m_stepSize == 4410) {
matthiasm@0 1419 // musicityThreshold = 0.77;
matthiasm@0 1420 // }
matthiasm@0 1421 //
matthiasm@0 1422 // count = 0;
matthiasm@0 1423 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1424 // Feature f4 = *it;
matthiasm@0 1425 // Feature f8; // musicity
matthiasm@0 1426 // Feature f9; // musicity segmenter
matthiasm@0 1427 //
matthiasm@0 1428 // f8.hasTimestamp = true;
matthiasm@0 1429 // f8.timestamp = f4.timestamp;
matthiasm@0 1430 // f9.hasTimestamp = true;
matthiasm@0 1431 // f9.timestamp = f4.timestamp;
matthiasm@0 1432 //
matthiasm@0 1433 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1434 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1435 // int musicityCount = 0;
matthiasm@0 1436 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0 1437 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0 1438 // }
matthiasm@0 1439 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0 1440 //
matthiasm@0 1441 // if (isSpeech) {
matthiasm@0 1442 // if (oldlabeltype != 2) {
matthiasm@0 1443 // f9.label = "Speech";
matthiasm@0 1444 // fsOut[9].push_back(f9);
matthiasm@0 1445 // oldlabeltype = 2;
matthiasm@0 1446 // }
matthiasm@0 1447 // } else {
matthiasm@0 1448 // if (oldlabeltype != 1) {
matthiasm@0 1449 // f9.label = "Music";
matthiasm@0 1450 // fsOut[9].push_back(f9);
matthiasm@0 1451 // oldlabeltype = 1;
matthiasm@0 1452 // }
matthiasm@0 1453 // }
matthiasm@0 1454 // f8.values.push_back(musicityValue[count]);
matthiasm@0 1455 // fsOut[8].push_back(f8);
matthiasm@0 1456 // count++;
matthiasm@0 1457 // }
matthiasm@0 1458 return fsOut;
matthiasm@0 1459
matthiasm@0 1460 }
matthiasm@0 1461