annotate NNLSChroma.cpp @ 13:9ae90fa5fa74 matthiasm-plugin

NNLS is now taken from a file without gpl. more chroma normalisation options.
author matthiasm
date Wed, 16 Jun 2010 10:16:13 +0000
parents 54f28d8ac098
children 75fb80542cd2
rev   line source
matthiasm@0 1
matthiasm@0 2 #include "NNLSChroma.h"
matthiasm@0 3 #include <cmath>
matthiasm@10 4 // #include <omp.h>
matthiasm@0 5 #include <list>
matthiasm@0 6 #include <iostream>
matthiasm@3 7 #include <fstream>
matthiasm@0 8 #include <sstream>
matthiasm@0 9 #include <cassert>
matthiasm@7 10 #include <cstdlib>
matthiasm@0 11 #include <cstdio>
matthiasm@7 12 #include <boost/tokenizer.hpp>
matthiasm@7 13 #include <boost/iostreams/device/file.hpp>
matthiasm@7 14 #include <boost/iostreams/stream.hpp>
matthiasm@7 15 #include <boost/lexical_cast.hpp>
matthiasm@1 16 #include "nnls.h"
matthiasm@0 17 #include "chorddict.cpp"
matthiasm@9 18
matthiasm@10 19 // #include <omp.h>
matthiasm@10 20 // #define N 1000
matthiasm@10 21 // #define CHUNKSIZE 100
matthiasm@9 22
matthiasm@9 23
matthiasm@0 24 using namespace std;
matthiasm@7 25 using namespace boost;
matthiasm@0 26
// Numerically sin(2*pi/3) and cos(2*pi/3).
// NOTE(review): presumably used for the three-bins-per-semitone tuning
// estimate elsewhere in the file -- confirm against the caller.
const float sinvalue = 0.866025404;
const float cosvalue = -0.5;

// Symmetric 19-tap smoothing window; the coefficients sum to roughly 1.
const float hammingwind[19] = {
    0.0082, 0.0110, 0.0191, 0.0316, 0.0470, 0.0633, 0.0786, 0.0911, 0.0992,
    0.1020,
    0.0992, 0.0911, 0.0786, 0.0633, 0.0470, 0.0316, 0.0191, 0.0110, 0.0082
};

// 84 weights; only the first 37 are non-zero, the remaining entries are
// zero-initialised by the aggregate initialiser.
// NOTE(review): presumably one weight per semitone for the bass chroma -- confirm.
const float basswindow[84] = {
    0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341,
    0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311,
    0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393,
    0.999803, 0.995091, 0.976388, 0.944223, 0.899505, 0.843498,
    0.777785, 0.704222, 0.624888, 0.542025, 0.457975, 0.375112,
    0.295778, 0.222215, 0.156502, 0.100495, 0.055777, 0.023612,
    0.004909
};

// 84 symmetric weights rising from ~0 to ~1 and back.
// NOTE(review): presumably one weight per semitone for the treble chroma -- confirm.
const float treblewindow[84] = {
    0.000350, 0.003144, 0.008717, 0.017037, 0.028058, 0.041719,
    0.057942, 0.076638, 0.097701, 0.121014, 0.146447, 0.173856,
    0.203090, 0.233984, 0.266366, 0.300054, 0.334860, 0.370590,
    0.407044, 0.444018, 0.481304, 0.518696, 0.555982, 0.592956,
    0.629410, 0.665140, 0.699946, 0.733634, 0.766016, 0.796910,
    0.826144, 0.853553, 0.878986, 0.902299, 0.923362, 0.942058,
    0.958281, 0.971942, 0.982963, 0.991283, 0.996856, 0.999650,
    0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281,
    0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
    0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410,
    0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044,
    0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
    0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942,
    0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
};

// Bin labels: entries 0-11 are the bass pitch classes, entries 12-23 the
// plain pitch classes; both runs start at A.
const char* notenames[24] = {
    "A (bass)", "Bb (bass)", "B (bass)", "C (bass)", "C# (bass)", "D (bass)",
    "Eb (bass)", "E (bass)", "F (bass)", "F# (bass)", "G (bass)", "Ab (bass)",
    "A", "Bb", "B", "C", "C#", "D", "Eb", "E", "F", "F#", "G", "Ab"
};

// bassnames[root][interval]: spelling of the note `interval` semitones above
// `root` (root 0 = A), used for slash-chord names. Intervals 1, 6 and 8 have
// no entry (empty string).
const char* bassnames[12][12] = {
    {"A",  "", "B",  "C",  "C#", "D",  "", "E",  "", "F#", "G",  "G#"},
    {"Bb", "", "C",  "Db", "D",  "Eb", "", "F",  "", "G",  "Ab", "A"},
    {"B",  "", "C#", "D",  "D#", "E",  "", "F#", "", "G#", "A",  "A#"},
    {"C",  "", "D",  "Eb", "E",  "F",  "", "G",  "", "A",  "Bb", "B"},
    {"C#", "", "D#", "E",  "E#", "F#", "", "G#", "", "A#", "B",  "B#"},
    {"D",  "", "E",  "F",  "F#", "G",  "", "A",  "", "B",  "C",  "C#"},
    {"Eb", "", "F",  "Gb", "G",  "Ab", "", "Bb", "", "C",  "Db", "D"},
    {"E",  "", "F#", "G",  "G#", "A",  "", "B",  "", "C#", "D",  "D#"},
    {"F",  "", "G",  "Ab", "A",  "Bb", "", "C",  "", "D",  "Eb", "E"},
    {"F#", "", "G#", "A",  "A#", "B",  "", "C#", "", "D#", "E",  "E#"},
    {"G",  "", "A",  "Bb", "B",  "C",  "", "D",  "", "E",  "F",  "F#"},
    {"Ab", "", "Bb", "Cb", "C",  "Db", "", "Eb", "", "F",  "Gb", "G"}
};

// Vector copy of hammingwind, usable directly as a convolution kernel.
const std::vector<float> hw(hammingwind,
                            hammingwind + sizeof(hammingwind) / sizeof(hammingwind[0]));

// Number of log-frequency bins.
const int nNote = 256;
matthiasm@0 51
const bool debug_on = false;

/** Special Convolution
    Convolves `convolvee` with an odd-length `kernel`. In the valid core part
    of the convolution the result contains the usual convolution values; the
    pads at the beginning (ending) repeat the first (last) valid convolution
    bin.

    The returned vector has max(256, convolvee.size()) elements: 256 is the
    historical fixed output length, kept so that callers which index a full
    256-bin result keep working; longer inputs are no longer written out of
    bounds. Elements past convolvee.size() are zero. If the convolvee is
    shorter than the kernel there is no valid core and the result is all
    zeros.

    @param convolvee signal to be smoothed
    @param kernel    convolution kernel; its length must be odd
    @return          the padded convolution described above
**/
std::vector<float> SpecialConvolution(std::vector<float> convolvee, std::vector<float> kernel)
{
    const int lenConvolvee = static_cast<int>(convolvee.size());
    const int lenKernel = static_cast<int>(kernel.size());
    const int halfKernel = lenKernel / 2;

    assert(lenKernel % 2 != 0); // no exception handling !!!

    const int minOutputLen = 256;
    std::vector<float> Z(lenConvolvee > minOutputLen ? lenConvolvee : minOutputLen, 0);

    // Without at least one full kernel overlap there is nothing to compute,
    // and the padding below would index outside Z.
    if (lenKernel <= 0 || lenConvolvee < lenKernel) {
        return Z;
    }

    // valid core: output bin n - halfKernel is centred under the kernel
    for (int n = lenKernel - 1; n < lenConvolvee; n++) {
        float s = 0.0;
        for (int m = 0; m < lenKernel; m++) {
            s += convolvee[n - m] * kernel[m];
        }
        Z[n - halfKernel] = s;
    }

    // fill lower and upper pads with the nearest valid value
    const float firstValid = Z[halfKernel];
    for (int n = 0; n < halfKernel; n++) {
        Z[n] = firstValid;
    }
    const float lastValid = Z[lenConvolvee - halfKernel - 1];
    for (int n = lenConvolvee - halfKernel; n < lenConvolvee; n++) {
        Z[n] = lastValid;
    }
    return Z;
}
matthiasm@0 87
matthiasm@0 88 // vector<float> FftBin2Frequency(vector<float> binnumbers, int fs, int blocksize)
matthiasm@0 89 // {
matthiasm@0 90 // vector<float> freq(binnumbers.size, 0.0);
matthiasm@0 91 // for (unsigned i = 0; i < binnumbers.size; ++i) {
matthiasm@0 92 // freq[i] = (binnumbers[i]-1.0) * fs * 1.0 / blocksize;
matthiasm@0 93 // }
matthiasm@0 94 // return freq;
matthiasm@0 95 // }
matthiasm@0 96
/**
 * Raised-cosine pulse: 1 at `centre`, falling to 0 at centre +/- width/2,
 * and 0 everywhere outside that interval.
 *
 * @param x      position at which the pulse is evaluated
 * @param centre position of the peak
 * @param width  full width of the pulse; a non-positive width has no support,
 *               so 0 is returned (this avoids a 0 * inf = NaN at x == centre)
 * @return       pulse value in [0, 1]
 */
float cospuls(float x, float centre, float width)
{
    if (!(width > 0)) {
        return 0.0;
    }
    // Spelled out because M_PI is not part of standard C++.
    const double pi = 3.14159265358979323846;
    const float recipwidth = 1.0/width;
    // std::fabs, not unqualified abs: the latter can resolve to the C integer
    // abs() and silently truncate the distance.
    if (std::fabs(x - centre) <= 0.5 * width) {
        return std::cos((x-centre)*2*pi*recipwidth)*.5+.5;
    }
    return 0.0;
}
matthiasm@0 105
matthiasm@0 106 float pitchCospuls(float x, float centre, int binsperoctave)
matthiasm@0 107 {
matthiasm@0 108 float warpedf = -binsperoctave * (log2(centre) - log2(x));
matthiasm@0 109 float out = cospuls(warpedf, 0.0, 2.0);
matthiasm@0 110 // now scale to correct for note density
matthiasm@0 111 float c = log(2.0)/binsperoctave;
matthiasm@0 112 if (x > 0) {
matthiasm@0 113 out = out / (c * x);
matthiasm@0 114 } else {
matthiasm@0 115 out = 0;
matthiasm@0 116 }
matthiasm@0 117 return out;
matthiasm@0 118 }
matthiasm@0 119
matthiasm@0 120 bool logFreqMatrix(int fs, int blocksize, float *outmatrix) {
matthiasm@0 121
matthiasm@0 122 int binspersemitone = 3; // this must be 3
matthiasm@0 123 int minoctave = 0; // this must be 0
matthiasm@0 124 int maxoctave = 7; // this must be 7
matthiasm@1 125 int oversampling = 80;
matthiasm@0 126
matthiasm@0 127 // linear frequency vector
matthiasm@0 128 vector<float> fft_f;
matthiasm@0 129 for (int i = 0; i < blocksize/2; ++i) {
matthiasm@0 130 fft_f.push_back(i * (fs * 1.0 / blocksize));
matthiasm@0 131 }
matthiasm@0 132 float fft_width = fs * 2.0 / blocksize;
matthiasm@0 133
matthiasm@0 134 // linear oversampled frequency vector
matthiasm@0 135 vector<float> oversampled_f;
matthiasm@0 136 for (unsigned int i = 0; i < oversampling * blocksize/2; ++i) {
matthiasm@0 137 oversampled_f.push_back(i * ((fs * 1.0 / blocksize) / oversampling));
matthiasm@0 138 }
matthiasm@0 139
matthiasm@0 140 // pitch-spaced frequency vector
matthiasm@0 141 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@0 142 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@0 143 vector<float> cq_f;
matthiasm@0 144 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@0 145 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@0 146 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@0 147 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@0 148 for (int k = -1; k < 2; ++k) {
matthiasm@0 149 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@0 150 }
matthiasm@0 151 }
matthiasm@0 152 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@0 153 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@0 154
matthiasm@0 155 int nFFT = fft_f.size();
matthiasm@0 156
matthiasm@0 157 vector<float> fft_activation;
matthiasm@0 158 for (int iOS = 0; iOS < 2 * oversampling; ++iOS) {
matthiasm@0 159 float cosp = cospuls(oversampled_f[iOS],fft_f[1],fft_width);
matthiasm@0 160 fft_activation.push_back(cosp);
matthiasm@0 161 // cerr << cosp << endl;
matthiasm@0 162 }
matthiasm@0 163
matthiasm@0 164 float cq_activation;
matthiasm@0 165 for (int iFFT = 1; iFFT < nFFT; ++iFFT) {
matthiasm@0 166 // find frequency stretch where the oversampled vector can be non-zero (i.e. in a window of width fft_width around the current frequency)
matthiasm@0 167 int curr_start = oversampling * iFFT - oversampling;
matthiasm@0 168 int curr_end = oversampling * iFFT + oversampling; // don't know if I should add "+1" here
matthiasm@0 169 // cerr << oversampled_f[curr_start] << " " << fft_f[iFFT] << " " << oversampled_f[curr_end] << endl;
matthiasm@0 170 for (unsigned iCQ = 0; iCQ < cq_f.size(); ++iCQ) {
matthiasm@0 171 outmatrix[iFFT + nFFT * iCQ] = 0;
matthiasm@1 172 if (cq_f[iCQ] * pow(2.0, 0.084) + fft_width > fft_f[iFFT] && cq_f[iCQ] * pow(2.0, -0.084 * 2) - fft_width < fft_f[iFFT]) { // within a generous neighbourhood
matthiasm@0 173 for (int iOS = curr_start; iOS < curr_end; ++iOS) {
matthiasm@0 174 cq_activation = pitchCospuls(oversampled_f[iOS],cq_f[iCQ],binspersemitone*12);
matthiasm@0 175 // cerr << oversampled_f[iOS] << " " << cq_f[iCQ] << " " << cq_activation << endl;
matthiasm@0 176 outmatrix[iFFT + nFFT * iCQ] += cq_activation * fft_activation[iOS-curr_start];
matthiasm@0 177 }
matthiasm@0 178 // if (iCQ == 1 || iCQ == 2) {
matthiasm@0 179 // cerr << " " << outmatrix[iFFT + nFFT * iCQ] << endl;
matthiasm@0 180 // }
matthiasm@0 181 }
matthiasm@0 182 }
matthiasm@0 183 }
matthiasm@0 184 return true;
matthiasm@0 185 }
matthiasm@0 186
matthiasm@3 187 bool dictionaryMatrix(float* dm) {
matthiasm@1 188 int binspersemitone = 3; // this must be 3
matthiasm@1 189 int minoctave = 0; // this must be 0
matthiasm@1 190 int maxoctave = 7; // this must be 7
matthiasm@4 191 float s_param = 0.7;
matthiasm@1 192
matthiasm@1 193 // pitch-spaced frequency vector
matthiasm@1 194 int minMIDI = 21 + minoctave * 12 - 1; // this includes one additional semitone!
matthiasm@1 195 int maxMIDI = 21 + maxoctave * 12; // this includes one additional semitone!
matthiasm@1 196 vector<float> cq_f;
matthiasm@1 197 float oob = 1.0/binspersemitone; // one over binspersemitone
matthiasm@1 198 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-69))); // 0.083333 is approx 1/12
matthiasm@1 199 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI+oob-69)));
matthiasm@1 200 for (int i = minMIDI + 1; i < maxMIDI; ++i) {
matthiasm@1 201 for (int k = -1; k < 2; ++k) {
matthiasm@1 202 cq_f.push_back(440 * pow(2.0,0.083333333333 * (i+oob*k-69)));
matthiasm@1 203 }
matthiasm@1 204 }
matthiasm@1 205 cq_f.push_back(440 * pow(2.0,0.083333 * (minMIDI-oob-69)));
matthiasm@1 206 cq_f.push_back(440 * pow(2.0,0.083333 * (maxMIDI-69)));
matthiasm@1 207
matthiasm@1 208 float curr_f;
matthiasm@1 209 float floatbin;
matthiasm@1 210 float curr_amp;
matthiasm@1 211 // now for every combination calculate the matrix element
matthiasm@1 212 for (unsigned iOut = 0; iOut < 12 * (maxoctave - minoctave); ++iOut) {
matthiasm@3 213 // cerr << iOut << endl;
matthiasm@1 214 for (unsigned iHarm = 1; iHarm <= 20; ++iHarm) {
matthiasm@1 215 curr_f = 440 * pow(2,(minMIDI-69+iOut)*1.0/12) * iHarm;
matthiasm@3 216 // if (curr_f > cq_f[nNote-1]) break;
matthiasm@3 217 floatbin = ((iOut + 1) * binspersemitone + 1) + binspersemitone * 12 * log2(iHarm);
matthiasm@3 218 // cerr << floatbin << endl;
matthiasm@1 219 curr_amp = pow(s_param,float(iHarm-1));
matthiasm@3 220 // cerr << "curramp" << curr_amp << endl;
matthiasm@1 221 for (unsigned iNote = 0; iNote < nNote; ++iNote) {
matthiasm@3 222 if (abs(iNote+1.0-floatbin)<2) {
matthiasm@3 223 dm[iNote + 256 * iOut] += cospuls(iNote+1.0, floatbin, binspersemitone + 0.0) * curr_amp;
matthiasm@3 224 // dm[iNote + nNote * iOut] += 1 * curr_amp;
matthiasm@3 225 }
matthiasm@1 226 }
matthiasm@3 227 }
matthiasm@1 228 }
matthiasm@3 229
matthiasm@3 230
matthiasm@1 231 }
matthiasm@1 232
/**
 * Looks up an environment variable.
 *
 * @param key name of the variable
 * @return    its value, or an empty string if the variable is not set
 */
std::string get_env_var( std::string const & key ) {
    const char *raw = getenv( key.c_str() );
    if (raw == NULL) {
        return std::string();
    }
    return std::string(raw);
}
matthiasm@7 242
matthiasm@7 243
matthiasm@9 244 vector<string> chordDictionary(vector<float> *mchorddict) {
matthiasm@7 245 // ifstream chordDictFile;
matthiasm@7 246 string chordDictFilename(get_env_var("VAMP_PATH")+"/chord.dict");
matthiasm@7 247 // string instring[] = ",1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\nm,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0\n6,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0\n7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0\nmaj7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,1\nmin7,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0\n,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\n,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0\ndim,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0\naug,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0\n";
matthiasm@7 248 typedef tokenizer<char_separator<char> > Tok;
matthiasm@7 249 // char_separator<char> sep; // default constructed
matthiasm@7 250 char_separator<char> sep(",; ",":");
matthiasm@7 251 iostreams::stream<iostreams::file_source> chordDictFile(chordDictFilename.c_str());
matthiasm@7 252 string line;
matthiasm@7 253 int iElement = 0;
matthiasm@7 254 int nChord = 0;
matthiasm@7 255
matthiasm@7 256 vector<string> loadedChordNames;
matthiasm@7 257 vector<float> loadedChordDict;
matthiasm@7 258 if (chordDictFile.is_open()) {
matthiasm@7 259 while (std::getline(chordDictFile, line)) { // loop over lines in chord.dict file
matthiasm@7 260 // first, get the chord definition
matthiasm@7 261 string chordType;
matthiasm@7 262 vector<float> tempPCVector;
matthiasm@7 263 // cerr << line << endl;
matthiasm@7 264 if (!line.empty() && line.substr(0,1) != "#") {
matthiasm@7 265 Tok tok(line, sep);
matthiasm@7 266 for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter) { // loop over line elements
matthiasm@7 267 string tempString = *tok_iter;
matthiasm@7 268 // cerr << tempString << endl;
matthiasm@7 269 if (tok_iter == tok.begin()) { // either the chord name or a colon
matthiasm@7 270 if (tempString == ":") {
matthiasm@7 271 chordType = "";
matthiasm@7 272 } else {
matthiasm@7 273 chordType = tempString;
matthiasm@7 274 tok_iter++; // is this cheating ? :)
matthiasm@7 275 }
matthiasm@7 276 } else {
matthiasm@7 277 tempPCVector.push_back(lexical_cast<float>(*tok_iter));
matthiasm@7 278 }
matthiasm@7 279 }
matthiasm@7 280
matthiasm@7 281 // now make all 12 chords of every type
matthiasm@7 282 for (unsigned iSemitone = 0; iSemitone < 12; iSemitone++) {
matthiasm@7 283 // add bass slash notation
matthiasm@7 284 string slashNotation = "";
matthiasm@7 285 for (unsigned kSemitone = 1; kSemitone < 12; kSemitone++) {
matthiasm@7 286 if (tempPCVector[(kSemitone) % 12] > 0.99) {
matthiasm@7 287 slashNotation = bassnames[iSemitone][kSemitone];
matthiasm@7 288 }
matthiasm@7 289 }
matthiasm@7 290 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // bass pitch classes
matthiasm@9 291 // cerr << ((kSemitone - iSemitone + 12) % 12) << endl;
matthiasm@9 292 float bassValue = 0;
matthiasm@9 293 if (tempPCVector[(kSemitone - iSemitone + 12) % 12]==1) {
matthiasm@9 294 bassValue = 1;
matthiasm@9 295 } else {
matthiasm@10 296 if (tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12] == 1) bassValue = 0.5;
matthiasm@9 297 }
matthiasm@9 298 loadedChordDict.push_back(bassValue);
matthiasm@7 299 }
matthiasm@7 300 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) { // chord pitch classes
matthiasm@7 301 loadedChordDict.push_back(tempPCVector[((kSemitone - iSemitone + 12) % 12) + 12]);
matthiasm@7 302 }
matthiasm@7 303 ostringstream os;
matthiasm@7 304 if (slashNotation.empty()) {
matthiasm@7 305 os << notenames[12+iSemitone] << chordType;
matthiasm@7 306 } else {
matthiasm@7 307 os << notenames[12+iSemitone] << chordType << "/" << slashNotation;
matthiasm@7 308 }
matthiasm@7 309
matthiasm@7 310 loadedChordNames.push_back(os.str());
matthiasm@7 311 }
matthiasm@7 312 }
matthiasm@7 313 }
matthiasm@7 314 // N type
matthiasm@7 315 loadedChordNames.push_back("N");
matthiasm@7 316 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(0.5);
matthiasm@7 317 for (unsigned kSemitone = 0; kSemitone < 12; kSemitone++) loadedChordDict.push_back(1.0);
matthiasm@7 318
matthiasm@7 319 // normalise
matthiasm@7 320 float sum = 0;
matthiasm@7 321 for (int i = 0; i < loadedChordDict.size(); i++) {
matthiasm@7 322 sum += pow(loadedChordDict[i],2);
matthiasm@7 323 if (i % 24 == 23) {
matthiasm@7 324 float invertedsum = 1.0/sqrt(sum);
matthiasm@7 325 for (int k = 0; k < 24; k++) {
matthiasm@7 326 loadedChordDict[i-k] *= invertedsum;
matthiasm@7 327 }
matthiasm@7 328 sum = 0;
matthiasm@7 329 }
matthiasm@7 330
matthiasm@7 331 }
matthiasm@7 332
matthiasm@7 333
matthiasm@7 334 nChord = 0;
matthiasm@7 335 for (int i = 0; i < loadedChordNames.size(); i++) {
matthiasm@7 336 nChord++;
matthiasm@7 337 }
matthiasm@7 338 chordDictFile.close();
matthiasm@7 339
matthiasm@7 340
matthiasm@9 341 // mchorddict = new float[nChord*24];
matthiasm@7 342 for (int i = 0; i < nChord*24; i++) {
matthiasm@9 343 mchorddict->push_back(loadedChordDict[i]);
matthiasm@7 344 }
matthiasm@9 345
matthiasm@7 346 } else {// use default from chorddict.cpp
matthiasm@9 347 // mchorddict = new float[nChorddict];
matthiasm@7 348 for (int i = 0; i < nChorddict; i++) {
matthiasm@9 349 mchorddict->push_back(chorddict[i]);
matthiasm@7 350 }
matthiasm@7 351
matthiasm@7 352 nChord = nChorddict/24;
matthiasm@7 353 // mchordnames = new string[nChorddict/24];
matthiasm@7 354 char buffer1 [50];
matthiasm@7 355 for (int i = 0; i < nChorddict/24; i++) {
matthiasm@7 356 if (i < nChorddict/24 - 1) {
matthiasm@7 357 sprintf(buffer1, "%s%s", notenames[i % 12 + 12], chordtypes[i]);
matthiasm@7 358 } else {
matthiasm@7 359 sprintf(buffer1, "N");
matthiasm@7 360 }
matthiasm@7 361 ostringstream os;
matthiasm@7 362 os << buffer1;
matthiasm@9 363 loadedChordNames.push_back(os.str());
matthiasm@9 364
matthiasm@7 365 }
matthiasm@7 366
matthiasm@7 367 }
matthiasm@9 368 // cerr << "before leaving" << chordnames[1] << endl;
matthiasm@9 369 return loadedChordNames;
matthiasm@7 370 }
matthiasm@0 371
matthiasm@0 372 NNLSChroma::NNLSChroma(float inputSampleRate) :
matthiasm@0 373 Plugin(inputSampleRate),
matthiasm@0 374 m_fl(0),
matthiasm@0 375 m_blockSize(0),
matthiasm@0 376 m_stepSize(0),
matthiasm@0 377 m_lengthOfNoteIndex(0),
matthiasm@0 378 m_meanTuning0(0),
matthiasm@0 379 m_meanTuning1(0),
matthiasm@0 380 m_meanTuning2(0),
matthiasm@0 381 m_localTuning0(0),
matthiasm@0 382 m_localTuning1(0),
matthiasm@0 383 m_localTuning2(0),
matthiasm@4 384 m_paling(1.0),
matthiasm@3 385 m_preset(0.0),
matthiasm@0 386 m_localTuning(0),
matthiasm@0 387 m_kernelValue(0),
matthiasm@0 388 m_kernelFftIndex(0),
matthiasm@0 389 m_kernelNoteIndex(0),
matthiasm@1 390 m_dict(0),
matthiasm@0 391 m_tuneLocal(false),
matthiasm@7 392 m_dictID(0),
matthiasm@7 393 m_chorddict(0),
matthiasm@12 394 m_chordnames(0),
matthiasm@12 395 m_doNormalizeChroma(0)
matthiasm@0 396 {
matthiasm@0 397 if (debug_on) cerr << "--> NNLSChroma" << endl;
matthiasm@7 398
matthiasm@7 399 // make the *note* dictionary matrix
matthiasm@3 400 m_dict = new float[nNote * 84];
matthiasm@3 401 for (unsigned i = 0; i < nNote * 84; ++i) m_dict[i] = 0.0;
matthiasm@1 402 dictionaryMatrix(m_dict);
matthiasm@7 403
matthiasm@7 404 // get the *chord* dictionary from file (if the file exists)
matthiasm@9 405 m_chordnames = chordDictionary(&m_chorddict);
matthiasm@0 406 }
matthiasm@0 407
matthiasm@0 408
matthiasm@0 409 NNLSChroma::~NNLSChroma()
matthiasm@0 410 {
matthiasm@0 411 if (debug_on) cerr << "--> ~NNLSChroma" << endl;
matthiasm@1 412 delete [] m_dict;
matthiasm@9 413 // delete [] m_chorddict;
matthiasm@7 414 // delete m_chordnames;
matthiasm@0 415 }
matthiasm@0 416
matthiasm@0 417 string
matthiasm@0 418 NNLSChroma::getIdentifier() const
matthiasm@0 419 {
matthiasm@0 420 if (debug_on) cerr << "--> getIdentifier" << endl;
matthiasm@0 421 return "nnls_chroma";
matthiasm@0 422 }
matthiasm@0 423
matthiasm@0 424 string
matthiasm@0 425 NNLSChroma::getName() const
matthiasm@0 426 {
matthiasm@0 427 if (debug_on) cerr << "--> getName" << endl;
matthiasm@0 428 return "NNLS Chroma";
matthiasm@0 429 }
matthiasm@0 430
matthiasm@0 431 string
matthiasm@0 432 NNLSChroma::getDescription() const
matthiasm@0 433 {
matthiasm@0 434 // Return something helpful here!
matthiasm@0 435 if (debug_on) cerr << "--> getDescription" << endl;
matthiasm@13 436 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
matthiasm@0 437 }
matthiasm@0 438
matthiasm@0 439 string
matthiasm@0 440 NNLSChroma::getMaker() const
matthiasm@0 441 {
matthiasm@0 442 if (debug_on) cerr << "--> getMaker" << endl;
matthiasm@0 443 // Your name here
matthiasm@0 444 return "Matthias Mauch";
matthiasm@0 445 }
matthiasm@0 446
matthiasm@0 447 int
matthiasm@0 448 NNLSChroma::getPluginVersion() const
matthiasm@0 449 {
matthiasm@0 450 if (debug_on) cerr << "--> getPluginVersion" << endl;
matthiasm@0 451 // Increment this each time you release a version that behaves
matthiasm@0 452 // differently from the previous one
matthiasm@0 453 return 1;
matthiasm@0 454 }
matthiasm@0 455
matthiasm@0 456 string
matthiasm@0 457 NNLSChroma::getCopyright() const
matthiasm@0 458 {
matthiasm@0 459 if (debug_on) cerr << "--> getCopyright" << endl;
matthiasm@0 460 // This function is not ideally named. It does not necessarily
matthiasm@0 461 // need to say who made the plugin -- getMaker does that -- but it
matthiasm@0 462 // should indicate the terms under which it is distributed. For
matthiasm@0 463 // example, "Copyright (year). All Rights Reserved", or "GPL"
matthiasm@0 464 return "Copyright (2010). All rights reserved.";
matthiasm@0 465 }
matthiasm@0 466
matthiasm@0 467 NNLSChroma::InputDomain
matthiasm@0 468 NNLSChroma::getInputDomain() const
matthiasm@0 469 {
matthiasm@0 470 if (debug_on) cerr << "--> getInputDomain" << endl;
matthiasm@0 471 return FrequencyDomain;
matthiasm@0 472 }
matthiasm@0 473
matthiasm@0 474 size_t
matthiasm@0 475 NNLSChroma::getPreferredBlockSize() const
matthiasm@0 476 {
matthiasm@0 477 if (debug_on) cerr << "--> getPreferredBlockSize" << endl;
matthiasm@0 478 return 16384; // 0 means "I can handle any block size"
matthiasm@0 479 }
matthiasm@0 480
matthiasm@0 481 size_t
matthiasm@0 482 NNLSChroma::getPreferredStepSize() const
matthiasm@0 483 {
matthiasm@0 484 if (debug_on) cerr << "--> getPreferredStepSize" << endl;
matthiasm@0 485 return 2048; // 0 means "anything sensible"; in practice this
matthiasm@0 486 // means the same as the block size for TimeDomain
matthiasm@0 487 // plugins, or half of it for FrequencyDomain plugins
matthiasm@0 488 }
matthiasm@0 489
matthiasm@0 490 size_t
matthiasm@0 491 NNLSChroma::getMinChannelCount() const
matthiasm@0 492 {
matthiasm@0 493 if (debug_on) cerr << "--> getMinChannelCount" << endl;
matthiasm@0 494 return 1;
matthiasm@0 495 }
matthiasm@0 496
matthiasm@0 497 size_t
matthiasm@0 498 NNLSChroma::getMaxChannelCount() const
matthiasm@0 499 {
matthiasm@0 500 if (debug_on) cerr << "--> getMaxChannelCount" << endl;
matthiasm@0 501 return 1;
matthiasm@0 502 }
matthiasm@0 503
matthiasm@0 504 NNLSChroma::ParameterList
matthiasm@0 505 NNLSChroma::getParameterDescriptors() const
matthiasm@0 506 {
matthiasm@0 507 if (debug_on) cerr << "--> getParameterDescriptors" << endl;
matthiasm@0 508 ParameterList list;
matthiasm@0 509
matthiasm@3 510 ParameterDescriptor d3;
matthiasm@3 511 d3.identifier = "preset";
matthiasm@3 512 d3.name = "preset";
matthiasm@3 513 d3.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@3 514 d3.unit = "";
matthiasm@3 515 d3.isQuantized = true;
matthiasm@3 516 d3.quantizeStep = 1;
matthiasm@3 517 d3.minValue = 0.0;
matthiasm@4 518 d3.maxValue = 3.0;
matthiasm@3 519 d3.defaultValue = 0.0;
matthiasm@3 520 d3.valueNames.push_back("polyphonic pop");
matthiasm@3 521 d3.valueNames.push_back("polyphonic pop (fast)");
matthiasm@3 522 d3.valueNames.push_back("solo keyboard");
matthiasm@3 523 d3.valueNames.push_back("manual");
matthiasm@3 524 list.push_back(d3);
matthiasm@4 525
matthiasm@4 526 // ParameterDescriptor d0;
matthiasm@4 527 // d0.identifier = "notedict";
matthiasm@4 528 // d0.name = "note dictionary";
matthiasm@4 529 // d0.description = "Notes in different note dictionaries differ by their spectral shapes.";
matthiasm@4 530 // d0.unit = "";
matthiasm@4 531 // d0.minValue = 0;
matthiasm@4 532 // d0.maxValue = 1;
matthiasm@4 533 // d0.defaultValue = 0;
matthiasm@4 534 // d0.isQuantized = true;
matthiasm@4 535 // d0.valueNames.push_back("s = 0.6");
matthiasm@4 536 // d0.valueNames.push_back("no NNLS");
matthiasm@4 537 // d0.quantizeStep = 1.0;
matthiasm@4 538 // list.push_back(d0);
matthiasm@4 539
matthiasm@4 540 ParameterDescriptor d1;
matthiasm@4 541 d1.identifier = "tuningmode";
matthiasm@4 542 d1.name = "tuning mode";
matthiasm@4 543 d1.description = "Tuning can be performed locally or on the whole extraction segment. Local tuning is only advisable when the tuning is likely to change over the audio, for example in podcasts, or in a cappella singing.";
matthiasm@4 544 d1.unit = "";
matthiasm@4 545 d1.minValue = 0;
matthiasm@4 546 d1.maxValue = 1;
matthiasm@4 547 d1.defaultValue = 0;
matthiasm@4 548 d1.isQuantized = true;
matthiasm@4 549 d1.valueNames.push_back("global tuning");
matthiasm@4 550 d1.valueNames.push_back("local tuning");
matthiasm@4 551 d1.quantizeStep = 1.0;
matthiasm@4 552 list.push_back(d1);
matthiasm@4 553
matthiasm@4 554 // ParameterDescriptor d2;
matthiasm@4 555 // d2.identifier = "paling";
matthiasm@4 556 // d2.name = "spectral paling";
matthiasm@4 557 // d2.description = "Spectral paling: no paling - 0; whitening - 1.";
matthiasm@4 558 // d2.unit = "";
matthiasm@4 559 // d2.isQuantized = true;
matthiasm@4 560 // // d2.quantizeStep = 0.1;
matthiasm@4 561 // d2.minValue = 0.0;
matthiasm@4 562 // d2.maxValue = 1.0;
matthiasm@4 563 // d2.defaultValue = 1.0;
matthiasm@4 564 // d2.isQuantized = false;
matthiasm@4 565 // list.push_back(d2);
matthiasm@12 566 ParameterDescriptor d4;
matthiasm@12 567 d4.identifier = "chromanormalize";
matthiasm@12 568 d4.name = "chroma normalization";
matthiasm@12 569 d4.description = "How shall the chroma vector be normalized?";
matthiasm@12 570 d4.unit = "";
matthiasm@12 571 d4.minValue = 0;
matthiasm@13 572 d4.maxValue = 3;
matthiasm@12 573 d4.defaultValue = 0;
matthiasm@12 574 d4.isQuantized = true;
matthiasm@13 575 d4.valueNames.push_back("none");
matthiasm@13 576 d4.valueNames.push_back("maximum norm");
matthiasm@13 577 d4.valueNames.push_back("L1 norm");
matthiasm@13 578 d4.valueNames.push_back("L2 norm");
matthiasm@12 579 d4.quantizeStep = 1.0;
matthiasm@12 580 list.push_back(d4);
matthiasm@4 581
matthiasm@0 582 return list;
matthiasm@0 583 }
matthiasm@0 584
matthiasm@0 585 float
matthiasm@0 586 NNLSChroma::getParameter(string identifier) const
matthiasm@0 587 {
matthiasm@3 588 if (debug_on) cerr << "--> getParameter" << endl;
matthiasm@0 589 if (identifier == "notedict") {
matthiasm@0 590 return m_dictID;
matthiasm@0 591 }
matthiasm@0 592
matthiasm@0 593 if (identifier == "paling") {
matthiasm@0 594 return m_paling;
matthiasm@0 595 }
matthiasm@0 596
matthiasm@0 597 if (identifier == "tuningmode") {
matthiasm@0 598 if (m_tuneLocal) {
matthiasm@0 599 return 1.0;
matthiasm@0 600 } else {
matthiasm@0 601 return 0.0;
matthiasm@0 602 }
matthiasm@0 603 }
matthiasm@3 604 if (identifier == "preset") {
matthiasm@3 605 return m_preset;
matthiasm@3 606 }
matthiasm@12 607 if (identifier == "chromanormalize") {
matthiasm@12 608 return m_doNormalizeChroma;
matthiasm@12 609 }
matthiasm@0 610 return 0;
matthiasm@0 611
matthiasm@0 612 }
matthiasm@0 613
matthiasm@0 614 void
matthiasm@0 615 NNLSChroma::setParameter(string identifier, float value)
matthiasm@0 616 {
matthiasm@3 617 if (debug_on) cerr << "--> setParameter" << endl;
matthiasm@0 618 if (identifier == "notedict") {
matthiasm@0 619 m_dictID = (int) value;
matthiasm@0 620 }
matthiasm@0 621
matthiasm@0 622 if (identifier == "paling") {
matthiasm@0 623 m_paling = value;
matthiasm@0 624 }
matthiasm@0 625
matthiasm@0 626 if (identifier == "tuningmode") {
matthiasm@0 627 m_tuneLocal = (value > 0) ? true : false;
matthiasm@0 628 // cerr << "m_tuneLocal :" << m_tuneLocal << endl;
matthiasm@0 629 }
matthiasm@3 630 if (identifier == "preset") {
matthiasm@3 631 m_preset = value;
matthiasm@3 632 if (m_preset == 0.0) {
matthiasm@3 633 m_tuneLocal = false;
matthiasm@3 634 m_paling = 1.0;
matthiasm@3 635 m_dictID = 0.0;
matthiasm@3 636 }
matthiasm@3 637 if (m_preset == 1.0) {
matthiasm@3 638 m_tuneLocal = false;
matthiasm@3 639 m_paling = 1.0;
matthiasm@3 640 m_dictID = 1.0;
matthiasm@3 641 }
matthiasm@3 642 if (m_preset == 2.0) {
matthiasm@3 643 m_tuneLocal = false;
matthiasm@3 644 m_paling = 0.7;
matthiasm@3 645 m_dictID = 0.0;
matthiasm@3 646 }
matthiasm@3 647 }
matthiasm@12 648 if (identifier == "chromanormalize") {
matthiasm@12 649 m_doNormalizeChroma = value;
matthiasm@12 650 }
matthiasm@0 651 }
matthiasm@0 652
matthiasm@0 653 NNLSChroma::ProgramList
matthiasm@0 654 NNLSChroma::getPrograms() const
matthiasm@0 655 {
matthiasm@0 656 if (debug_on) cerr << "--> getPrograms" << endl;
matthiasm@0 657 ProgramList list;
matthiasm@0 658
matthiasm@0 659 // If you have no programs, return an empty list (or simply don't
matthiasm@0 660 // implement this function or getCurrentProgram/selectProgram)
matthiasm@0 661
matthiasm@0 662 return list;
matthiasm@0 663 }
matthiasm@0 664
matthiasm@0 665 string
matthiasm@0 666 NNLSChroma::getCurrentProgram() const
matthiasm@0 667 {
matthiasm@0 668 if (debug_on) cerr << "--> getCurrentProgram" << endl;
matthiasm@0 669 return ""; // no programs
matthiasm@0 670 }
matthiasm@0 671
matthiasm@0 672 void
matthiasm@0 673 NNLSChroma::selectProgram(string name)
matthiasm@0 674 {
matthiasm@0 675 if (debug_on) cerr << "--> selectProgram" << endl;
matthiasm@0 676 }
matthiasm@0 677
matthiasm@0 678
matthiasm@0 679 NNLSChroma::OutputList
matthiasm@0 680 NNLSChroma::getOutputDescriptors() const
matthiasm@0 681 {
matthiasm@0 682 if (debug_on) cerr << "--> getOutputDescriptors" << endl;
matthiasm@0 683 OutputList list;
matthiasm@0 684
matthiasm@0 685 // Make chroma names for the binNames property
matthiasm@0 686 vector<string> chromanames;
matthiasm@0 687 vector<string> bothchromanames;
matthiasm@0 688 for (int iNote = 0; iNote < 24; iNote++) {
matthiasm@0 689 bothchromanames.push_back(notenames[iNote]);
matthiasm@0 690 if (iNote < 12) {
matthiasm@0 691 chromanames.push_back(notenames[iNote]);
matthiasm@0 692 }
matthiasm@0 693 }
matthiasm@0 694
matthiasm@1 695 // int nNote = 84;
matthiasm@0 696
matthiasm@0 697 // See OutputDescriptor documentation for the possibilities here.
matthiasm@0 698 // Every plugin must have at least one output.
matthiasm@0 699
matthiasm@0 700 OutputDescriptor d0;
matthiasm@0 701 d0.identifier = "tuning";
matthiasm@0 702 d0.name = "Tuning";
matthiasm@0 703 d0.description = "The concert pitch.";
matthiasm@0 704 d0.unit = "Hz";
matthiasm@0 705 d0.hasFixedBinCount = true;
matthiasm@0 706 d0.binCount = 0;
matthiasm@0 707 d0.hasKnownExtents = true;
matthiasm@0 708 d0.minValue = 427.47;
matthiasm@0 709 d0.maxValue = 452.89;
matthiasm@0 710 d0.isQuantized = false;
matthiasm@0 711 d0.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 712 d0.hasDuration = false;
matthiasm@0 713 list.push_back(d0);
matthiasm@0 714
matthiasm@0 715 OutputDescriptor d1;
matthiasm@0 716 d1.identifier = "logfreqspec";
matthiasm@0 717 d1.name = "Log-Frequency Spectrum";
matthiasm@0 718 d1.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping.";
matthiasm@0 719 d1.unit = "";
matthiasm@0 720 d1.hasFixedBinCount = true;
matthiasm@0 721 d1.binCount = nNote;
matthiasm@0 722 d1.hasKnownExtents = false;
matthiasm@0 723 d1.isQuantized = false;
matthiasm@0 724 d1.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 725 d1.hasDuration = false;
matthiasm@0 726 d1.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 727 list.push_back(d1);
matthiasm@0 728
matthiasm@0 729 OutputDescriptor d2;
matthiasm@0 730 d2.identifier = "tunedlogfreqspec";
matthiasm@0 731 d2.name = "Tuned Log-Frequency Spectrum";
matthiasm@0 732 d2.description = "A Log-Frequency Spectrum (constant Q) that is obtained by cosine filter mapping, then its tuned using the estimated tuning frequency.";
matthiasm@0 733 d2.unit = "";
matthiasm@0 734 d2.hasFixedBinCount = true;
matthiasm@0 735 d2.binCount = 256;
matthiasm@0 736 d2.hasKnownExtents = false;
matthiasm@0 737 d2.isQuantized = false;
matthiasm@0 738 d2.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 739 d2.hasDuration = false;
matthiasm@0 740 d2.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 741 list.push_back(d2);
matthiasm@0 742
matthiasm@0 743 OutputDescriptor d3;
matthiasm@0 744 d3.identifier = "semitonespectrum";
matthiasm@0 745 d3.name = "Semitone Spectrum";
matthiasm@0 746 d3.description = "A semitone-spaced log-frequency spectrum derived from the third-of-a-semitone-spaced tuned log-frequency spectrum.";
matthiasm@0 747 d3.unit = "";
matthiasm@0 748 d3.hasFixedBinCount = true;
matthiasm@0 749 d3.binCount = 84;
matthiasm@0 750 d3.hasKnownExtents = false;
matthiasm@0 751 d3.isQuantized = false;
matthiasm@0 752 d3.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 753 d3.hasDuration = false;
matthiasm@0 754 d3.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 755 list.push_back(d3);
matthiasm@0 756
matthiasm@0 757 OutputDescriptor d4;
matthiasm@0 758 d4.identifier = "chroma";
matthiasm@0 759 d4.name = "Chromagram";
matthiasm@0 760 d4.description = "Tuning-adjusted chromagram from NNLS soft transcription, with an emphasis on the medium note range.";
matthiasm@0 761 d4.unit = "";
matthiasm@0 762 d4.hasFixedBinCount = true;
matthiasm@0 763 d4.binCount = 12;
matthiasm@0 764 d4.binNames = chromanames;
matthiasm@0 765 d4.hasKnownExtents = false;
matthiasm@0 766 d4.isQuantized = false;
matthiasm@0 767 d4.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 768 d4.hasDuration = false;
matthiasm@0 769 d4.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 770 list.push_back(d4);
matthiasm@0 771
matthiasm@0 772 OutputDescriptor d5;
matthiasm@0 773 d5.identifier = "basschroma";
matthiasm@0 774 d5.name = "Bass Chromagram";
matthiasm@0 775 d5.description = "Tuning-adjusted bass chromagram from NNLS soft transcription, with an emphasis on the bass note range.";
matthiasm@0 776 d5.unit = "";
matthiasm@0 777 d5.hasFixedBinCount = true;
matthiasm@0 778 d5.binCount = 12;
matthiasm@0 779 d5.binNames = chromanames;
matthiasm@0 780 d5.hasKnownExtents = false;
matthiasm@0 781 d5.isQuantized = false;
matthiasm@0 782 d5.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 783 d5.hasDuration = false;
matthiasm@0 784 d5.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 785 list.push_back(d5);
matthiasm@0 786
matthiasm@0 787 OutputDescriptor d6;
matthiasm@0 788 d6.identifier = "bothchroma";
matthiasm@0 789 d6.name = "Chromagram and Bass Chromagram";
matthiasm@0 790 d6.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS soft transcription.";
matthiasm@0 791 d6.unit = "";
matthiasm@0 792 d6.hasFixedBinCount = true;
matthiasm@0 793 d6.binCount = 24;
matthiasm@0 794 d6.binNames = bothchromanames;
matthiasm@0 795 d6.hasKnownExtents = false;
matthiasm@0 796 d6.isQuantized = false;
matthiasm@0 797 d6.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@0 798 d6.hasDuration = false;
matthiasm@0 799 d6.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 800 list.push_back(d6);
matthiasm@0 801
matthiasm@0 802 OutputDescriptor d7;
matthiasm@0 803 d7.identifier = "simplechord";
matthiasm@0 804 d7.name = "Simple Chord Estimate";
matthiasm@0 805 d7.description = "A simple chord estimate based on the inner product of chord templates with the smoothed chroma.";
matthiasm@0 806 d7.unit = "";
matthiasm@0 807 d7.hasFixedBinCount = true;
matthiasm@0 808 d7.binCount = 0;
matthiasm@0 809 d7.hasKnownExtents = false;
matthiasm@0 810 d7.isQuantized = false;
matthiasm@0 811 d7.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@0 812 d7.hasDuration = false;
matthiasm@0 813 d7.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@0 814 list.push_back(d7);
matthiasm@0 815
matthiasm@1 816 // OutputDescriptor d8;
matthiasm@1 817 // d8.identifier = "inconsistency";
matthiasm@1 818 // d8.name = "Harmonic inconsistency value";
matthiasm@1 819 // d8.description = "Harmonic inconsistency. Indicates music if low, non-music or speech when high.";
matthiasm@1 820 // d8.unit = "";
matthiasm@1 821 // d8.hasFixedBinCount = true;
matthiasm@1 822 // d8.binCount = 1;
matthiasm@1 823 // d8.hasKnownExtents = false;
matthiasm@1 824 // d8.isQuantized = false;
matthiasm@1 825 // d8.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 826 // d8.hasDuration = false;
matthiasm@1 827 // d8.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 828 // list.push_back(d8);
matthiasm@1 829 //
matthiasm@1 830 // OutputDescriptor d9;
matthiasm@1 831 // d9.identifier = "inconsistencysegment";
matthiasm@1 832 // d9.name = "Harmonic inconsistency segmenter";
matthiasm@1 833 // d9.description = "Segments the audio based on the harmonic inconsistency value into speech and music.";
matthiasm@1 834 // d9.unit = "";
matthiasm@1 835 // d9.hasFixedBinCount = true;
matthiasm@1 836 // d9.binCount = 0;
matthiasm@1 837 // d9.hasKnownExtents = true;
matthiasm@1 838 // d9.minValue = 0.1;
matthiasm@1 839 // d9.maxValue = 0.9;
matthiasm@1 840 // d9.isQuantized = false;
matthiasm@1 841 // d9.sampleType = OutputDescriptor::VariableSampleRate;
matthiasm@1 842 // d9.hasDuration = false;
matthiasm@1 843 // d9.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 844 // list.push_back(d9);
matthiasm@1 845 //
matthiasm@1 846 OutputDescriptor d10;
matthiasm@1 847 d10.identifier = "localtuning";
matthiasm@1 848 d10.name = "Local tuning";
matthiasm@4 849 d10.description = "Tuning based on the history up to this timestamp.";
matthiasm@1 850 d10.unit = "Hz";
matthiasm@1 851 d10.hasFixedBinCount = true;
matthiasm@1 852 d10.binCount = 1;
matthiasm@1 853 d10.hasKnownExtents = true;
matthiasm@1 854 d10.minValue = 427.47;
matthiasm@1 855 d10.maxValue = 452.89;
matthiasm@1 856 d10.isQuantized = false;
matthiasm@3 857 d10.sampleType = OutputDescriptor::FixedSampleRate;
matthiasm@1 858 d10.hasDuration = false;
matthiasm@3 859 // d10.sampleRate = (m_stepSize == 0) ? m_inputSampleRate/2048 : m_inputSampleRate/m_stepSize;
matthiasm@1 860 list.push_back(d10);
matthiasm@1 861
matthiasm@0 862 return list;
matthiasm@0 863 }
matthiasm@0 864
matthiasm@0 865
matthiasm@0 866 bool
matthiasm@0 867 NNLSChroma::initialise(size_t channels, size_t stepSize, size_t blockSize)
matthiasm@0 868 {
matthiasm@1 869 if (debug_on) {
matthiasm@1 870 cerr << "--> initialise";
matthiasm@1 871 }
matthiasm@1 872
matthiasm@0 873 if (channels < getMinChannelCount() ||
matthiasm@0 874 channels > getMaxChannelCount()) return false;
matthiasm@0 875 m_blockSize = blockSize;
matthiasm@0 876 m_stepSize = stepSize;
matthiasm@0 877 frameCount = 0;
matthiasm@0 878 int tempn = 256 * m_blockSize/2;
matthiasm@4 879 // cerr << "length of tempkernel : " << tempn << endl;
matthiasm@1 880 float *tempkernel;
matthiasm@1 881
matthiasm@1 882 tempkernel = new float[tempn];
matthiasm@1 883
matthiasm@0 884 logFreqMatrix(m_inputSampleRate, m_blockSize, tempkernel);
matthiasm@1 885 m_kernelValue.clear();
matthiasm@1 886 m_kernelFftIndex.clear();
matthiasm@1 887 m_kernelNoteIndex.clear();
matthiasm@1 888 int countNonzero = 0;
matthiasm@0 889 for (unsigned iNote = 0; iNote < nNote; ++iNote) { // I don't know if this is wise: manually making a sparse matrix
matthiasm@1 890 for (unsigned iFFT = 0; iFFT < blockSize/2; ++iFFT) {
matthiasm@1 891 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 892 m_kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]);
matthiasm@0 893 if (tempkernel[iFFT + blockSize/2 * iNote] > 0) {
matthiasm@1 894 countNonzero++;
matthiasm@0 895 }
matthiasm@1 896 m_kernelFftIndex.push_back(iFFT);
matthiasm@1 897 m_kernelNoteIndex.push_back(iNote);
matthiasm@0 898 }
matthiasm@0 899 }
matthiasm@1 900 }
matthiasm@4 901 // cerr << "nonzero count : " << countNonzero << endl;
matthiasm@1 902 delete [] tempkernel;
matthiasm@3 903 ofstream myfile;
matthiasm@3 904 myfile.open ("matrix.txt");
matthiasm@3 905 // myfile << "Writing this to a file.\n";
matthiasm@3 906 for (int i = 0; i < nNote * 84; ++i) {
matthiasm@3 907 myfile << m_dict[i] << endl;
matthiasm@3 908 }
matthiasm@3 909 myfile.close();
matthiasm@0 910 return true;
matthiasm@0 911 }
matthiasm@0 912
matthiasm@0 913 void
matthiasm@0 914 NNLSChroma::reset()
matthiasm@0 915 {
matthiasm@4 916 if (debug_on) cerr << "--> reset";
matthiasm@4 917
matthiasm@0 918 // Clear buffers, reset stored values, etc
matthiasm@4 919 frameCount = 0;
matthiasm@4 920 m_dictID = 0;
matthiasm@4 921 m_fl.clear();
matthiasm@4 922 m_meanTuning0 = 0;
matthiasm@4 923 m_meanTuning1 = 0;
matthiasm@4 924 m_meanTuning2 = 0;
matthiasm@4 925 m_localTuning0 = 0;
matthiasm@4 926 m_localTuning1 = 0;
matthiasm@4 927 m_localTuning2 = 0;
matthiasm@4 928 m_localTuning.clear();
matthiasm@0 929 }
matthiasm@0 930
matthiasm@0 931 NNLSChroma::FeatureSet
matthiasm@0 932 NNLSChroma::process(const float *const *inputBuffers, Vamp::RealTime timestamp)
matthiasm@0 933 {
matthiasm@4 934 if (debug_on) cerr << "--> process" << endl;
matthiasm@0 935 frameCount++;
matthiasm@0 936 float *magnitude = new float[m_blockSize/2];
matthiasm@0 937
matthiasm@0 938 Feature f10; // local tuning
matthiasm@3 939 f10.hasTimestamp = true;
matthiasm@4 940 f10.timestamp = timestamp;
matthiasm@0 941 const float *fbuf = inputBuffers[0];
matthiasm@0 942
matthiasm@0 943 // make magnitude
matthiasm@0 944 for (size_t iBin = 0; iBin < m_blockSize/2; iBin++) {
matthiasm@0 945 magnitude[iBin] = sqrt(fbuf[2 * iBin] * fbuf[2 * iBin] +
matthiasm@0 946 fbuf[2 * iBin + 1] * fbuf[2 * iBin + 1]);
matthiasm@0 947 }
matthiasm@4 948
matthiasm@0 949 // note magnitude mapping using pre-calculated matrix
matthiasm@0 950 float *nm = new float[nNote]; // note magnitude
matthiasm@0 951 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 952 nm[iNote] = 0; // initialise as 0
matthiasm@0 953 }
matthiasm@0 954 int binCount = 0;
matthiasm@0 955 for (vector<float>::iterator it = m_kernelValue.begin(); it != m_kernelValue.end(); ++it) {
matthiasm@0 956 // cerr << ".";
matthiasm@1 957 nm[m_kernelNoteIndex[binCount]] += magnitude[m_kernelFftIndex[binCount]] * m_kernelValue[binCount];
matthiasm@1 958 // cerr << m_kernelFftIndex[binCount] << " -- " << magnitude[m_kernelFftIndex[binCount]] << " -- "<< m_kernelValue[binCount] << endl;
matthiasm@0 959 binCount++;
matthiasm@0 960 }
matthiasm@1 961 // cerr << nm[20];
matthiasm@1 962 // cerr << endl;
matthiasm@0 963
matthiasm@0 964
matthiasm@0 965 float one_over_N = 1.0/frameCount;
matthiasm@0 966 // update means of complex tuning variables
matthiasm@0 967 m_meanTuning0 *= float(frameCount-1)*one_over_N;
matthiasm@0 968 m_meanTuning1 *= float(frameCount-1)*one_over_N;
matthiasm@0 969 m_meanTuning2 *= float(frameCount-1)*one_over_N;
matthiasm@0 970
matthiasm@0 971 for (int iTone = 0; iTone < 160; iTone = iTone + 3) {
matthiasm@0 972 m_meanTuning0 += nm[iTone + 0]*one_over_N;
matthiasm@0 973 m_meanTuning1 += nm[iTone + 1]*one_over_N;
matthiasm@0 974 m_meanTuning2 += nm[iTone + 2]*one_over_N;
matthiasm@3 975 float ratioOld = 0.997;
matthiasm@3 976 m_localTuning0 *= ratioOld; m_localTuning0 += nm[iTone + 0] * (1 - ratioOld);
matthiasm@3 977 m_localTuning1 *= ratioOld; m_localTuning1 += nm[iTone + 1] * (1 - ratioOld);
matthiasm@3 978 m_localTuning2 *= ratioOld; m_localTuning2 += nm[iTone + 2] * (1 - ratioOld);
matthiasm@0 979 }
matthiasm@0 980
matthiasm@0 981 // if (m_tuneLocal) {
matthiasm@0 982 // local tuning
matthiasm@0 983 float localTuningImag = sinvalue * m_localTuning1 - sinvalue * m_localTuning2;
matthiasm@0 984 float localTuningReal = m_localTuning0 + cosvalue * m_localTuning1 + cosvalue * m_localTuning2;
matthiasm@0 985 float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI);
matthiasm@0 986 m_localTuning.push_back(normalisedtuning);
matthiasm@0 987 float tuning440 = 440 * pow(2,normalisedtuning/12);
matthiasm@0 988 f10.values.push_back(tuning440);
matthiasm@3 989 // cerr << tuning440 << endl;
matthiasm@0 990 // }
matthiasm@0 991
matthiasm@0 992 Feature f1; // logfreqspec
matthiasm@0 993 f1.hasTimestamp = true;
matthiasm@0 994 f1.timestamp = timestamp;
matthiasm@0 995 for (size_t iNote = 0; iNote < nNote; iNote++) {
matthiasm@0 996 f1.values.push_back(nm[iNote]);
matthiasm@0 997 }
matthiasm@0 998
matthiasm@0 999 FeatureSet fs;
matthiasm@0 1000 fs[1].push_back(f1);
matthiasm@3 1001 fs[8].push_back(f10);
matthiasm@0 1002
matthiasm@0 1003 // deletes
matthiasm@0 1004 delete[] magnitude;
matthiasm@0 1005 delete[] nm;
matthiasm@0 1006
matthiasm@0 1007 m_fl.push_back(f1); // remember note magnitude for getRemainingFeatures
matthiasm@7 1008 char * pPath;
matthiasm@7 1009 pPath = getenv ("VAMP_PATH");
matthiasm@7 1010
matthiasm@7 1011
matthiasm@0 1012 return fs;
matthiasm@0 1013 }
matthiasm@0 1014
matthiasm@0 1015 NNLSChroma::FeatureSet
matthiasm@0 1016 NNLSChroma::getRemainingFeatures()
matthiasm@0 1017 {
matthiasm@4 1018 if (debug_on) cerr << "--> getRemainingFeatures" << endl;
matthiasm@4 1019 FeatureSet fsOut;
matthiasm@4 1020 if (m_fl.size() == 0) return fsOut;
matthiasm@9 1021 int nChord = m_chordnames.size();
matthiasm@0 1022 //
matthiasm@1 1023 /** Calculate Tuning
matthiasm@1 1024 calculate tuning from (using the angle of the complex number defined by the
matthiasm@1 1025 cumulative mean real and imag values)
matthiasm@1 1026 **/
matthiasm@1 1027 float meanTuningImag = sinvalue * m_meanTuning1 - sinvalue * m_meanTuning2;
matthiasm@1 1028 float meanTuningReal = m_meanTuning0 + cosvalue * m_meanTuning1 + cosvalue * m_meanTuning2;
matthiasm@1 1029 float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI));
matthiasm@1 1030 float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
matthiasm@1 1031 int intShift = floor(normalisedtuning * 3);
matthiasm@1 1032 float intFactor = normalisedtuning * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1033
matthiasm@1 1034 char buffer0 [50];
matthiasm@1 1035
matthiasm@1 1036 sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning);
matthiasm@1 1037
matthiasm@1 1038 // cerr << "normalisedtuning: " << normalisedtuning << '\n';
matthiasm@1 1039
matthiasm@1 1040 // push tuning to FeatureSet fsOut
matthiasm@1 1041 Feature f0; // tuning
matthiasm@1 1042 f0.hasTimestamp = true;
matthiasm@1 1043 f0.timestamp = Vamp::RealTime::frame2RealTime(0, lrintf(m_inputSampleRate));;
matthiasm@1 1044 f0.label = buffer0;
matthiasm@1 1045 fsOut[0].push_back(f0);
matthiasm@1 1046
matthiasm@1 1047 /** Tune Log-Frequency Spectrogram
matthiasm@1 1048 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
matthiasm@1 1049 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
matthiasm@13 1050 **/
matthiasm@13 1051 cerr << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
matthiasm@13 1052
matthiasm@1 1053 float tempValue = 0;
matthiasm@1 1054 float dbThreshold = 0; // relative to the background spectrum
matthiasm@1 1055 float thresh = pow(10,dbThreshold/20);
matthiasm@1 1056 // cerr << "tune local ? " << m_tuneLocal << endl;
matthiasm@1 1057 int count = 0;
matthiasm@1 1058
matthiasm@1 1059 for (FeatureList::iterator i = m_fl.begin(); i != m_fl.end(); ++i) {
matthiasm@1 1060 Feature f1 = *i;
matthiasm@1 1061 Feature f2; // tuned log-frequency spectrum
matthiasm@1 1062 f2.hasTimestamp = true;
matthiasm@1 1063 f2.timestamp = f1.timestamp;
matthiasm@1 1064 f2.values.push_back(0.0); f2.values.push_back(0.0); // set lower edge to zero
matthiasm@1 1065
matthiasm@1 1066 if (m_tuneLocal) {
matthiasm@1 1067 intShift = floor(m_localTuning[count] * 3);
matthiasm@1 1068 intFactor = m_localTuning[count] * 3 - intShift; // intFactor is a really bad name for this
matthiasm@1 1069 }
matthiasm@1 1070
matthiasm@1 1071 // cerr << intShift << " " << intFactor << endl;
matthiasm@1 1072
matthiasm@4 1073 for (unsigned k = 2; k < f1.values.size() - 3; ++k) { // interpolate all inner bins
matthiasm@1 1074 tempValue = f1.values[k + intShift] * (1-intFactor) + f1.values[k+intShift+1] * intFactor;
matthiasm@1 1075 f2.values.push_back(tempValue);
matthiasm@1 1076 }
matthiasm@1 1077
matthiasm@1 1078 f2.values.push_back(0.0); f2.values.push_back(0.0); f2.values.push_back(0.0); // upper edge
matthiasm@1 1079 vector<float> runningmean = SpecialConvolution(f2.values,hw);
matthiasm@1 1080 vector<float> runningstd;
matthiasm@1 1081 for (int i = 0; i < 256; i++) { // first step: squared values into vector (variance)
matthiasm@1 1082 runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i]));
matthiasm@1 1083 }
matthiasm@1 1084 runningstd = SpecialConvolution(runningstd,hw); // second step convolve
matthiasm@1 1085 for (int i = 0; i < 256; i++) {
matthiasm@1 1086 runningstd[i] = sqrt(runningstd[i]); // square root to finally have running std
matthiasm@1 1087 if (runningstd[i] > 0) {
matthiasm@1 1088 // f2.values[i] = (f2.values[i] / runningmean[i]) > thresh ?
matthiasm@1 1089 // (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1090 f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ?
matthiasm@1 1091 (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_paling) : 0;
matthiasm@1 1092 }
matthiasm@1 1093 if (f2.values[i] < 0) {
matthiasm@1 1094 cerr << "ERROR: negative value in logfreq spectrum" << endl;
matthiasm@1 1095 }
matthiasm@1 1096 }
matthiasm@1 1097 fsOut[2].push_back(f2);
matthiasm@1 1098 count++;
matthiasm@1 1099 }
matthiasm@13 1100 cerr << "done." << endl;
matthiasm@1 1101
matthiasm@1 1102 /** Semitone spectrum and chromagrams
matthiasm@1 1103 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
matthiasm@1 1104 is inferred using a non-negative least squares algorithm.
matthiasm@1 1105 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
matthiasm@1 1106 bass and treble stacked onto each other).
matthiasm@1 1107 **/
matthiasm@13 1108 if (m_dictID == 1) {
matthiasm@13 1109 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
matthiasm@13 1110 } else {
matthiasm@13 1111 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
matthiasm@13 1112 }
matthiasm@13 1113
matthiasm@1 1114
matthiasm@1 1115 vector<vector<float> > chordogram;
matthiasm@3 1116 vector<vector<int> > scoreChordogram;
matthiasm@1 1117 vector<float> oldchroma = vector<float>(12,0);
matthiasm@1 1118 vector<float> oldbasschroma = vector<float>(12,0);
matthiasm@1 1119 count = 0;
matthiasm@9 1120
matthiasm@1 1121 for (FeatureList::iterator it = fsOut[2].begin(); it != fsOut[2].end(); ++it) {
matthiasm@1 1122 Feature f2 = *it; // logfreq spectrum
matthiasm@1 1123 Feature f3; // semitone spectrum
matthiasm@1 1124 Feature f4; // treble chromagram
matthiasm@1 1125 Feature f5; // bass chromagram
matthiasm@1 1126 Feature f6; // treble and bass chromagram
matthiasm@1 1127
matthiasm@1 1128 f3.hasTimestamp = true;
matthiasm@1 1129 f3.timestamp = f2.timestamp;
matthiasm@1 1130
matthiasm@1 1131 f4.hasTimestamp = true;
matthiasm@1 1132 f4.timestamp = f2.timestamp;
matthiasm@1 1133
matthiasm@1 1134 f5.hasTimestamp = true;
matthiasm@1 1135 f5.timestamp = f2.timestamp;
matthiasm@1 1136
matthiasm@1 1137 f6.hasTimestamp = true;
matthiasm@1 1138 f6.timestamp = f2.timestamp;
matthiasm@1 1139
matthiasm@3 1140 float b[256];
matthiasm@1 1141
matthiasm@1 1142 bool some_b_greater_zero = false;
matthiasm@3 1143 float sumb = 0;
matthiasm@1 1144 for (int i = 0; i < 256; i++) {
matthiasm@3 1145 // b[i] = m_dict[(256 * count + i) % (256 * 84)];
matthiasm@3 1146 b[i] = f2.values[i];
matthiasm@3 1147 sumb += b[i];
matthiasm@1 1148 if (b[i] > 0) {
matthiasm@1 1149 some_b_greater_zero = true;
matthiasm@1 1150 }
matthiasm@1 1151 }
matthiasm@1 1152
matthiasm@1 1153 // here's where the non-negative least squares algorithm calculates the note activation x
matthiasm@1 1154
matthiasm@1 1155 vector<float> chroma = vector<float>(12, 0);
matthiasm@1 1156 vector<float> basschroma = vector<float>(12, 0);
matthiasm@1 1157 float currval;
matthiasm@1 1158 unsigned iSemitone = 0;
matthiasm@1 1159
matthiasm@1 1160 if (some_b_greater_zero) {
matthiasm@3 1161 if (m_dictID == 1) {
matthiasm@1 1162 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@1 1163 currval = 0;
matthiasm@3 1164 currval += b[iNote + 1 + -1] * 0.5;
matthiasm@3 1165 currval += b[iNote + 1 + 0] * 1.0;
matthiasm@3 1166 currval += b[iNote + 1 + 1] * 0.5;
matthiasm@1 1167 f3.values.push_back(currval);
matthiasm@1 1168 chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
matthiasm@1 1169 basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
matthiasm@1 1170 iSemitone++;
matthiasm@1 1171 }
matthiasm@1 1172
matthiasm@1 1173 } else {
matthiasm@3 1174 float x[84+1000];
matthiasm@3 1175 for (int i = 1; i < 1084; ++i) x[i] = 1.0;
matthiasm@10 1176 vector<int> signifIndex;
matthiasm@10 1177 int index=0;
matthiasm@10 1178 sumb /= 84.0;
matthiasm@10 1179 for (unsigned iNote = 2; iNote < nNote - 2; iNote += 3) {
matthiasm@10 1180 float currval = 0;
matthiasm@10 1181 currval += b[iNote + 1 + -1];
matthiasm@10 1182 currval += b[iNote + 1 + 0];
matthiasm@10 1183 currval += b[iNote + 1 + 1];
matthiasm@10 1184 if (currval > 0) signifIndex.push_back(index);
matthiasm@10 1185 f3.values.push_back(0); // fill the values, change later
matthiasm@10 1186 index++;
matthiasm@10 1187 }
matthiasm@3 1188 float rnorm;
matthiasm@3 1189 float w[84+1000];
matthiasm@3 1190 float zz[84+1000];
matthiasm@3 1191 int indx[84+1000];
matthiasm@1 1192 int mode;
matthiasm@10 1193 int dictsize = 256*signifIndex.size();
matthiasm@10 1194 // cerr << "dictsize is " << dictsize << "and values size" << f3.values.size()<< endl;
matthiasm@10 1195 float *curr_dict = new float[dictsize];
matthiasm@10 1196 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
matthiasm@10 1197 for (unsigned iBin = 0; iBin < 256; iBin++) {
matthiasm@10 1198 curr_dict[iNote * 256 + iBin] = 1.0 * m_dict[signifIndex[iNote] * 256 + iBin];
matthiasm@10 1199 }
matthiasm@3 1200 }
matthiasm@10 1201 nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);
matthiasm@10 1202 delete [] curr_dict;
matthiasm@10 1203 for (unsigned iNote = 0; iNote < signifIndex.size(); ++iNote) {
matthiasm@10 1204 f3.values[signifIndex[iNote]] = x[iNote];
matthiasm@3 1205 // cerr << mode << endl;
matthiasm@10 1206 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
matthiasm@10 1207 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
matthiasm@3 1208 }
matthiasm@1 1209 }
matthiasm@1 1210 }
matthiasm@13 1211
matthiasm@10 1212
matthiasm@12 1213
matthiasm@13 1214
matthiasm@12 1215 f4.values = chroma;
matthiasm@1 1216 f5.values = basschroma;
matthiasm@1 1217 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
matthiasm@1 1218 f6.values = chroma;
matthiasm@1 1219
matthiasm@13 1220 if (m_doNormalizeChroma > 0) {
matthiasm@13 1221 vector<float> chromanorm = vector<float>(3,0);
matthiasm@13 1222 switch (int(m_doNormalizeChroma)) {
matthiasm@13 1223 case 0: // should never end up here
matthiasm@13 1224 break;
matthiasm@13 1225 case 1:
matthiasm@13 1226 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
matthiasm@13 1227 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
matthiasm@13 1228 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
matthiasm@13 1229 break;
matthiasm@13 1230 case 2:
matthiasm@13 1231 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
matthiasm@13 1232 chromanorm[0] += *it;
matthiasm@13 1233 }
matthiasm@13 1234 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
matthiasm@13 1235 chromanorm[1] += *it;
matthiasm@13 1236 }
matthiasm@13 1237 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
matthiasm@13 1238 chromanorm[2] += *it;
matthiasm@13 1239 }
matthiasm@13 1240 break;
matthiasm@13 1241 case 3:
matthiasm@13 1242 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
matthiasm@13 1243 chromanorm[0] += pow(*it,2);
matthiasm@13 1244 }
matthiasm@13 1245 chromanorm[0] = sqrt(chromanorm[0]);
matthiasm@13 1246 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
matthiasm@13 1247 chromanorm[1] += pow(*it,2);
matthiasm@13 1248 }
matthiasm@13 1249 chromanorm[1] = sqrt(chromanorm[1]);
matthiasm@13 1250 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
matthiasm@13 1251 chromanorm[2] += pow(*it,2);
matthiasm@13 1252 }
matthiasm@13 1253 chromanorm[2] = sqrt(chromanorm[2]);
matthiasm@13 1254 break;
matthiasm@13 1255 }
matthiasm@13 1256 if (chromanorm[0] > 0) {
matthiasm@13 1257 for (int i = 0; i < f4.values.size(); i++) {
matthiasm@13 1258 f4.values[i] /= chromanorm[0];
matthiasm@13 1259 }
matthiasm@13 1260 }
matthiasm@13 1261 if (chromanorm[1] > 0) {
matthiasm@13 1262 for (int i = 0; i < f5.values.size(); i++) {
matthiasm@13 1263 f5.values[i] /= chromanorm[1];
matthiasm@13 1264 }
matthiasm@13 1265 }
matthiasm@13 1266 if (chromanorm[2] > 0) {
matthiasm@13 1267 for (int i = 0; i < f6.values.size(); i++) {
matthiasm@13 1268 f6.values[i] /= chromanorm[2];
matthiasm@13 1269 }
matthiasm@13 1270 }
matthiasm@13 1271
matthiasm@13 1272 }
matthiasm@13 1273
matthiasm@1 1274 // local chord estimation
matthiasm@1 1275 vector<float> currentChordSalience;
matthiasm@1 1276 float tempchordvalue = 0;
matthiasm@1 1277 float sumchordvalue = 0;
matthiasm@9 1278
matthiasm@1 1279 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1280 tempchordvalue = 0;
matthiasm@1 1281 for (int iBin = 0; iBin < 12; iBin++) {
matthiasm@9 1282 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1283 }
matthiasm@1 1284 for (int iBin = 12; iBin < 24; iBin++) {
matthiasm@9 1285 tempchordvalue += m_chorddict[24 * iChord + iBin] * chroma[iBin];
matthiasm@1 1286 }
matthiasm@1 1287 sumchordvalue+=tempchordvalue;
matthiasm@1 1288 currentChordSalience.push_back(tempchordvalue);
matthiasm@1 1289 }
matthiasm@1 1290 for (int iChord = 0; iChord < nChord; iChord++) {
matthiasm@1 1291 currentChordSalience[iChord] /= sumchordvalue;
matthiasm@1 1292 }
matthiasm@1 1293 chordogram.push_back(currentChordSalience);
matthiasm@1 1294
matthiasm@1 1295 fsOut[3].push_back(f3);
matthiasm@1 1296 fsOut[4].push_back(f4);
matthiasm@1 1297 fsOut[5].push_back(f5);
matthiasm@1 1298 fsOut[6].push_back(f6);
matthiasm@1 1299 count++;
matthiasm@1 1300 }
matthiasm@13 1301 cerr << "done." << endl;
matthiasm@13 1302
matthiasm@10 1303
matthiasm@3 1304 /* Simple chord estimation
matthiasm@3 1305 I just take the local chord estimates ("currentChordSalience") and average them over time, then
matthiasm@3 1306 take the maximum. Very simple, don't do this at home...
matthiasm@3 1307 */
matthiasm@13 1308 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
matthiasm@3 1309 count = 0;
matthiasm@3 1310 int halfwindowlength = m_inputSampleRate / m_stepSize;
matthiasm@3 1311 vector<int> chordSequence;
matthiasm@3 1312 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
matthiasm@3 1313 vector<int> temp = vector<int>(nChord,0);
matthiasm@3 1314 scoreChordogram.push_back(temp);
matthiasm@3 1315 }
matthiasm@4 1316 for (FeatureList::iterator it = fsOut[6].begin(); it < fsOut[6].end()-2*halfwindowlength-1; ++it) {
matthiasm@3 1317 int startIndex = count + 1;
matthiasm@3 1318 int endIndex = count + 2 * halfwindowlength;
matthiasm@10 1319
matthiasm@10 1320 float chordThreshold = 2.5/nChord;//*(2*halfwindowlength+1);
matthiasm@10 1321
matthiasm@10 1322 vector<int> chordCandidates;
matthiasm@10 1323 for (unsigned iChord = 0; iChord < nChord-1; iChord++) {
matthiasm@10 1324 // float currsum = 0;
matthiasm@10 1325 // for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@10 1326 // currsum += chordogram[iFrame][iChord];
matthiasm@10 1327 // }
matthiasm@10 1328 // if (currsum > chordThreshold) chordCandidates.push_back(iChord);
matthiasm@10 1329 for (unsigned iFrame = startIndex; iFrame < endIndex; ++iFrame) {
matthiasm@10 1330 if (chordogram[iFrame][iChord] > chordThreshold) {
matthiasm@10 1331 chordCandidates.push_back(iChord);
matthiasm@10 1332 break;
matthiasm@10 1333 }
matthiasm@10 1334 }
matthiasm@10 1335 }
matthiasm@10 1336 chordCandidates.push_back(nChord-1);
matthiasm@10 1337 // cerr << chordCandidates.size() << endl;
matthiasm@10 1338
matthiasm@10 1339 float maxval = 0; // will be the value of the most salient *chord change* in this frame
matthiasm@4 1340 float maxindex = 0; //... and the index thereof
matthiasm@10 1341 unsigned bestchordL = nChord-1; // index of the best "left" chord
matthiasm@10 1342 unsigned bestchordR = nChord-1; // index of the best "right" chord
matthiasm@10 1343
matthiasm@4 1344 for (int iWF = 1; iWF < 2*halfwindowlength; ++iWF) {
matthiasm@3 1345 // now find the max values on both sides of iWF
matthiasm@3 1346 // left side:
matthiasm@3 1347 float maxL = 0;
matthiasm@3 1348 unsigned maxindL = nChord-1;
matthiasm@10 1349 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@10 1350 unsigned iChord = chordCandidates[kChord];
matthiasm@3 1351 float currsum = 0;
matthiasm@3 1352 for (unsigned iFrame = 0; iFrame < iWF-1; ++iFrame) {
matthiasm@3 1353 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1354 }
matthiasm@3 1355 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1356 if (currsum > maxL) {
matthiasm@3 1357 maxL = currsum;
matthiasm@3 1358 maxindL = iChord;
matthiasm@3 1359 }
matthiasm@3 1360 }
matthiasm@3 1361 // right side:
matthiasm@3 1362 float maxR = 0;
matthiasm@3 1363 unsigned maxindR = nChord-1;
matthiasm@10 1364 for (unsigned kChord = 0; kChord < chordCandidates.size(); kChord++) {
matthiasm@10 1365 unsigned iChord = chordCandidates[kChord];
matthiasm@3 1366 float currsum = 0;
matthiasm@3 1367 for (unsigned iFrame = iWF-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1368 currsum += chordogram[count+iFrame][iChord];
matthiasm@3 1369 }
matthiasm@3 1370 if (iChord == nChord-1) currsum *= 0.8;
matthiasm@3 1371 if (currsum > maxR) {
matthiasm@3 1372 maxR = currsum;
matthiasm@3 1373 maxindR = iChord;
matthiasm@3 1374 }
matthiasm@3 1375 }
matthiasm@3 1376 if (maxL+maxR > maxval) {
matthiasm@3 1377 maxval = maxL+maxR;
matthiasm@3 1378 maxindex = iWF;
matthiasm@3 1379 bestchordL = maxindL;
matthiasm@3 1380 bestchordR = maxindR;
matthiasm@3 1381 }
matthiasm@3 1382
matthiasm@3 1383 }
matthiasm@3 1384 // cerr << "maxindex: " << maxindex << ", bestchordR is " << bestchordR << ", of frame " << count << endl;
matthiasm@3 1385 // add a score to every chord-frame-point that was part of a maximum
matthiasm@3 1386 for (unsigned iFrame = 0; iFrame < maxindex-1; ++iFrame) {
matthiasm@3 1387 scoreChordogram[iFrame+count][bestchordL]++;
matthiasm@3 1388 }
matthiasm@3 1389 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
matthiasm@3 1390 scoreChordogram[iFrame+count][bestchordR]++;
matthiasm@3 1391 }
matthiasm@3 1392 count++;
matthiasm@3 1393 }
matthiasm@13 1394 // cerr << "******* agent finished *******" << endl;
matthiasm@3 1395 count = 0;
matthiasm@3 1396 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1397 float maxval = 0; // will be the value of the most salient chord in this frame
matthiasm@3 1398 float maxindex = 0; //... and the index thereof
matthiasm@3 1399 for (unsigned iChord = 0; iChord < nChord; iChord++) {
matthiasm@3 1400 if (scoreChordogram[count][iChord] > maxval) {
matthiasm@3 1401 maxval = scoreChordogram[count][iChord];
matthiasm@3 1402 maxindex = iChord;
matthiasm@4 1403 // cerr << iChord << endl;
matthiasm@3 1404 }
matthiasm@3 1405 }
matthiasm@3 1406 chordSequence.push_back(maxindex);
matthiasm@4 1407 // cerr << "before modefilter, maxindex: " << maxindex << endl;
matthiasm@3 1408 count++;
matthiasm@3 1409 }
matthiasm@13 1410 // cerr << "******* mode filter done *******" << endl;
matthiasm@10 1411
matthiasm@3 1412
matthiasm@3 1413 // mode filter on chordSequence
matthiasm@3 1414 count = 0;
matthiasm@12 1415 string oldChord = "";
matthiasm@3 1416 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
matthiasm@3 1417 Feature f6 = *it;
matthiasm@3 1418 Feature f7; // chord estimate
matthiasm@3 1419 f7.hasTimestamp = true;
matthiasm@3 1420 f7.timestamp = f6.timestamp;
matthiasm@3 1421 vector<int> chordCount = vector<int>(nChord,0);
matthiasm@3 1422 int maxChordCount = 0;
matthiasm@3 1423 int maxChordIndex = nChord-1;
matthiasm@12 1424 string maxChord;
matthiasm@4 1425 int startIndex = max(count - halfwindowlength/2,0);
matthiasm@4 1426 int endIndex = min(int(chordogram.size()), count + halfwindowlength/2);
matthiasm@4 1427 for (int i = startIndex; i < endIndex; i++) {
matthiasm@4 1428 chordCount[chordSequence[i]]++;
matthiasm@4 1429 if (chordCount[chordSequence[i]] > maxChordCount) {
matthiasm@7 1430 // cerr << "start index " << startIndex << endl;
matthiasm@4 1431 maxChordCount++;
matthiasm@4 1432 maxChordIndex = chordSequence[i];
matthiasm@12 1433 maxChord = m_chordnames[maxChordIndex];
matthiasm@4 1434 }
matthiasm@4 1435 }
matthiasm@4 1436 // chordSequence[count] = maxChordIndex;
matthiasm@7 1437 // cerr << maxChordIndex << endl;
matthiasm@12 1438 if (oldChord != maxChord) {
matthiasm@12 1439 oldChord = maxChord;
matthiasm@3 1440
matthiasm@9 1441 // char buffer1 [50];
matthiasm@9 1442 // if (maxChordIndex < nChord - 1) {
matthiasm@9 1443 // sprintf(buffer1, "%s%s", notenames[maxChordIndex % 12 + 12], chordtypes[maxChordIndex]);
matthiasm@9 1444 // } else {
matthiasm@9 1445 // sprintf(buffer1, "N");
matthiasm@9 1446 // }
matthiasm@9 1447 // f7.label = buffer1;
matthiasm@9 1448 f7.label = m_chordnames[maxChordIndex];
matthiasm@3 1449 fsOut[7].push_back(f7);
matthiasm@3 1450 }
matthiasm@3 1451 count++;
matthiasm@3 1452 }
matthiasm@13 1453 cerr << "done." << endl;
matthiasm@0 1454 // // musicity
matthiasm@0 1455 // count = 0;
matthiasm@0 1456 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
matthiasm@0 1457 // vector<float> musicityValue;
matthiasm@0 1458 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1459 // Feature f4 = *it;
matthiasm@0 1460 //
matthiasm@0 1461 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1462 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1463 // float chromasum = 0;
matthiasm@0 1464 // float diffsum = 0;
matthiasm@0 1465 // for (int k = 0; k < 12; k++) {
matthiasm@0 1466 // for (int i = startIndex + 1; i < endIndex; i++) {
matthiasm@0 1467 // chromasum += pow(fsOut[4][i].values[k],2);
matthiasm@0 1468 // diffsum += abs(fsOut[4][i-1].values[k] - fsOut[4][i].values[k]);
matthiasm@0 1469 // }
matthiasm@0 1470 // }
matthiasm@0 1471 // diffsum /= chromasum;
matthiasm@0 1472 // musicityValue.push_back(diffsum);
matthiasm@0 1473 // count++;
matthiasm@0 1474 // }
matthiasm@0 1475 //
matthiasm@0 1476 // float musicityThreshold = 0.44;
matthiasm@0 1477 // if (m_stepSize == 4096) {
matthiasm@0 1478 // musicityThreshold = 0.74;
matthiasm@0 1479 // }
matthiasm@0 1480 // if (m_stepSize == 4410) {
matthiasm@0 1481 // musicityThreshold = 0.77;
matthiasm@0 1482 // }
matthiasm@0 1483 //
matthiasm@0 1484 // count = 0;
matthiasm@0 1485 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {
matthiasm@0 1486 // Feature f4 = *it;
matthiasm@0 1487 // Feature f8; // musicity
matthiasm@0 1488 // Feature f9; // musicity segmenter
matthiasm@0 1489 //
matthiasm@0 1490 // f8.hasTimestamp = true;
matthiasm@0 1491 // f8.timestamp = f4.timestamp;
matthiasm@0 1492 // f9.hasTimestamp = true;
matthiasm@0 1493 // f9.timestamp = f4.timestamp;
matthiasm@0 1494 //
matthiasm@0 1495 // int startIndex = max(count - musicitykernelwidth/2,0);
matthiasm@0 1496 // int endIndex = min(int(chordogram.size()), startIndex + musicitykernelwidth - 1);
matthiasm@0 1497 // int musicityCount = 0;
matthiasm@0 1498 // for (int i = startIndex; i <= endIndex; i++) {
matthiasm@0 1499 // if (musicityValue[i] > musicityThreshold) musicityCount++;
matthiasm@0 1500 // }
matthiasm@0 1501 // bool isSpeech = (2 * musicityCount > endIndex - startIndex + 1);
matthiasm@0 1502 //
matthiasm@0 1503 // if (isSpeech) {
matthiasm@0 1504 // if (oldlabeltype != 2) {
matthiasm@0 1505 // f9.label = "Speech";
matthiasm@0 1506 // fsOut[9].push_back(f9);
matthiasm@0 1507 // oldlabeltype = 2;
matthiasm@0 1508 // }
matthiasm@0 1509 // } else {
matthiasm@0 1510 // if (oldlabeltype != 1) {
matthiasm@0 1511 // f9.label = "Music";
matthiasm@0 1512 // fsOut[9].push_back(f9);
matthiasm@0 1513 // oldlabeltype = 1;
matthiasm@0 1514 // }
matthiasm@0 1515 // }
matthiasm@0 1516 // f8.values.push_back(musicityValue[count]);
matthiasm@0 1517 // fsOut[8].push_back(f8);
matthiasm@0 1518 // count++;
matthiasm@0 1519 // }
matthiasm@0 1520 return fsOut;
matthiasm@0 1521
matthiasm@0 1522 }
matthiasm@0 1523