comparison NNLSChroma.cpp @ 13:9ae90fa5fa74 matthiasm-plugin

NNLS is now taken from a file without gpl. more chroma normalisation options.
author matthiasm
date Wed, 16 Jun 2010 10:16:13 +0000
parents 54f28d8ac098
children 75fb80542cd2
comparison
equal deleted inserted replaced
12:54f28d8ac098 13:9ae90fa5fa74
431 string 431 string
432 NNLSChroma::getDescription() const 432 NNLSChroma::getDescription() const
433 { 433 {
434 // Return something helpful here! 434 // Return something helpful here!
435 if (debug_on) cerr << "--> getDescription" << endl; 435 if (debug_on) cerr << "--> getDescription" << endl;
436 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum (LAS) of the DFT: the LAS itself, a standard-tuned version thereof (the local and global tuning estimates can are also be output), an approximate transcription to semitone activation using non-linear least squares (NNLS). Furthermore chroma features and a simple chord estimate derived from this NNLS semitone transcription."; 436 return "This plugin provides a number of features derived from a log-frequency amplitude spectrum of the DFT: some variants of the log-frequency spectrum, including a semitone spectrum derived from approximate transcription using the NNLS algorithm; based on this semitone spectrum, chroma features and a simple chord estimate.";
437 } 437 }
438 438
439 string 439 string
440 NNLSChroma::getMaker() const 440 NNLSChroma::getMaker() const
441 { 441 {
567 d4.identifier = "chromanormalize"; 567 d4.identifier = "chromanormalize";
568 d4.name = "chroma normalization"; 568 d4.name = "chroma normalization";
569 d4.description = "How shall the chroma vector be normalized?"; 569 d4.description = "How shall the chroma vector be normalized?";
570 d4.unit = ""; 570 d4.unit = "";
571 d4.minValue = 0; 571 d4.minValue = 0;
572 d4.maxValue = 1; 572 d4.maxValue = 3;
573 d4.defaultValue = 0; 573 d4.defaultValue = 0;
574 d4.isQuantized = true; 574 d4.isQuantized = true;
575 d4.valueNames.push_back("no normalization"); 575 d4.valueNames.push_back("none");
576 d4.valueNames.push_back("maximum normalization"); 576 d4.valueNames.push_back("maximum norm");
577 d4.valueNames.push_back("L1 norm");
578 d4.valueNames.push_back("L2 norm");
577 d4.quantizeStep = 1.0; 579 d4.quantizeStep = 1.0;
578 list.push_back(d4); 580 list.push_back(d4);
579 581
580 return list; 582 return list;
581 } 583 }
1043 fsOut[0].push_back(f0); 1045 fsOut[0].push_back(f0);
1044 1046
1045 /** Tune Log-Frequency Spectrogram 1047 /** Tune Log-Frequency Spectrogram
1046 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to 1048 calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
1047 perform linear interpolation on the existing log-frequency spectrogram (kinda f1). 1049 perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
1048 **/ 1050 **/
1049 1051 cerr << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";
1052
1050 float tempValue = 0; 1053 float tempValue = 0;
1051 float dbThreshold = 0; // relative to the background spectrum 1054 float dbThreshold = 0; // relative to the background spectrum
1052 float thresh = pow(10,dbThreshold/20); 1055 float thresh = pow(10,dbThreshold/20);
1053 // cerr << "tune local ? " << m_tuneLocal << endl; 1056 // cerr << "tune local ? " << m_tuneLocal << endl;
1054 int count = 0; 1057 int count = 0;
1092 } 1095 }
1093 } 1096 }
1094 fsOut[2].push_back(f2); 1097 fsOut[2].push_back(f2);
1095 count++; 1098 count++;
1096 } 1099 }
1100 cerr << "done." << endl;
1097 1101
1098 /** Semitone spectrum and chromagrams 1102 /** Semitone spectrum and chromagrams
1099 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum 1103 Semitone-spaced log-frequency spectrum derived from the tuned log-freq spectrum above. the spectrum
1100 is inferred using a non-negative least squares algorithm. 1104 is inferred using a non-negative least squares algorithm.
1101 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means 1105 Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
1102 bass and treble stacked onto each other). 1106 bass and treble stacked onto each other).
1103 **/ 1107 **/
1104 // taucs_ccs_matrix* A_original_ordering = taucs_construct_sorted_ccs_matrix(nnlsdict06, nnls_m, nnls_n); 1108 if (m_dictID == 1) {
1109 cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
1110 } else {
1111 cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
1112 }
1113
1105 1114
1106 vector<vector<float> > chordogram; 1115 vector<vector<float> > chordogram;
1107 vector<vector<int> > scoreChordogram; 1116 vector<vector<int> > scoreChordogram;
1108 vector<float> oldchroma = vector<float>(12,0); 1117 vector<float> oldchroma = vector<float>(12,0);
1109 vector<float> oldbasschroma = vector<float>(12,0); 1118 vector<float> oldbasschroma = vector<float>(12,0);
1197 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; 1206 chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
1198 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; 1207 basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
1199 } 1208 }
1200 } 1209 }
1201 } 1210 }
1211
1202 1212
1203 1213
1204 if (m_doNormalizeChroma > 0) { 1214
1205 float chromamax = *max_element(chroma.begin(), chroma.end());
1206 for (int i = 0; i < chroma.size(); i++) {
1207 chroma[i] /= chromamax;
1208 }
1209 }
1210 f4.values = chroma; 1215 f4.values = chroma;
1211 f5.values = basschroma; 1216 f5.values = basschroma;
1212 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas 1217 chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); // just stack the both chromas
1213 f6.values = chroma; 1218 f6.values = chroma;
1214 1219
1220 if (m_doNormalizeChroma > 0) {
1221 vector<float> chromanorm = vector<float>(3,0);
1222 switch (int(m_doNormalizeChroma)) {
1223 case 0: // should never end up here
1224 break;
1225 case 1:
1226 chromanorm[0] = *max_element(f4.values.begin(), f4.values.end());
1227 chromanorm[1] = *max_element(f5.values.begin(), f5.values.end());
1228 chromanorm[2] = max(chromanorm[0], chromanorm[1]);
1229 break;
1230 case 2:
1231 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
1232 chromanorm[0] += *it;
1233 }
1234 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
1235 chromanorm[1] += *it;
1236 }
1237 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
1238 chromanorm[2] += *it;
1239 }
1240 break;
1241 case 3:
1242 for (vector<float>::iterator it = f4.values.begin(); it != f4.values.end(); ++it) {
1243 chromanorm[0] += pow(*it,2);
1244 }
1245 chromanorm[0] = sqrt(chromanorm[0]);
1246 for (vector<float>::iterator it = f5.values.begin(); it != f5.values.end(); ++it) {
1247 chromanorm[1] += pow(*it,2);
1248 }
1249 chromanorm[1] = sqrt(chromanorm[1]);
1250 for (vector<float>::iterator it = f6.values.begin(); it != f6.values.end(); ++it) {
1251 chromanorm[2] += pow(*it,2);
1252 }
1253 chromanorm[2] = sqrt(chromanorm[2]);
1254 break;
1255 }
1256 if (chromanorm[0] > 0) {
1257 for (int i = 0; i < f4.values.size(); i++) {
1258 f4.values[i] /= chromanorm[0];
1259 }
1260 }
1261 if (chromanorm[1] > 0) {
1262 for (int i = 0; i < f5.values.size(); i++) {
1263 f5.values[i] /= chromanorm[1];
1264 }
1265 }
1266 if (chromanorm[2] > 0) {
1267 for (int i = 0; i < f6.values.size(); i++) {
1268 f6.values[i] /= chromanorm[2];
1269 }
1270 }
1271
1272 }
1273
1215 // local chord estimation 1274 // local chord estimation
1216 vector<float> currentChordSalience; 1275 vector<float> currentChordSalience;
1217 float tempchordvalue = 0; 1276 float tempchordvalue = 0;
1218 float sumchordvalue = 0; 1277 float sumchordvalue = 0;
1219 1278
1237 fsOut[4].push_back(f4); 1296 fsOut[4].push_back(f4);
1238 fsOut[5].push_back(f5); 1297 fsOut[5].push_back(f5);
1239 fsOut[6].push_back(f6); 1298 fsOut[6].push_back(f6);
1240 count++; 1299 count++;
1241 } 1300 }
1242 cerr << "******* NNLS done *******" << endl; 1301 cerr << "done." << endl;
1302
1243 1303
1244 /* Simple chord estimation 1304 /* Simple chord estimation
1245 I just take the local chord estimates ("currentChordSalience") and average them over time, then 1305 I just take the local chord estimates ("currentChordSalience") and average them over time, then
1246 take the maximum. Very simple, don't do this at home... 1306 take the maximum. Very simple, don't do this at home...
1247 */ 1307 */
1308 cerr << "[NNLS Chroma Plugin] Chord Estimation ... ";
1248 count = 0; 1309 count = 0;
1249 int halfwindowlength = m_inputSampleRate / m_stepSize; 1310 int halfwindowlength = m_inputSampleRate / m_stepSize;
1250 vector<int> chordSequence; 1311 vector<int> chordSequence;
1251 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram 1312 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { // initialise the score chordogram
1252 vector<int> temp = vector<int>(nChord,0); 1313 vector<int> temp = vector<int>(nChord,0);
1328 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) { 1389 for (unsigned iFrame = maxindex-1; iFrame < 2*halfwindowlength; ++iFrame) {
1329 scoreChordogram[iFrame+count][bestchordR]++; 1390 scoreChordogram[iFrame+count][bestchordR]++;
1330 } 1391 }
1331 count++; 1392 count++;
1332 } 1393 }
1333 cerr << "******* agent finished *******" << endl; 1394 // cerr << "******* agent finished *******" << endl;
1334 count = 0; 1395 count = 0;
1335 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) { 1396 for (FeatureList::iterator it = fsOut[6].begin(); it != fsOut[6].end(); ++it) {
1336 float maxval = 0; // will be the value of the most salient chord in this frame 1397 float maxval = 0; // will be the value of the most salient chord in this frame
1337 float maxindex = 0; //... and the index thereof 1398 float maxindex = 0; //... and the index thereof
1338 for (unsigned iChord = 0; iChord < nChord; iChord++) { 1399 for (unsigned iChord = 0; iChord < nChord; iChord++) {
1344 } 1405 }
1345 chordSequence.push_back(maxindex); 1406 chordSequence.push_back(maxindex);
1346 // cerr << "before modefilter, maxindex: " << maxindex << endl; 1407 // cerr << "before modefilter, maxindex: " << maxindex << endl;
1347 count++; 1408 count++;
1348 } 1409 }
1349 cerr << "******* mode filter done *******" << endl; 1410 // cerr << "******* mode filter done *******" << endl;
1350 1411
1351 1412
1352 // mode filter on chordSequence 1413 // mode filter on chordSequence
1353 count = 0; 1414 count = 0;
1354 string oldChord = ""; 1415 string oldChord = "";
1387 f7.label = m_chordnames[maxChordIndex]; 1448 f7.label = m_chordnames[maxChordIndex];
1388 fsOut[7].push_back(f7); 1449 fsOut[7].push_back(f7);
1389 } 1450 }
1390 count++; 1451 count++;
1391 } 1452 }
1453 cerr << "done." << endl;
1392 // // musicity 1454 // // musicity
1393 // count = 0; 1455 // count = 0;
1394 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2 1456 // int oldlabeltype = 0; // start value is 0, music is 1, speech is 2
1395 // vector<float> musicityValue; 1457 // vector<float> musicityValue;
1396 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) { 1458 // for (FeatureList::iterator it = fsOut[4].begin(); it != fsOut[4].end(); ++it) {