comparison plugins/SimilarityPlugin.cpp @ 45:5d7ce1d87301

* Add MFCC plugin * Add means output to Chromagram plugin * Update similarity plugin for MFCC changes
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 18 Jan 2008 13:30:56 +0000
parents 1dc00e4dbae6
children 26a2e341d358
comparison
equal deleted inserted replaced
44:1dc00e4dbae6 45:5d7ce1d87301
61 } 61 }
62 62
63 string 63 string
64 SimilarityPlugin::getMaker() const 64 SimilarityPlugin::getMaker() const
65 { 65 {
66 return "Chris Cannam, Queen Mary, University of London"; 66 return "Mark Levy and Chris Cannam, Queen Mary, University of London";
67 } 67 }
68 68
69 int 69 int
70 SimilarityPlugin::getPluginVersion() const 70 SimilarityPlugin::getPluginVersion() const
71 { 71 {
86 86
87 size_t 87 size_t
88 SimilarityPlugin::getMaxChannelCount() const 88 SimilarityPlugin::getMaxChannelCount() const
89 { 89 {
90 return 1024; 90 return 1024;
91 // return 1;
92 } 91 }
93 92
94 bool 93 bool
95 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) 94 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize)
96 { 95 {
127 126
128 if (m_type == TypeMFCC) { 127 if (m_type == TypeMFCC) {
129 128
130 m_featureColumnSize = 20; 129 m_featureColumnSize = 20;
131 130
132 MFCCConfig config; 131 MFCCConfig config(lrintf(m_inputSampleRate) / decimationFactor);
133 config.FS = lrintf(m_inputSampleRate) / decimationFactor;
134 config.fftsize = 2048; 132 config.fftsize = 2048;
135 config.nceps = m_featureColumnSize - 1; 133 config.nceps = m_featureColumnSize - 1;
136 config.want_c0 = true; 134 config.want_c0 = true;
135 config.logpower = 1;
137 m_mfcc = new MFCC(config); 136 m_mfcc = new MFCC(config);
138 m_fftSize = m_mfcc->getfftlength(); 137 m_fftSize = m_mfcc->getfftlength();
139 138
140 std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl; 139 std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl;
141 140
190 189
191 size_t 190 size_t
192 SimilarityPlugin::getPreferredStepSize() const 191 SimilarityPlugin::getPreferredStepSize() const
193 { 192 {
194 if (m_blockSize == 0) calculateBlockSize(); 193 if (m_blockSize == 0) calculateBlockSize();
195 if (m_type == TypeChroma) { 194 return m_blockSize/2;
196 return m_blockSize/2;
197 } else {
198 // for compatibility with old-skool Soundbite, which doesn't
199 // overlap blocks on input
200 return m_blockSize;
201 }
202 } 195 }
203 196
204 size_t 197 size_t
205 SimilarityPlugin::getPreferredBlockSize() const 198 SimilarityPlugin::getPreferredBlockSize() const
206 { 199 {
235 ParameterList list; 228 ParameterList list;
236 229
237 ParameterDescriptor desc; 230 ParameterDescriptor desc;
238 desc.identifier = "featureType"; 231 desc.identifier = "featureType";
239 desc.name = "Feature Type"; 232 desc.name = "Feature Type";
240 desc.description = "";//!!! 233 desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma.";
241 desc.unit = ""; 234 desc.unit = "";
242 desc.minValue = 0; 235 desc.minValue = 0;
243 desc.maxValue = 1; 236 desc.maxValue = 1;
244 desc.defaultValue = 0; 237 desc.defaultValue = 0;
245 desc.isQuantized = true; 238 desc.isQuantized = true;
397 if (m_decimator) { 390 if (m_decimator) {
398 m_decimator->process(dblbuf, decbuf); 391 m_decimator->process(dblbuf, decbuf);
399 } 392 }
400 393
401 if (m_type == TypeMFCC) { 394 if (m_type == TypeMFCC) {
402 m_mfcc->process(m_fftSize, decbuf, raw); 395 m_mfcc->process(decbuf, raw);
403 } else if (m_type == TypeChroma) { 396 } else if (m_type == TypeChroma) {
404 raw = m_chromagram->process(decbuf); 397 raw = m_chromagram->process(decbuf);
405 } 398 }
406 399
407 FeatureColumn mf(m_featureColumnSize); 400 FeatureColumn mf(m_featureColumnSize);
573 feature.values.clear(); 566 feature.values.clear();
574 feature.timestamp = Vamp::RealTime(0, 0); 567 feature.timestamp = Vamp::RealTime(0, 0);
575 568
576 for (std::map<double, int>::iterator i = sorted.begin(); 569 for (std::map<double, int>::iterator i = sorted.begin();
577 i != sorted.end(); ++i) { 570 i != sorted.end(); ++i) {
578 feature.values.push_back(i->second); 571 feature.values.push_back(i->second + 1);
579 } 572 }
580 573
581 returnFeatures[m_sortedVectorOutput].push_back(feature); 574 returnFeatures[m_sortedVectorOutput].push_back(feature);
582 575
583 feature.label = "Ordered distances of channels from first channel"; 576 feature.label = "Ordered distances of channels from first channel";