Mercurial > hg > qm-vamp-plugins
comparison plugins/SimilarityPlugin.cpp @ 45:5d7ce1d87301
* Add MFCC plugin
* Add means output to Chromagram plugin
* Update similarity plugin for MFCC changes
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 18 Jan 2008 13:30:56 +0000 |
parents | 1dc00e4dbae6 |
children | 26a2e341d358 |
Comparison legend: equal, deleted, inserted, replaced
44:1dc00e4dbae6 | 45:5d7ce1d87301 |
---|---|
61 } | 61 } |
62 | 62 |
63 string | 63 string |
64 SimilarityPlugin::getMaker() const | 64 SimilarityPlugin::getMaker() const |
65 { | 65 { |
66 return "Chris Cannam, Queen Mary, University of London"; | 66 return "Mark Levy and Chris Cannam, Queen Mary, University of London"; |
67 } | 67 } |
68 | 68 |
69 int | 69 int |
70 SimilarityPlugin::getPluginVersion() const | 70 SimilarityPlugin::getPluginVersion() const |
71 { | 71 { |
86 | 86 |
87 size_t | 87 size_t |
88 SimilarityPlugin::getMaxChannelCount() const | 88 SimilarityPlugin::getMaxChannelCount() const |
89 { | 89 { |
90 return 1024; | 90 return 1024; |
91 // return 1; | |
92 } | 91 } |
93 | 92 |
94 bool | 93 bool |
95 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) | 94 SimilarityPlugin::initialise(size_t channels, size_t stepSize, size_t blockSize) |
96 { | 95 { |
127 | 126 |
128 if (m_type == TypeMFCC) { | 127 if (m_type == TypeMFCC) { |
129 | 128 |
130 m_featureColumnSize = 20; | 129 m_featureColumnSize = 20; |
131 | 130 |
132 MFCCConfig config; | 131 MFCCConfig config(lrintf(m_inputSampleRate) / decimationFactor); |
133 config.FS = lrintf(m_inputSampleRate) / decimationFactor; | |
134 config.fftsize = 2048; | 132 config.fftsize = 2048; |
135 config.nceps = m_featureColumnSize - 1; | 133 config.nceps = m_featureColumnSize - 1; |
136 config.want_c0 = true; | 134 config.want_c0 = true; |
135 config.logpower = 1; | |
137 m_mfcc = new MFCC(config); | 136 m_mfcc = new MFCC(config); |
138 m_fftSize = m_mfcc->getfftlength(); | 137 m_fftSize = m_mfcc->getfftlength(); |
139 | 138 |
140 std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl; | 139 std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl; |
141 | 140 |
190 | 189 |
191 size_t | 190 size_t |
192 SimilarityPlugin::getPreferredStepSize() const | 191 SimilarityPlugin::getPreferredStepSize() const |
193 { | 192 { |
194 if (m_blockSize == 0) calculateBlockSize(); | 193 if (m_blockSize == 0) calculateBlockSize(); |
195 if (m_type == TypeChroma) { | 194 return m_blockSize/2; |
196 return m_blockSize/2; | |
197 } else { | |
198 // for compatibility with old-skool Soundbite, which doesn't | |
199 // overlap blocks on input | |
200 return m_blockSize; | |
201 } | |
202 } | 195 } |
203 | 196 |
204 size_t | 197 size_t |
205 SimilarityPlugin::getPreferredBlockSize() const | 198 SimilarityPlugin::getPreferredBlockSize() const |
206 { | 199 { |
235 ParameterList list; | 228 ParameterList list; |
236 | 229 |
237 ParameterDescriptor desc; | 230 ParameterDescriptor desc; |
238 desc.identifier = "featureType"; | 231 desc.identifier = "featureType"; |
239 desc.name = "Feature Type"; | 232 desc.name = "Feature Type"; |
240 desc.description = "";//!!! | 233 desc.description = "Audio feature used for similarity measure. Timbral: use the first 20 MFCCs (19 plus C0). Chromatic: use 12 bin-per-octave chroma."; |
241 desc.unit = ""; | 234 desc.unit = ""; |
242 desc.minValue = 0; | 235 desc.minValue = 0; |
243 desc.maxValue = 1; | 236 desc.maxValue = 1; |
244 desc.defaultValue = 0; | 237 desc.defaultValue = 0; |
245 desc.isQuantized = true; | 238 desc.isQuantized = true; |
397 if (m_decimator) { | 390 if (m_decimator) { |
398 m_decimator->process(dblbuf, decbuf); | 391 m_decimator->process(dblbuf, decbuf); |
399 } | 392 } |
400 | 393 |
401 if (m_type == TypeMFCC) { | 394 if (m_type == TypeMFCC) { |
402 m_mfcc->process(m_fftSize, decbuf, raw); | 395 m_mfcc->process(decbuf, raw); |
403 } else if (m_type == TypeChroma) { | 396 } else if (m_type == TypeChroma) { |
404 raw = m_chromagram->process(decbuf); | 397 raw = m_chromagram->process(decbuf); |
405 } | 398 } |
406 | 399 |
407 FeatureColumn mf(m_featureColumnSize); | 400 FeatureColumn mf(m_featureColumnSize); |
573 feature.values.clear(); | 566 feature.values.clear(); |
574 feature.timestamp = Vamp::RealTime(0, 0); | 567 feature.timestamp = Vamp::RealTime(0, 0); |
575 | 568 |
576 for (std::map<double, int>::iterator i = sorted.begin(); | 569 for (std::map<double, int>::iterator i = sorted.begin(); |
577 i != sorted.end(); ++i) { | 570 i != sorted.end(); ++i) { |
578 feature.values.push_back(i->second); | 571 feature.values.push_back(i->second + 1); |
579 } | 572 } |
580 | 573 |
581 returnFeatures[m_sortedVectorOutput].push_back(feature); | 574 returnFeatures[m_sortedVectorOutput].push_back(feature); |
582 | 575 |
583 feature.label = "Ordered distances of channels from first channel"; | 576 feature.label = "Ordered distances of channels from first channel"; |