# HG changeset patch # User Chris Cannam # Date 1200506605 0 # Node ID 1389f05cb688ff04dcddf99f9ce5186385c887c6 # Parent 0f85778f1b5377106ac8dc5f58f819a97ff45d8f * Various fixes diff -r 0f85778f1b53 -r 1389f05cb688 plugins/SimilarityPlugin.cpp --- a/plugins/SimilarityPlugin.cpp Mon Jan 14 18:14:55 2008 +0000 +++ b/plugins/SimilarityPlugin.cpp Wed Jan 16 18:03:25 2008 +0000 @@ -81,7 +81,7 @@ size_t SimilarityPlugin::getMinChannelCount() const { - return 2; + return 1; } size_t @@ -97,6 +97,8 @@ channels > getMaxChannelCount()) return false; if (stepSize != getPreferredStepSize()) { + //!!! actually this perhaps shouldn't be an error... similarly + //using more than getMaxChannelCount channels std::cerr << "SimilarityPlugin::initialise: supplied step size " << stepSize << " differs from required step size " << getPreferredStepSize() << std::endl; @@ -130,6 +132,8 @@ m_mfcc = new MFCC(config); m_fftSize = m_mfcc->getfftlength(); + std::cerr << "MFCC FS = " << config.FS << ", FFT size = " << m_fftSize<< std::endl; + } else if (m_type == TypeChroma) { m_featureColumnSize = 12; @@ -183,7 +187,13 @@ SimilarityPlugin::getPreferredStepSize() const { if (m_blockSize == 0) calculateBlockSize(); - return m_blockSize/2; + if (m_type == TypeChroma) { + return m_blockSize/2; + } else { + // for compatibility with old-skool Soundbite, which doesn't + // overlap blocks on input + return m_blockSize; + } } size_t @@ -272,9 +282,9 @@ OutputList list; OutputDescriptor similarity; - similarity.identifier = "distance"; - similarity.name = "Distance"; - similarity.description = "Distance Metric for Similarity (smaller = more similar)"; + similarity.identifier = "distancematrix"; + similarity.name = "Distance Matrix"; + similarity.description = "Distance matrix for similarity metric. Smaller = more similar. Should be symmetrical."; similarity.unit = ""; similarity.hasFixedBinCount = true; similarity.binCount = m_channels; @@ -283,34 +293,52 @@ similarity.sampleType = OutputDescriptor::FixedSampleRate; similarity.sampleRate = 1; + m_distanceMatrixOutput = list.size(); list.push_back(similarity); + OutputDescriptor simvec; + simvec.identifier = "distancevector"; + simvec.name = "Distance from First Channel"; + simvec.description = "Distance vector for similarity of each channel to the first channel. Smaller = more similar."; + simvec.unit = ""; + simvec.hasFixedBinCount = true; + simvec.binCount = m_channels; + simvec.hasKnownExtents = false; + simvec.isQuantized = false; + simvec.sampleType = OutputDescriptor::FixedSampleRate; + simvec.sampleRate = 1; + + m_distanceVectorOutput = list.size(); + list.push_back(simvec); + OutputDescriptor means; means.identifier = "means"; means.name = "Feature Means"; - means.description = ""; + means.description = "Means of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type."; means.unit = ""; means.hasFixedBinCount = true; - means.binCount = m_channels; + means.binCount = m_featureColumnSize; means.hasKnownExtents = false; means.isQuantized = false; - means.sampleType = OutputDescriptor::VariableSampleRate; - means.sampleRate = m_inputSampleRate / getPreferredStepSize(); + means.sampleType = OutputDescriptor::FixedSampleRate; + means.sampleRate = 1; + m_meansOutput = list.size(); list.push_back(means); OutputDescriptor variances; variances.identifier = "variances"; variances.name = "Feature Variances"; - variances.description = ""; + variances.description = "Variances of the feature bins. Feature time (sec) corresponds to input channel. Number of bins depends on selected feature type."; variances.unit = ""; variances.hasFixedBinCount = true; - variances.binCount = m_channels; + variances.binCount = m_featureColumnSize; variances.hasKnownExtents = false; variances.isQuantized = false; - variances.sampleType = OutputDescriptor::VariableSampleRate; - variances.sampleRate = m_inputSampleRate / getPreferredStepSize(); + variances.sampleType = OutputDescriptor::FixedSampleRate; + variances.sampleRate = 1; + m_variancesOutput = list.size(); list.push_back(variances); return list; @@ -331,12 +359,20 @@ ownRaw = true; } + float threshold = 1e-10; + for (size_t c = 0; c < m_channels; ++c) { + bool empty = true; + for (int i = 0; i < m_blockSize; ++i) { - dblbuf[i] = inputBuffers[c][i]; + float val = inputBuffers[c][i]; + if (fabs(val) > threshold) empty = false; + dblbuf[i] = val; } + if (empty) continue; + if (m_decimator) { m_decimator->process(dblbuf, decbuf); } @@ -373,13 +409,21 @@ for (int j = 0; j < m_featureColumnSize; ++j) { - mean[j] = variance[j] = 0.0; + mean[j] = 0.0; + variance[j] = 0.0; int count; -// std::cout << i << "," << j << ":" << std::endl; + // we need to use at least one value, but we want to + // disregard the final value because it may have come from + // incomplete data + + int sz = m_values[i].size(); + if (sz > 1) --sz; + +// std::cout << "\nBin " << j << ":" << std::endl; count = 0; - for (int k = 0; k < m_values[i].size(); ++k) { + for (int k = 0; k < sz; ++k) { double val = m_values[i][k][j]; // std::cout << val << " "; if (isnan(val) || isinf(val)) continue; @@ -387,11 +431,10 @@ ++count; } if (count > 0) mean[j] /= count; - -// std::cout << std::endl; +// std::cout << "\n" << count << " non-NaN non-inf values, so mean = " << mean[j] << std::endl; count = 0; - for (int k = 0; k < m_values[i].size(); ++k) { + for (int k = 0; k < sz; ++k) { double val = ((m_values[i][k][j] - mean[j]) * (m_values[i][k][j] - mean[j])); if (isnan(val) || isinf(val)) continue; @@ -399,6 +442,7 @@ ++count; } if (count > 0) variance[j] /= count; +// std::cout << "... and variance = " << variance[j] << std::endl; } m[i] = mean; @@ -442,6 +486,9 @@ FeatureSet returnFeatures; + Feature distanceVectorFeature; + distanceVectorFeature.label = "Distance from first channel"; + for (int i = 0; i < m_channels; ++i) { Feature feature; @@ -453,25 +500,30 @@ feature.values.push_back(m[i][k]); } - returnFeatures[1].push_back(feature); + returnFeatures[m_meansOutput].push_back(feature); feature.values.clear(); for (int k = 0; k < m_featureColumnSize; ++k) { feature.values.push_back(v[i][k]); } - returnFeatures[2].push_back(feature); + returnFeatures[m_variancesOutput].push_back(feature); feature.values.clear(); for (int j = 0; j < m_channels; ++j) { feature.values.push_back(distances[i][j]); } + ostringstream oss; oss << "Distance from " << (i + 1); feature.label = oss.str(); - returnFeatures[0].push_back(feature); + returnFeatures[m_distanceMatrixOutput].push_back(feature); + + distanceVectorFeature.values.push_back(distances[0][i]); } + returnFeatures[m_distanceVectorOutput].push_back(distanceVectorFeature); + return returnFeatures; } diff -r 0f85778f1b53 -r 1389f05cb688 plugins/SimilarityPlugin.h --- a/plugins/SimilarityPlugin.h Mon Jan 14 18:14:55 2008 +0000 +++ b/plugins/SimilarityPlugin.h Wed Jan 16 18:03:25 2008 +0000 @@ -69,6 +69,11 @@ size_t m_fftSize; int m_channels; + mutable int m_distanceMatrixOutput; + mutable int m_distanceVectorOutput; + mutable int m_meansOutput; + mutable int m_variancesOutput; + typedef std::vector FeatureColumn; typedef std::vector FeatureMatrix; typedef std::vector FeatureMatrixSet; diff -r 0f85778f1b53 -r 1389f05cb688 qm-vamp-plugins.pro --- a/qm-vamp-plugins.pro Mon Jan 14 18:14:55 2008 +0000 +++ b/qm-vamp-plugins.pro Wed Jan 16 18:03:25 2008 +0000 @@ -4,7 +4,7 @@ CONFIG += plugin warn_on release CONFIG -= qt -linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O2 -march=pentium3 -msse +linux-g++:QMAKE_CXXFLAGS_RELEASE += -DNDEBUG -O3 -march=pentium4 -msse -msse2 OBJECTS_DIR = tmp_obj MOC_DIR = tmp_moc