# HG changeset patch # User Chris Cannam # Date 1399390606 -3600 # Node ID ac750e222ad35d2b52e8d10e1350f335046fecda # Parent e6b4235fa2ea1bea3eba1f7ccc2c7b44f0d1ff5c # Parent bdc32f72c361cd8a30a66fb5813748b76cdd9d66 Merge OpenMP and bqvec stuff into bqvec-openmp branch diff -r e6b4235fa2ea -r ac750e222ad3 Makefile.linux --- a/Makefile.linux Tue May 06 16:05:05 2014 +0100 +++ b/Makefile.linux Tue May 06 16:36:46 2014 +0100 @@ -1,11 +1,12 @@ -CFLAGS := -Wall -O3 -ffast-math -msse -mfpmath=sse -ftree-vectorize -ftree-vectorizer-verbose=1 -fPIC -I../vamp-plugin-sdk/ +CFLAGS := -Wall -O3 -fopenmp -ffast-math -msse -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/ + #CFLAGS := -g -fPIC -I../vamp-plugin-sdk CXXFLAGS := $(CFLAGS) VAMPSDK_DIR := ../vamp-plugin-sdk -PLUGIN_LDFLAGS := -shared -Wl,--version-script=vamp-plugin.map +PLUGIN_LDFLAGS := -lgomp -shared -Wl,--version-script=vamp-plugin.map PLUGIN_EXT := .so diff -r e6b4235fa2ea -r ac750e222ad3 src/Silvet.cpp --- a/src/Silvet.cpp Tue May 06 16:05:05 2014 +0100 +++ b/src/Silvet.cpp Tue May 06 16:36:46 2014 +0100 @@ -375,6 +375,8 @@ FeatureSet fs; + if (filtered.empty()) return fs; + for (int i = 0; i < (int)filtered.size(); ++i) { Feature f; for (int j = 0; j < processingHeight; ++j) { @@ -387,38 +389,56 @@ int iterations = 12; - for (int i = 0; i < width; ++i) { + int stride = 8; - double sum = 0.0; - for (int j = 0; j < processingHeight; ++j) { - sum += filtered[i][j]; + for (int i = 0; i < width; i += stride) { + + int chunk = stride; + if (i + chunk > width) { + chunk = width - i; } - if (sum < 1e-5) continue; + vector<vector<double> > pitchSubMatrix + (chunk, vector<double>(processingNotes)); - EM em; - for (int j = 0; j < iterations; ++j) { - em.iterate(filtered[i].data()); +#pragma omp parallel for + for (int k = 0; k < chunk; ++k) { + + double sum = 0.0; + for (int j = 0; j < processingHeight; ++j) { + sum += filtered[i + k][j]; + } + + if (sum < 1e-5) continue; + + EM em; + for (int j = 0; j < iterations; ++j) { +
em.iterate(filtered[i + k].data()); + } + + const double *pitches = em.getPitchDistribution(); + + for (int j = 0; j < processingNotes; ++j) { + pitchSubMatrix[k][j] = pitches[j] * sum; + } } + + for (int k = 0; k < chunk; ++k) { - const double *pd = em.getPitchDistribution(); - vector<double> pitches(pd, pd + processingNotes); - - for (int j = 0; j < processingNotes; ++j) { - pitches[j] *= sum; - } + const vector<double> &pitches = pitchSubMatrix[k]; - Feature f; - for (int j = 0; j < processingNotes; ++j) { - f.values.push_back(float(pitches[j])); - } - fs[m_pitchOutputNo].push_back(f); + Feature f; + for (int j = 0; j < processingNotes; ++j) { + f.values.push_back(float(pitches[j])); + } + fs[m_pitchOutputNo].push_back(f); - FeatureList noteFeatures = postProcess(pitches); + FeatureList noteFeatures = postProcess(pitches); - for (FeatureList::const_iterator fi = noteFeatures.begin(); - fi != noteFeatures.end(); ++fi) { - fs[m_notesOutputNo].push_back(*fi); + for (FeatureList::const_iterator fi = noteFeatures.begin(); + fi != noteFeatures.end(); ++fi) { + fs[m_notesOutputNo].push_back(*fi); + } } } diff -r e6b4235fa2ea -r ac750e222ad3 testdata/timing/results.txt --- a/testdata/timing/results.txt Tue May 06 16:05:05 2014 +0100 +++ b/testdata/timing/results.txt Tue May 06 16:36:46 2014 +0100 @@ -91,6 +91,16 @@ conclusion: supports the previous test +OPENMP: + +commit:62b7be1226d5, as commit:ce64d11ef336 but with OpenMP parallel +"for" in the main EM iteration loop (4 cores) + +real 0m56.400s +user 2m59.740s +sys 0m0.237s + + EM TWEAKS: commit:a0dedcbfa628, as commit:ce64d11ef336 but with variables hoisted