changeset 106:ac750e222ad3 bqvec-openmp

Merge OpenMP and bqvec stuff into bqvec-openmp branch
author Chris Cannam
date Tue, 06 May 2014 16:36:46 +0100
parents e6b4235fa2ea (current diff) bdc32f72c361 (diff)
children 16d6e2f6f159
files Makefile.linux src/Silvet.cpp testdata/timing/results.txt
diffstat 3 files changed, 56 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.linux	Tue May 06 16:05:05 2014 +0100
+++ b/Makefile.linux	Tue May 06 16:36:46 2014 +0100
@@ -1,11 +1,12 @@
 
-CFLAGS := -Wall -O3 -ffast-math -msse -mfpmath=sse -ftree-vectorize -ftree-vectorizer-verbose=1 -fPIC -I../vamp-plugin-sdk/
+CFLAGS := -Wall -O3 -fopenmp -ffast-math -msse -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/
+
 #CFLAGS := -g -fPIC -I../vamp-plugin-sdk
 
 CXXFLAGS := $(CFLAGS)
 
 VAMPSDK_DIR := ../vamp-plugin-sdk
-PLUGIN_LDFLAGS := -shared -Wl,--version-script=vamp-plugin.map
+PLUGIN_LDFLAGS := -lgomp -shared -Wl,--version-script=vamp-plugin.map
 
 PLUGIN_EXT := .so
 
--- a/src/Silvet.cpp	Tue May 06 16:05:05 2014 +0100
+++ b/src/Silvet.cpp	Tue May 06 16:36:46 2014 +0100
@@ -375,6 +375,8 @@
 
     FeatureSet fs;
 
+    if (filtered.empty()) return fs;
+
     for (int i = 0; i < (int)filtered.size(); ++i) {
         Feature f;
         for (int j = 0; j < processingHeight; ++j) {
@@ -387,38 +389,56 @@
 
     int iterations = 12;
 
-    for (int i = 0; i < width; ++i) {
+    int stride = 8;
 
-        double sum = 0.0;
-        for (int j = 0; j < processingHeight; ++j) {
-            sum += filtered[i][j];
+    for (int i = 0; i < width; i += stride) {
+
+        int chunk = stride;
+        if (i + chunk > width) {
+            chunk = width - i;
         }
 
-        if (sum < 1e-5) continue;
+        vector<vector<double> > pitchSubMatrix
+            (chunk, vector<double>(processingNotes));
 
-        EM em;
-        for (int j = 0; j < iterations; ++j) {
-            em.iterate(filtered[i].data());
+#pragma omp parallel for
+        for (int k = 0; k < chunk; ++k) {
+
+            double sum = 0.0;
+            for (int j = 0; j < processingHeight; ++j) {
+                sum += filtered[i + k][j];
+            }
+
+            if (sum < 1e-5) continue;
+
+            EM em;
+            for (int j = 0; j < iterations; ++j) {
+                em.iterate(filtered[i + k].data());
+            }
+
+            const double *pitches = em.getPitchDistribution();
+        
+            for (int j = 0; j < processingNotes; ++j) {
+                pitchSubMatrix[k][j] = pitches[j] * sum;
+            }
         }
+        
+        for (int k = 0; k < chunk; ++k) {
 
-        const double *pd = em.getPitchDistribution();
-        vector<double> pitches(pd, pd + processingNotes);
-        
-        for (int j = 0; j < processingNotes; ++j) {
-            pitches[j] *= sum;
-        }
+            const vector<double> &pitches = pitchSubMatrix[k];
 
-        Feature f;
-        for (int j = 0; j < processingNotes; ++j) {
-            f.values.push_back(float(pitches[j]));
-        }
-        fs[m_pitchOutputNo].push_back(f);
+            Feature f;
+            for (int j = 0; j < processingNotes; ++j) {
+                f.values.push_back(float(pitches[j]));
+            }
+            fs[m_pitchOutputNo].push_back(f);
 
-        FeatureList noteFeatures = postProcess(pitches);
+            FeatureList noteFeatures = postProcess(pitches);
 
-        for (FeatureList::const_iterator fi = noteFeatures.begin();
-             fi != noteFeatures.end(); ++fi) {
-            fs[m_notesOutputNo].push_back(*fi);
+            for (FeatureList::const_iterator fi = noteFeatures.begin();
+                 fi != noteFeatures.end(); ++fi) {
+                fs[m_notesOutputNo].push_back(*fi);
+            }
         }
     }
 
--- a/testdata/timing/results.txt	Tue May 06 16:05:05 2014 +0100
+++ b/testdata/timing/results.txt	Tue May 06 16:36:46 2014 +0100
@@ -91,6 +91,16 @@
 conclusion: supports the previous test
 
 
+OPENMP:
+
+commit:62b7be1226d5, as commit:ce64d11ef336 but with OpenMP parallel
+"for" in the main EM iteration loop (4 cores)
+
+real	0m56.400s
+user	2m59.740s
+sys	0m0.237s
+
+
 EM TWEAKS:
 
 commit:a0dedcbfa628, as commit:ce64d11ef336 but with variables hoisted