changeset 94:891cbcf1e4d2 bqvec

Vectorise some calculations
author Chris Cannam
date Tue, 06 May 2014 14:29:07 +0100
parents a062b79865d6
children 853b2d750688
files Makefile.linux src/EM.cpp
diffstat 2 files changed, 12 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.linux	Tue May 06 14:20:36 2014 +0100
+++ b/Makefile.linux	Tue May 06 14:29:07 2014 +0100
@@ -1,5 +1,5 @@
 
-CFLAGS := -Wall -O3 -ffast-math -msse -mfpmath=sse -ftree-vectorize -ftree-vectorizer-verbose=2 -fPIC -I../vamp-plugin-sdk/
+CFLAGS := -Wall -O3 -ffast-math -msse -mfpmath=sse -ftree-vectorize -ftree-vectorizer-verbose=1 -fPIC -I../vamp-plugin-sdk/
 #CFLAGS := -g -fPIC -I../vamp-plugin-sdk
 
 CXXFLAGS := $(CFLAGS)
--- a/src/EM.cpp	Tue May 06 14:20:36 2014 +0100
+++ b/src/EM.cpp	Tue May 06 14:29:07 2014 +0100
@@ -176,6 +176,8 @@
         v_set(newSources[i], epsilon, m_noteCount);
     }
 
+    double *contributions = allocate<double>(m_binCount);
+
     for (int n = 0; n < m_noteCount; ++n) {
 
         const double pitch = m_pitches[n];
@@ -190,22 +192,22 @@
                 const double factor = pitch * source * shift;
                 const double *w = templateFor(i, n, f);
 
+                v_copy(contributions, w, m_binCount);
+                v_multiply(contributions, m_q, m_binCount); // elementwise w[j] * m_q[j], as in the scalar loops this replaces (not a sum)
+                v_scale(contributions, factor, m_binCount);
+
+                double total = v_sum(contributions, m_binCount);
+
                 if (n >= m_lowestPitch && n <= m_highestPitch) {
 
-                    for (int j = 0; j < m_binCount; ++j) {
-                        newPitches[n] += w[j] * m_q[j] * factor;
-                    }
+                    newPitches[n] += total;
 
                     if (inRange(i, n)) {
-                        for (int j = 0; j < m_binCount; ++j) {
-                            newSources[i][n] += w[j] * m_q[j] * factor;
-                        }
+                        newSources[i][n] += total;
                     }
                 }
 
-                for (int j = 0; j < m_binCount; ++j) {
-                    newShifts[f][n] += w[j] * m_q[j] * factor;
-                }
+                newShifts[f][n] += total;
             }
         }
     }