Mercurial > hg > silvet
changeset 106:ac750e222ad3 bqvec-openmp
Merge OpenMP and bqvec stuff into bqvec-openmp branch
author | Chris Cannam |
---|---|
date | Tue, 06 May 2014 16:36:46 +0100 |
parents | e6b4235fa2ea (diff) bdc32f72c361 (current diff) |
children | 16d6e2f6f159 |
files | Makefile.linux src/Silvet.cpp testdata/timing/results.txt |
diffstat | 8 files changed, 233 insertions(+), 130 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile.inc Tue May 06 16:31:32 2014 +0100 +++ b/Makefile.inc Tue May 06 16:36:46 2014 +0100 @@ -3,6 +3,7 @@ QMDSP_DIR ?= ../qm-dsp VAMPSDK_DIR ?= ../vamp-plugin-sdk +BQVEC_DIR ?= ../bqvec CQ_DIR ?= constant-q-cpp/cpp-qm-dsp @@ -12,7 +13,7 @@ CC ?= gcc CFLAGS := $(CFLAGS) -CXXFLAGS := -I. -I$(VAMPSDK_DIR) -I$(QMDSP_DIR) $(CXXFLAGS) +CXXFLAGS := -I. -I$(VAMPSDK_DIR) -I$(QMDSP_DIR) -I$(BQVEC_DIR) $(CXXFLAGS) LDFLAGS := $(LDFLAGS) PLUGIN_LDFLAGS := $(LDFLAGS) $(PLUGIN_LDFLAGS)
--- a/Makefile.linux Tue May 06 16:31:32 2014 +0100 +++ b/Makefile.linux Tue May 06 16:36:46 2014 +0100 @@ -1,5 +1,5 @@ -CFLAGS := -Wall -O3 -ffast-math -fopenmp -msse -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/ +CFLAGS := -Wall -O3 -fopenmp -ffast-math -msse -mfpmath=sse -ftree-vectorize -fPIC -I../vamp-plugin-sdk/ #CFLAGS := -g -fPIC -I../vamp-plugin-sdk
--- a/data/include/templates.h Tue May 06 16:31:32 2014 +0100 +++ b/data/include/templates.h Tue May 06 16:36:46 2014 +0100 @@ -15,7 +15,7 @@ const char *name; int lowest; int highest; - float data[SILVET_TEMPLATE_NOTE_COUNT][SILVET_TEMPLATE_SIZE]; + double data[SILVET_TEMPLATE_NOTE_COUNT][SILVET_TEMPLATE_SIZE]; } silvet_template_t; static int silvet_templates_lowest_note = 15;
--- a/src/EM.cpp Tue May 06 16:31:32 2014 +0100 +++ b/src/EM.cpp Tue May 06 16:36:46 2014 +0100 @@ -22,52 +22,63 @@ #include <iostream> -#include <vector> +#include "VectorOps.h" +#include "Allocators.h" using std::vector; using std::cerr; using std::endl; +using namespace breakfastquay; + static double epsilon = 1e-16; EM::EM() : m_noteCount(SILVET_TEMPLATE_NOTE_COUNT), m_shiftCount(SILVET_TEMPLATE_MAX_SHIFT * 2 + 1), m_binCount(SILVET_TEMPLATE_HEIGHT), - m_instrumentCount(SILVET_TEMPLATE_COUNT), + m_sourceCount(SILVET_TEMPLATE_COUNT), m_pitchSparsity(1.1), - m_sourceSparsity(1.3) + m_sourceSparsity(1.3), + m_lowestPitch(silvet_templates_lowest_note), + m_highestPitch(silvet_templates_highest_note) { - m_lowestPitch = silvet_templates_lowest_note; - m_highestPitch = silvet_templates_highest_note; - - m_pitches = V(m_noteCount); + m_pitches = allocate<double>(m_noteCount); + m_updatePitches = allocate<double>(m_noteCount); for (int n = 0; n < m_noteCount; ++n) { m_pitches[n] = drand48(); } - m_shifts = Grid(m_shiftCount); + m_shifts = allocate_channels<double>(m_shiftCount, m_noteCount); + m_updateShifts = allocate_channels<double>(m_shiftCount, m_noteCount); for (int f = 0; f < m_shiftCount; ++f) { - m_shifts[f] = V(m_noteCount); for (int n = 0; n < m_noteCount; ++n) { m_shifts[f][n] = drand48(); } } - m_sources = Grid(m_instrumentCount); - for (int i = 0; i < m_instrumentCount; ++i) { - m_sources[i] = V(m_noteCount); + m_sources = allocate_channels<double>(m_sourceCount, m_noteCount); + m_updateSources = allocate_channels<double>(m_sourceCount, m_noteCount); + for (int i = 0; i < m_sourceCount; ++i) { for (int n = 0; n < m_noteCount; ++n) { m_sources[i][n] = (inRange(i, n) ? 1.0 : 0.0); } } - m_estimate = V(m_binCount); - m_q = V(m_binCount); + m_estimate = allocate<double>(m_binCount); + m_q = allocate<double>(m_binCount); } EM::~EM() { + deallocate(m_q); + deallocate(m_estimate); + deallocate_channels(m_sources, m_sourceCount); + deallocate_channels(m_updateSources, m_sourceCount); + deallocate_channels(m_shifts, m_shiftCount); + deallocate_channels(m_updateShifts, m_shiftCount); + deallocate(m_pitches); + deallocate(m_updatePitches); } void @@ -86,68 +97,63 @@ } void -EM::normaliseColumn(V &column) +EM::normaliseColumn(double *column, int size) { - double sum = 0.0; - for (int i = 0; i < (int)column.size(); ++i) { - sum += column[i]; - } - for (int i = 0; i < (int)column.size(); ++i) { - column[i] /= sum; - } + double sum = v_sum(column, size); + v_scale(column, 1.0 / sum, size); } void -EM::normaliseGrid(Grid &grid) +EM::normaliseGrid(double **grid, int size1, int size2) { - V denominators(grid[0].size()); + double *denominators = allocate_and_zero<double>(size2); - for (int i = 0; i < (int)grid.size(); ++i) { - for (int j = 0; j < (int)grid[i].size(); ++j) { + for (int i = 0; i < size1; ++i) { + for (int j = 0; j < size2; ++j) { denominators[j] += grid[i][j]; } } - for (int i = 0; i < (int)grid.size(); ++i) { - for (int j = 0; j < (int)grid[i].size(); ++j) { - grid[i][j] /= denominators[j]; - } + for (int i = 0; i < size1; ++i) { + v_divide(grid[i], denominators, size2); } + + deallocate(denominators); } void -EM::iterate(V column) +EM::iterate(const double *column) { - normaliseColumn(column); - expectation(column); - maximisation(column); + double *norm = allocate<double>(m_binCount); + v_copy(norm, column, m_binCount); + normaliseColumn(norm, m_binCount); + expectation(norm); + maximisation(norm); + deallocate(norm); } -const float * +const double * EM::templateFor(int instrument, int note, int shift) { return silvet_templates[instrument].data[note] + shift; } void -EM::expectation(const V &column) +EM::expectation(const double *column) { // cerr << "."; - for (int i = 0; i < m_binCount; ++i) { - m_estimate[i] = epsilon; - } + v_set(m_estimate, epsilon, m_binCount); - for (int i = 0; i < m_instrumentCount; ++i) { + for (int i = 0; i < m_sourceCount; ++i) { for (int n = 0; n < m_noteCount; ++n) { + const double pitch = m_pitches[n]; + const double source = m_sources[i][n]; for (int f = 0; f < m_shiftCount; ++f) { - const float *w = templateFor(i, n, f); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - double shift = m_shifts[f][n]; - for (int j = 0; j < m_binCount; ++j) { - m_estimate[j] += w[j] * pitch * source * shift; - } + const double *w = templateFor(i, n, f); + const double shift = m_shifts[f][n]; + const double factor = pitch * source * shift; + v_add_with_gain(m_estimate, w, m_binCount, factor); } } } @@ -158,75 +164,75 @@ } void -EM::maximisation(const V &column) +EM::maximisation(const double *column) { - V newPitches = m_pitches; + v_set(m_updatePitches, epsilon, m_noteCount); + for (int i = 0; i < m_shiftCount; ++i) { + v_set(m_updateShifts[i], epsilon, m_noteCount); + } + for (int i = 0; i < m_sourceCount; ++i) { + v_set(m_updateSources[i], epsilon, m_noteCount); + } + + double *contributions = allocate<double>(m_binCount); for (int n = 0; n < m_noteCount; ++n) { - newPitches[n] = epsilon; - if (n >= m_lowestPitch && n <= m_highestPitch) { - for (int i = 0; i < m_instrumentCount; ++i) { - for (int f = 0; f < m_shiftCount; ++f) { - const float *w = templateFor(i, n, f); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - double shift = m_shifts[f][n]; - for (int j = 0; j < m_binCount; ++j) { - newPitches[n] += w[j] * m_q[j] * pitch * source * shift; + + const double pitch = m_pitches[n]; + + for (int f = 0; f < m_shiftCount; ++f) { + + const double shift = m_shifts[f][n]; + + for (int i = 0; i < m_sourceCount; ++i) { + + const double source = m_sources[i][n]; + const double factor = pitch * source * shift; + const double *w = templateFor(i, n, f); + + v_copy(contributions, w, m_binCount); + v_multiply(contributions, m_q, m_binCount); + v_scale(contributions, factor, m_binCount); + + double total = v_sum(contributions, m_binCount); + + if (n >= m_lowestPitch && n <= m_highestPitch) { + + m_updatePitches[n] += total; + + if (inRange(i, n)) { + m_updateSources[i][n] += total; } } - } - } - if (m_pitchSparsity != 1.0) { - newPitches[n] = pow(newPitches[n], m_pitchSparsity); - } - } - normaliseColumn(newPitches); - Grid newShifts = m_shifts; - - for (int f = 0; f < m_shiftCount; ++f) { - for (int n = 0; n < m_noteCount; ++n) { - newShifts[f][n] = epsilon; - for (int i = 0; i < m_instrumentCount; ++i) { - const float *w = templateFor(i, n, f); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - double shift = m_shifts[f][n]; - for (int j = 0; j < m_binCount; ++j) { - newShifts[f][n] += w[j] * m_q[j] * pitch * source * shift; - } + m_updateShifts[f][n] += total; } } } - normaliseGrid(newShifts); - Grid newSources = m_sources; + if (m_pitchSparsity != 1.0) { + for (int n = 0; n < m_noteCount; ++n) { + m_updatePitches[n] = + pow(m_updatePitches[n], m_pitchSparsity); + } + } - for (int i = 0; i < m_instrumentCount; ++i) { + if (m_sourceSparsity != 1.0) { for (int n = 0; n < m_noteCount; ++n) { - newSources[i][n] = epsilon; - if (inRange(i, n)) { - for (int f = 0; f < m_shiftCount; ++f) { - const float *w = templateFor(i, n, f); - double pitch = m_pitches[n]; - double source = m_sources[i][n]; - double shift = m_shifts[f][n]; - for (int j = 0; j < m_binCount; ++j) { - newSources[i][n] += w[j] * m_q[j] * pitch * source * shift; - } - } - } - if (m_sourceSparsity != 1.0) { - newSources[i][n] = pow(newSources[i][n], m_sourceSparsity); + for (int i = 0; i < m_sourceCount; ++i) { + m_updateSources[i][n] = + pow(m_updateSources[i][n], m_sourceSparsity); } } } - normaliseGrid(newSources); - m_pitches = newPitches; - m_shifts = newShifts; - m_sources = newSources; + normaliseColumn(m_updatePitches, m_noteCount); + normaliseGrid(m_updateShifts, m_shiftCount, m_noteCount); + normaliseGrid(m_updateSources, m_sourceCount, m_noteCount); + + std::swap(m_pitches, m_updatePitches); + std::swap(m_shifts, m_updateShifts); + std::swap(m_sources, m_updateSources); }
--- a/src/EM.h Tue May 06 16:31:32 2014 +0100 +++ b/src/EM.h Tue May 06 16:36:46 2014 +0100 @@ -24,46 +24,52 @@ EM(); ~EM(); - void iterate(std::vector<double> column); + int getBinCount() const { return m_binCount; } // size of input column + int getNoteCount() const { return m_noteCount; } // size of pitch column + int getSourceCount() const { return m_sourceCount; } - const std::vector<double> &getEstimate() const { + void iterate(const double *column); + + const double *getEstimate() const { // bin count return m_estimate; } - const std::vector<double> &getPitchDistribution() const { + const double *getPitchDistribution() const { // note count return m_pitches; } - const std::vector<std::vector<double> > &getSources() const { + const double *const *getSources() const { // source count * note count return m_sources; } private: - typedef std::vector<double> V; - typedef std::vector<std::vector<double> > Grid; + double *m_pitches; + double **m_shifts; + double **m_sources; - V m_pitches; - Grid m_shifts; - Grid m_sources; + double *m_updatePitches; + double **m_updateShifts; + double **m_updateSources; - V m_estimate; - V m_q; + double *m_estimate; + double *m_q; - int m_noteCount; - int m_shiftCount; // 1 + 2 * max template shift - int m_binCount; - int m_instrumentCount; + const int m_noteCount; + const int m_shiftCount; // 1 + 2 * max template shift + const int m_binCount; + const int m_sourceCount; - double m_pitchSparsity; - double m_sourceSparsity; + const double m_pitchSparsity; + const double m_sourceSparsity; - int m_lowestPitch; - int m_highestPitch; + const int m_lowestPitch; + const int m_highestPitch; - void normaliseColumn(V &column); - void normaliseGrid(Grid &grid); - void expectation(const V &column); - void maximisation(const V &column); + void normaliseColumn(double *column, int size); + void normaliseGrid(double **grid, int size1, int size2); - const float *templateFor(int instrument, int note, int shift); + void expectation(const double *column); // size is m_binCount + void maximisation(const double *column); // size is m_binCount + + const double *templateFor(int instrument, int note, int shift); void rangeFor(int instrument, int &minPitch, int &maxPitch); bool inRange(int instrument, int pitch); };
--- a/src/Silvet.cpp Tue May 06 16:31:32 2014 +0100 +++ b/src/Silvet.cpp Tue May 06 16:36:46 2014 +0100 @@ -413,10 +413,10 @@ EM em; for (int j = 0; j < iterations; ++j) { - em.iterate(filtered[i + k]); + em.iterate(filtered[i + k].data()); } - vector<double> pitches = em.getPitchDistribution(); + const double *pitches = em.getPitchDistribution(); for (int j = 0; j < processingNotes; ++j) { pitchSubMatrix[k][j] = pitches[j] * sum;
--- a/testdata/timing/results.txt Tue May 06 16:31:32 2014 +0100 +++ b/testdata/timing/results.txt Tue May 06 16:36:46 2014 +0100 @@ -100,3 +100,93 @@ user 2m59.740s sys 0m0.237s + +EM TWEAKS: + +commit:a0dedcbfa628, as commit:ce64d11ef336 but with variables hoisted +out of loops and consts added wherever applicable + +real 1m44.548s +user 1m44.460s +sys 0m0.183s + +conclusion: compiler already knows this stuff + +commit:64b08cc12da0, as commit:ce64d11ef336 but with loops merged so +as theoretically to reduce intermediate calculations + +real 3m46.969s +user 3m46.850s +sys 0m0.220s + +commit:6075e92d63ab, as commit:64b08cc12da0 but with innermost loop +reverted to three loops with simple bodies instead of one with a more +complex body + +real 1m44.767s +user 1m44.490s +sys 0m0.190s + +commit:97b77e7cb94c, as commit:6075e92d63ab but with templates stored +as doubles instead of floats (doubling the size of the plugin binary) + +real 1m40.135s +user 1m39.820s +sys 0m0.230s + +commit:a6e136aaa202, as commit:97b77e7cb94c but with target vectors & +grids initialised to epsilon instead of copied & then overwritten +(this one also makes the intention clearer I think so is worth doing) + +real 1m39.277s +user 1m39.000s +sys 0m0.183s + + +BQVEC: + +commit:81eaba98985b, as commit:a6e136aaa202 but converted to use bqvec +for basic allocation etc; processing logic unchanged + +real 1m37.320s +user 1m36.863s +sys 0m0.240s + +commit:891cbcf1e4d2, as commit:81eaba98985b but with some calculations +vectorised [note: has silly bug] + +real 1m24.961s +user 1m24.663s +sys 0m0.177s + +commit:853b2d750688, as commit:891cbcf1e4d2 but with silly bug fixed + +real 1m26.876s +user 1m26.387s +sys 0m0.267s + +commit:9ecad4c9c2a2, as commit:853b2d750688 but using a couple of +bqvec calls in expectation function + +real 1m9.153s +user 1m8.837s +sys 0m0.187s + +(this seems unlikely -- what have I broken?) + +commit:8259193b3b16, as commit:9ecad4c9c2a2 but avoiding some +allocations + +real 1m10.631s +user 1m10.327s +sys 0m0.180s + +(still broken?) + +commit:19f6832fdc8a, as commit:9ecad4c9c2a2 but with the arguments to +v_add_with_gain supplied in the right order (that's what I'd broken!) + +real 1m28.957s +user 1m28.437s +sys 0m0.213s +
--- a/yeti/scratch/generateTemplatesC.yeti Tue May 06 16:31:32 2014 +0100 +++ b/yeti/scratch/generateTemplatesC.yeti Tue May 06 16:36:46 2014 +0100 @@ -106,7 +106,7 @@ " const char *name;", " int lowest;", " int highest;", - " float data[SILVET_TEMPLATE_NOTE_COUNT][SILVET_TEMPLATE_SIZE];", + " double data[SILVET_TEMPLATE_NOTE_COUNT][SILVET_TEMPLATE_SIZE];", "} silvet_template_t;", "", "static int silvet_templates_lowest_note = \(overallLowest);",