changeset 183:146eb9e35baa

* Improve output from Profiler class and make it incur less (no) overhead in release builds with NO_TIMING defined * Fix a lock contention issue in spectrogram * Marginal optimisations elsewhere
author Chris Cannam
date Tue, 10 Oct 2006 14:51:17 +0000
parents f75f8a1cd7b1
children 5a916fee6d2d
files base/Profiler.cpp base/Profiler.h base/RealTime.cpp base/RealTime.h base/ResizeableBitset.h data/fft/FFTDataServer.cpp data/fft/FFTDataServer.h data/fft/FFTFileCache.cpp data/fft/FFTFileCache.h data/model/FFTModel.cpp
diffstat 10 files changed, 222 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/base/Profiler.cpp	Mon Oct 09 10:49:46 2006 +0000
+++ b/base/Profiler.cpp	Tue Oct 10 14:51:17 2006 +0000
@@ -23,16 +23,8 @@
 
 #include <vector>
 #include <algorithm>
-
-//#define NO_TIMING 1
-
-#define WANT_TIMING 1
-
-#ifdef NDEBUG
-#ifndef WANT_TIMING
-#define NO_TIMING 1
-#endif
-#endif
+#include <set>
+#include <map>
 
 using std::cerr;
 using std::endl;
@@ -63,72 +55,133 @@
     pair.second.first += time;
     pair.second.second = pair.second.second + rt;
 
-    TimePair &timePair(m_lastCalls[id]);
-    timePair.first = time;
-    timePair.second = rt;
+    TimePair &lastPair(m_lastCalls[id]);
+    lastPair.first = time;
+    lastPair.second = rt;
+
+    TimePair &worstPair(m_worstCalls[id]);
+    if (time > worstPair.first) {
+        worstPair.first = time;
+    }
+    if (rt > worstPair.second) {
+        worstPair.second = rt;
+    }
 #endif
 }
 
-void Profiles::dump()
+void Profiles::dump() const
 {
-#ifndef NO_TIMING    
-    cerr << "Profiles::dump() :\n";
+#ifndef NO_TIMING
 
-    // I'm finding these two confusing dumped out in random order,
-    // so I'm going to sort them alphabetically:
+    fprintf(stderr, "Profiling points:\n");
 
-    std::vector<const char *> profileNames;
-    for (ProfileMap::iterator i = m_profiles.begin();
-	 i != m_profiles.end(); ++i) {
-	profileNames.push_back((*i).first);
+    fprintf(stderr, "\nBy name:\n");
+
+    typedef std::set<const char *, std::less<std::string> > StringSet;
+
+    StringSet profileNames;
+    for (ProfileMap::const_iterator i = m_profiles.begin();
+         i != m_profiles.end(); ++i) {
+        profileNames.insert(i->first);
     }
 
-    std::sort(profileNames.begin(), profileNames.end());
+    for (StringSet::const_iterator i = profileNames.begin();
+         i != profileNames.end(); ++i) {
 
-    for (std::vector<const char *>::iterator i = profileNames.begin();
-	 i != profileNames.end(); ++i) {
+        ProfileMap::const_iterator j = m_profiles.find(*i);
 
-        cerr << "-> " << *i << ":  CPU: " 
-	     << m_profiles[*i].first << " calls, "
-	     << int((m_profiles[*i].second.first * 1000.0) / CLOCKS_PER_SEC) << "ms, "
-	     << (((double)m_profiles[*i].second.first * 1000000.0 /
-		  (double)m_profiles[*i].first) / CLOCKS_PER_SEC) << "us/call"
-	     << endl;
+        if (j == m_profiles.end()) continue;
 
-        cerr << "-> " << *i << ": real: " 
-	     << m_profiles[*i].first << " calls, "
-	     << m_profiles[*i].second.second << ", "
-	     << (m_profiles[*i].second.second / m_profiles[*i].first)
-	     << "/call"
-	     << endl;
+        const ProfilePair &pp(j->second);
 
-	cerr << "-> " << *i << ": last:  CPU: "
-	     << int((m_lastCalls[*i].first * 1000.0) / CLOCKS_PER_SEC) << "ms, "
-	     << "   real: "
-	     << m_lastCalls[*i].second << endl;
+        fprintf(stderr, "%s(%d):\n", *i, pp.first);
+
+        fprintf(stderr, "\tCPU:  \t%.9g ms/call \t[%d ms total]\n",
+                (((double)pp.second.first * 1000.0 /
+		  (double)pp.first) / CLOCKS_PER_SEC),
+                int((pp.second.first * 1000.0) / CLOCKS_PER_SEC));
+
+        fprintf(stderr, "\tReal: \t%s ms      \t[%s ms total]\n",
+                ((pp.second.second / pp.first) * 1000).toString().c_str(),
+                (pp.second.second * 1000).toString().c_str());
+
+        WorstCallMap::const_iterator k = m_worstCalls.find(*i);
+        if (k == m_worstCalls.end()) continue;
+        
+        const TimePair &wc(k->second);
+
+        fprintf(stderr, "\tWorst:\t%s ms/call \t[%d ms CPU]\n",
+                (wc.second * 1000).toString().c_str(),
+                int((wc.first * 1000.0) / CLOCKS_PER_SEC));
     }
 
-    cerr << "Profiles::dump() finished\n";
+    typedef std::multimap<RealTime, const char *> TimeRMap;
+    typedef std::multimap<int, const char *> IntRMap;
+    
+    TimeRMap totmap, avgmap, worstmap;
+    IntRMap ncallmap;
+
+    for (ProfileMap::const_iterator i = m_profiles.begin();
+         i != m_profiles.end(); ++i) {
+        totmap.insert(TimeRMap::value_type(i->second.second.second, i->first));
+        avgmap.insert(TimeRMap::value_type(i->second.second.second /
+                                           i->second.first, i->first));
+        ncallmap.insert(IntRMap::value_type(i->second.first, i->first));
+    }
+
+    for (WorstCallMap::const_iterator i = m_worstCalls.begin();
+         i != m_worstCalls.end(); ++i) {
+        worstmap.insert(TimeRMap::value_type(i->second.second,
+                                             i->first));
+    }
+
+
+    fprintf(stderr, "\nBy total:\n");
+    for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) {
+        --i;
+        fprintf(stderr, "%-40s  %s ms\n", i->second,
+                (i->first * 1000).toString().c_str());
+    }
+
+    fprintf(stderr, "\nBy average:\n");
+    for (TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) {
+        --i;
+        fprintf(stderr, "%-40s  %s ms\n", i->second,
+                (i->first * 1000).toString().c_str());
+    }
+
+    fprintf(stderr, "\nBy worst case:\n");
+    for (TimeRMap::const_iterator i = worstmap.end(); i != worstmap.begin(); ) {
+        --i;
+        fprintf(stderr, "%-40s  %s ms\n", i->second,
+                (i->first * 1000).toString().c_str(), i->second);
+    }
+
+    fprintf(stderr, "\nBy number of calls:\n");
+    for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) {
+        --i;
+        fprintf(stderr, "%-40s  %d\n", i->second, i->first);
+    }
+
 #endif
 }
 
+#ifndef NO_TIMING    
+
 Profiler::Profiler(const char* c, bool showOnDestruct)
     : m_c(c),
       m_showOnDestruct(showOnDestruct)
 {
-#ifndef NO_TIMING
     m_startCPU = clock();
 
     struct timeval tv;
     (void)gettimeofday(&tv, 0);
     m_startTime = RealTime::fromTimeval(tv);
-#endif
 }
 
 void
-Profiler::update()
+Profiler::update() const
 {
-#ifndef NO_TIMING
     clock_t elapsedCPU = clock() - m_startCPU;
 
     struct timeval tv;
@@ -138,12 +191,10 @@
     cerr << "Profiler : id = " << m_c
 	 << " - elapsed so far = " << ((elapsedCPU * 1000) / CLOCKS_PER_SEC)
 	 << "ms CPU, " << elapsedTime << " real" << endl;
-#endif
 }    
 
 Profiler::~Profiler()
 {
-#ifndef NO_TIMING
     clock_t elapsedCPU = clock() - m_startCPU;
 
     struct timeval tv;
@@ -156,6 +207,7 @@
         cerr << "Profiler : id = " << m_c
              << " - elapsed = " << ((elapsedCPU * 1000) / CLOCKS_PER_SEC)
 	     << "ms CPU, " << elapsedTime << " real" << endl;
-#endif
 }
  
+#endif
+
--- a/base/Profiler.h	Mon Oct 09 10:49:46 2006 +0000
+++ b/base/Profiler.h	Tue Oct 10 14:51:17 2006 +0000
@@ -30,6 +30,15 @@
 
 #include "RealTime.h"
 
+//#define NO_TIMING 1
+
+#define WANT_TIMING 1
+
+#ifdef NDEBUG
+#ifndef WANT_TIMING
+#define NO_TIMING 1
+#endif
+#endif
 
 /**
  * Profiling classes
@@ -47,7 +56,7 @@
     ~Profiles();
 
     void accumulate(const char* id, clock_t time, RealTime rt);
-    void dump();
+    void dump() const;
 
 protected:
     Profiles();
@@ -56,19 +65,38 @@
     typedef std::pair<int, TimePair> ProfilePair;
     typedef std::map<const char *, ProfilePair> ProfileMap;
     typedef std::map<const char *, TimePair> LastCallMap;
+    typedef std::map<const char *, TimePair> WorstCallMap;
     ProfileMap m_profiles;
     LastCallMap m_lastCalls;
+    WorstCallMap m_worstCalls;
 
     static Profiles* m_instance;
 };
 
+#ifndef NO_TIMING
+
+/**
+ * Profile point instance class.  Construct one of these on the stack
+ * at the start of a function, in order to record the time consumed
+ * within that function.  The profiler object should be effectively
+ * optimised out if NO_TIMING is defined, so any overhead in a release
+ * build should be negligible so long as you remember to define that.
+ */
 class Profiler
 {
 public:
-    Profiler(const char*, bool showOnDestruct = false);
+    /**
+     * Create a profile point instance that records time consumed
+     * against the given profiling point name.  If showOnDestruct is
+     * true, the time consumed will be printed to stderr when the
+     * object is destroyed; otherwise, only the accumulated, mean and
+     * worst-case times will be shown when the program exits or
+     * Profiles::dump() is called.
+     */
+    Profiler(const char *name, bool showOnDestruct = false);
     ~Profiler();
 
-    void update();
+    void update() const;
 
 protected:
     const char* m_c;
@@ -76,6 +104,18 @@
     RealTime m_startTime;
     bool m_showOnDestruct;
 };
- 
+
+#else
+
+class Profiler
+{
+public:
+    Profiler(const char *, bool) { }
+    ~Profiler() { }
+    
+    void update() { }
+};
 
 #endif
+
+#endif
--- a/base/RealTime.cpp	Mon Oct 09 10:49:46 2006 +0000
+++ b/base/RealTime.cpp	Tue Oct 10 14:51:17 2006 +0000
@@ -173,6 +173,13 @@
     return s;
 }
 
+RealTime
+RealTime::operator*(int m) const
+{
+    double t = (double(nsec) / ONE_BILLION) * m;
+    t += sec * m;
+    return fromSeconds(t);
+}
 
 RealTime
 RealTime::operator/(int d) const
--- a/base/RealTime.h	Mon Oct 09 10:49:46 2006 +0000
+++ b/base/RealTime.h	Tue Oct 10 14:51:17 2006 +0000
@@ -92,6 +92,7 @@
         else return sec <= r.sec;
     }
 
+    RealTime operator*(int m) const;
     RealTime operator/(int d) const;
 
     // Find the fractional difference between times
--- a/base/ResizeableBitset.h	Mon Oct 09 10:49:46 2006 +0000
+++ b/base/ResizeableBitset.h	Tue Oct 10 14:51:17 2006 +0000
@@ -25,7 +25,7 @@
     ResizeableBitset() : m_bits(0) {
     }
     ResizeableBitset(size_t size) : m_bits(new std::vector<uint8_t>) {
-        m_bits->assign(size / 8 + 1, 0);
+        m_bits->assign((size >> 3) + 1, 0);
     }
     ResizeableBitset(const ResizeableBitset &b) {
         m_bits = new std::vector<uint8_t>(*b.m_bits);
@@ -45,19 +45,19 @@
             delete m_bits;
             m_bits = new std::vector<uint8_t>;
         }
-        m_bits->assign(bits / 8 + 1, 0);
+        m_bits->assign((bits >> 3) + 1, 0);
     }
     
     bool get(size_t column) const {
-        return ((*m_bits)[column / 8]) & (1u << (column % 8));
+        return ((*m_bits)[column >> 3]) & (1u << (column & 0x07));
     }
     
     void set(size_t column) {
-        ((*m_bits)[column / 8]) |=  (uint8_t(1) << (column % 8));
+        ((*m_bits)[column >> 3]) |=  (uint8_t(1) << (column & 0x07));
     }
 
     void reset(size_t column) {
-        ((*m_bits)[column / 8]) &= ~(uint8_t(1) << (column % 8));
+        ((*m_bits)[column >> 3]) &= ~(uint8_t(1) << (column & 0x07));
     }
 
     void copy(size_t source, size_t dest) {
--- a/data/fft/FFTDataServer.cpp	Mon Oct 09 10:49:46 2006 +0000
+++ b/data/fft/FFTDataServer.cpp	Tue Oct 10 14:51:17 2006 +0000
@@ -23,6 +23,7 @@
 #include "system/System.h"
 
 #include "base/StorageAdviser.h"
+#include "base/Profiler.h"
 
 
 #define DEBUG_FFT_SERVER 1
@@ -313,6 +314,9 @@
     m_windowIncrement(windowIncrement),
     m_fftSize(fftSize),
     m_polar(polar),
+    m_width(0),
+    m_height(0),
+    m_cacheWidth(0),
     m_memoryCache(false),
     m_compactCache(false),
     m_lastUsedCache(-1),
@@ -439,6 +443,8 @@
 #ifdef DEBUG_FFT_SERVER
     std::cerr << "FFTDataServer(" << this << "): suspend" << std::endl;
 #endif
+    Profiler profiler("FFTDataServer::suspend", false);
+
     QMutexLocker locker(&m_writeMutex);
     m_suspended = true;
     for (CacheVector::iterator i = m_caches.begin(); i != m_caches.end(); ++i) {
@@ -452,6 +458,8 @@
 #ifdef DEBUG_FFT_SERVER
     std::cerr << "FFTDataServer(" << this << "): suspendWrites" << std::endl;
 #endif
+    Profiler profiler("FFTDataServer::suspendWrites", false);
+
     m_suspended = true;
 }
 
@@ -461,6 +469,8 @@
 #ifdef DEBUG_FFT_SERVER
     std::cerr << "FFTDataServer(" << this << "): resume" << std::endl;
 #endif
+    Profiler profiler("FFTDataServer::resume", false);
+
     m_suspended = false;
     if (m_fillThread) {
         if (m_fillThread->isFinished()) {
@@ -476,6 +486,8 @@
 FFTCache *
 FFTDataServer::getCacheAux(size_t c)
 {
+    Profiler profiler("FFTDataServer::getCacheAux", false);
+
     QMutexLocker locker(&m_writeMutex);
 
     if (m_lastUsedCache == -1) {
@@ -557,10 +569,14 @@
 float
 FFTDataServer::getMagnitudeAt(size_t x, size_t y)
 {
+    Profiler profiler("FFTDataServer::getMagnitudeAt", false);
+
     size_t col;
     FFTCache *cache = getCache(x, col);
 
     if (!cache->haveSetColumnAt(col)) {
+        std::cerr << "FFTDataServer::getMagnitudeAt: calling fillColumn(" 
+                  << x << ")" << std::endl;
         fillColumn(x);
     }
     return cache->getMagnitudeAt(col, y);
@@ -569,6 +585,8 @@
 float
 FFTDataServer::getNormalizedMagnitudeAt(size_t x, size_t y)
 {
+    Profiler profiler("FFTDataServer::getNormalizedMagnitudeAt", false);
+
     size_t col;
     FFTCache *cache = getCache(x, col);
 
@@ -581,6 +599,8 @@
 float
 FFTDataServer::getMaximumMagnitudeAt(size_t x)
 {
+    Profiler profiler("FFTDataServer::getMaximumMagnitudeAt", false);
+
     size_t col;
     FFTCache *cache = getCache(x, col);
 
@@ -593,6 +613,8 @@
 float
 FFTDataServer::getPhaseAt(size_t x, size_t y)
 {
+    Profiler profiler("FFTDataServer::getPhaseAt", false);
+
     size_t col;
     FFTCache *cache = getCache(x, col);
 
@@ -605,6 +627,8 @@
 void
 FFTDataServer::getValuesAt(size_t x, size_t y, float &real, float &imaginary)
 {
+    Profiler profiler("FFTDataServer::getValuesAt", false);
+
     size_t col;
     FFTCache *cache = getCache(x, col);
 
@@ -623,9 +647,14 @@
 bool
 FFTDataServer::isColumnReady(size_t x)
 {
+    Profiler profiler("FFTDataServer::isColumnReady", false);
+
     if (!haveCache(x)) {
         if (m_lastUsedCache == -1) {
-            if (m_suspended) resume();
+            if (m_suspended) {
+                std::cerr << "FFTDataServer::isColumnReady(" << x << "): no cache, calling resume" << std::endl;
+                resume();
+            }
             m_fillThread->start();
         }
         return false;
@@ -640,6 +669,8 @@
 void
 FFTDataServer::fillColumn(size_t x)
 {
+    Profiler profiler("FFTDataServer::fillColumn", false);
+
     size_t col;
 #ifdef DEBUG_FFT_SERVER_FILL
     std::cout << "FFTDataServer::fillColumn(" << x << ")" << std::endl;
@@ -720,7 +751,10 @@
                        m_workbuffer + m_fftSize/2,
                        factor);
 
-    if (m_suspended) resume();
+    if (m_suspended) {
+//        std::cerr << "FFTDataServer::fillColumn(" << x << "): calling resume" << std::endl;
+//        resume();
+    }
 }    
 
 size_t
--- a/data/fft/FFTDataServer.h	Mon Oct 09 10:49:46 2006 +0000
+++ b/data/fft/FFTDataServer.h	Tue Oct 10 14:51:17 2006 +0000
@@ -124,6 +124,8 @@
     size_t m_width;
     size_t m_height;
     size_t m_cacheWidth;
+    size_t m_cacheWidthPower;
+    size_t m_cacheWidthMask;
     bool m_memoryCache;
     bool m_compactCache;
 
--- a/data/fft/FFTFileCache.cpp	Mon Oct 09 10:49:46 2006 +0000
+++ b/data/fft/FFTFileCache.cpp	Tue Oct 10 14:51:17 2006 +0000
@@ -89,6 +89,8 @@
 float
 FFTFileCache::getMagnitudeAt(size_t x, size_t y) const
 {
+    Profiler profiler("FFTFileCache::getMagnitudeAt", false);
+
     float value = 0.f;
 
     switch (m_storageType) {
@@ -294,3 +296,22 @@
         2 * sizeof(size_t); // matrix file header size
 }
 
+void
+FFTFileCache::populateReadBuf(size_t x) const
+{
+    Profiler profiler("FFTFileCache::populateReadBuf", false);
+
+    if (!m_readbuf) {
+        m_readbuf = new char[m_mfc->getHeight() * 2 * m_mfc->getCellSize()];
+    }
+    m_mfc->getColumnAt(x, m_readbuf);
+    if (m_mfc->haveSetColumnAt(x + 1)) {
+        m_mfc->getColumnAt
+            (x + 1, m_readbuf + m_mfc->getCellSize() * m_mfc->getHeight());
+        m_readbufWidth = 2;
+    } else {
+        m_readbufWidth = 1;
+    }
+    m_readbufCol = x;
+}
+
--- a/data/fft/FFTFileCache.h	Mon Oct 09 10:49:46 2006 +0000
+++ b/data/fft/FFTFileCache.h	Tue Oct 10 14:51:17 2006 +0000
@@ -94,20 +94,7 @@
         }
     }
 
-    void populateReadBuf(size_t x) const {
-        if (!m_readbuf) {
-            m_readbuf = new char[m_mfc->getHeight() * 2 * m_mfc->getCellSize()];
-        }
-        m_mfc->getColumnAt(x, m_readbuf);
-        if (m_mfc->haveSetColumnAt(x + 1)) {
-            m_mfc->getColumnAt
-                (x + 1, m_readbuf + m_mfc->getCellSize() * m_mfc->getHeight());
-            m_readbufWidth = 2;
-        } else {
-            m_readbufWidth = 1;
-        }
-        m_readbufCol = x;
-    }
+    void populateReadBuf(size_t x) const;
 
     float getNormalizationFactor(size_t col) const {
         if (m_storageType != Compact) {
--- a/data/model/FFTModel.cpp	Mon Oct 09 10:49:46 2006 +0000
+++ b/data/model/FFTModel.cpp	Tue Oct 10 14:51:17 2006 +0000
@@ -16,6 +16,8 @@
 #include "FFTModel.h"
 #include "DenseTimeValueModel.h"
 
+#include "base/Profiler.h"
+
 #include <cassert>
 
 FFTModel::FFTModel(const DenseTimeValueModel *model,
@@ -81,6 +83,8 @@
 void
 FFTModel::getColumn(size_t x, Column &result) const
 {
+    Profiler profiler("FFTModel::getColumn", false);
+
     result.clear();
     size_t height(getHeight());
     for (size_t y = 0; y < height; ++y) {