changeset 478:1405f4a2caf3

* Add use-log-scale estimator. Not quite right yet; the model doesn't actually have any data in it yet at the point where we want to make this decision * Update changelog
author Chris Cannam
date Tue, 11 Nov 2008 13:54:47 +0000 (2008-11-11)
parents e0784311a103
children f933062a7f80
files base/LogRange.cpp base/LogRange.h data/model/DenseThreeDimensionalModel.h data/model/EditableDenseThreeDimensionalModel.cpp data/model/EditableDenseThreeDimensionalModel.h data/model/FFTModel.h
diffstat 6 files changed, 95 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/base/LogRange.cpp	Tue Nov 11 09:41:45 2008 +0000
+++ b/base/LogRange.cpp	Tue Nov 11 13:54:47 2008 +0000
@@ -72,3 +72,45 @@
 {
     return powf(10.0, value);
 }
+
+// Population standard deviation of values[start .. start+n); 0 if n == 0.
+static float
+sd(const std::vector<float> &values, size_t start, size_t n)
+{
+    if (n == 0) return 0.f; // avoid 0/0 -> NaN on an empty range
+    float sum = 0.f, variance = 0.f;
+    for (size_t i = 0; i < n; ++i) sum += values[start + i];
+    const float mean = sum / n;
+    for (size_t i = 0; i < n; ++i) {
+        const float diff = values[start + i] - mean;
+        variance += diff * diff;
+    }
+    // Divide by n (not n - 1): population, not sample, deviation.
+    return sqrtf(variance / n);
+}
+
+bool
+LogRange::useLogScale(std::vector<float> values)
+{
+    // Heuristic: sort the values and split them at the midpoint.  If
+    // the standard deviation of one half dwarfs that of the other,
+    // a log scale is likely to show the data better.
+
+    const size_t count = values.size();
+    if (count < 4) return false; // too few values to compare halves
+    std::sort(values.begin(), values.end());
+    const size_t mid = count / 2;
+
+    const float lowerSd = sd(values, 0, mid);
+    const float upperSd = sd(values, mid, count - mid);
+
+    std::cerr << "LogRange::useLogScale: sd0 = "
+              << lowerSd << ", sd1 = " << upperSd << std::endl;
+
+    // A zero deviation would make the ratio below meaningless.
+    if (lowerSd == 0 || upperSd == 0) return false;
+
+    // "Much bigger" is taken, somewhat arbitrarily, to mean over 10x.
+    return std::max(lowerSd, upperSd) / std::min(lowerSd, upperSd) > 10.f;
+}
+    
--- a/base/LogRange.h	Tue Nov 11 09:41:45 2008 +0000
+++ b/base/LogRange.h	Tue Nov 11 13:54:47 2008 +0000
@@ -16,6 +16,8 @@
 #ifndef _LOG_RANGE_H_
 #define _LOG_RANGE_H_
 
+#include <vector>
+
 class LogRange
 {
 public:
@@ -39,6 +41,14 @@
      * means taking the value'th power of ten.
      */
     static float unmap(float value);
+
+    /**
+     * Estimate whether a set of values would be more properly shown
+     * using a logarithmic than a linear scale.  This is only ever
+     * going to be a rough guess.
+     */
+    static bool useLogScale(std::vector<float> values);
+
 };
 
 #endif
--- a/data/model/DenseThreeDimensionalModel.h	Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/DenseThreeDimensionalModel.h	Tue Nov 11 13:54:47 2008 +0000
@@ -80,6 +80,12 @@
     virtual QString getBinName(size_t n) const = 0;
 
     /**
+     * Estimate whether a logarithmic scale might be appropriate for
+     * the value scale.
+     */
+    virtual bool shouldUseLogValueScale() const = 0;
+
+    /**
      * Utility function to query whether a given bin is greater than
      * its (vertical) neighbours.
      */
--- a/data/model/EditableDenseThreeDimensionalModel.cpp	Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.cpp	Tue Nov 11 13:54:47 2008 +0000
@@ -15,8 +15,9 @@
 
 #include "EditableDenseThreeDimensionalModel.h"
 
+#include "base/LogRange.h"
+
 #include <QTextStream>
-
 #include <QStringList>
 
 #include <iostream>
@@ -247,6 +248,35 @@
     emit modelChanged();
 }
 
+bool
+EditableDenseThreeDimensionalModel::shouldUseLogValueScale() const
+{
+    // Per-bin mean over a sparse sample of columns (at most 10, taking
+    // every 10th from the start), handed to LogRange for the estimate.
+    std::vector<float> sample; // per-bin running sum, then mean
+    std::vector<int> n;        // per-bin count of contributing columns
+
+    for (size_t i = 0; i < 10; ++i) {
+        const size_t index = i * 10;
+        if (index >= m_data.size()) break; // later indices are larger still
+        const Column &c = m_data[index];
+        const size_t height = static_cast<size_t>(c.size());
+        if (height > sample.size()) { // grow to fit the tallest column seen
+            sample.resize(height, 0.f);
+            n.resize(height, 0);
+        }
+        for (size_t j = 0; j < height; ++j) {
+            sample[j] += c[j];
+            ++n[j];
+        }
+    }
+    if (sample.empty()) return false; // no columns sampled: nothing to go on
+    for (size_t j = 0; j < sample.size(); ++j) {
+        if (n[j]) sample[j] /= n[j];
+    }
+    return LogRange::useLogScale(sample);
+}
+
 void
 EditableDenseThreeDimensionalModel::setCompletion(int completion, bool update)
 {
--- a/data/model/EditableDenseThreeDimensionalModel.h	Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.h	Tue Nov 11 13:54:47 2008 +0000
@@ -106,6 +106,8 @@
     virtual void setBinName(size_t n, QString);
     virtual void setBinNames(std::vector<QString> names);
 
+    bool shouldUseLogValueScale() const;
+
     virtual void setCompletion(int completion, bool update = true);
     virtual int getCompletion() const { return m_completion; }
 
--- a/data/model/FFTModel.h	Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/FFTModel.h	Tue Nov 11 13:54:47 2008 +0000
@@ -139,6 +139,10 @@
     virtual void getColumn(size_t x, Column &result) const;
     virtual QString getBinName(size_t n) const;
 
+    // FFT output: always suggest a log value scale.  This is only a
+    // default hint; the final choice remains with the user.
+    virtual bool shouldUseLogValueScale() const { return true; }
+
     /**
      * Calculate an estimated frequency for a stable signal in this
      * bin, using phase unwrapping.  This will be completely wrong if