diff base/LogRange.cpp @ 478:1405f4a2caf3

* Add use-log-scale estimator. Not quite right yet: the model doesn't actually have any data in it at the point where we want to make this decision. * Update changelog
author Chris Cannam
date Tue, 11 Nov 2008 13:54:47 +0000
parents 9525c9d7e54d
children bdc9bb371a9f
line wrap: on
line diff
--- a/base/LogRange.cpp	Tue Nov 11 09:41:45 2008 +0000
+++ b/base/LogRange.cpp	Tue Nov 11 13:54:47 2008 +0000
@@ -72,3 +72,45 @@
 {
     return powf(10.0, value);
 }
+
+static float // population standard deviation of values[start] .. values[start + n - 1]
+sd(const std::vector<float> &values, size_t start, size_t n)
+{
+    float sum = 0.f, mean = 0.f, variance = 0.f;
+    for (size_t i = 0; i < n; ++i) { // indexing is unchecked: start + n must not exceed values.size()
+        sum += values[start + i];
+    }
+    mean = sum / n; // n == 0 is not guarded against here
+    for (size_t i = 0; i < n; ++i) {
+        float diff = values[start + i] - mean;
+        variance += diff * diff; // accumulates the sum of squared deviations from the mean
+    }
+    variance = variance / n; // divides by n (not n - 1), i.e. population variance
+    return sqrtf(variance);
+}
+
+bool
+LogRange::useLogScale(std::vector<float> values)
+{
+    // Principle: Partition the sorted data into two sets at index
+    // size/2; calculate the standard deviation of each set; if the two
+    // SDs are very different, it's likely that a log scale would be good.
+
+    if (values.size() < 4) return false; // below 4 values the lower half would hold at most one element
+    std::sort(values.begin(), values.end()); // values was passed by value, so this sorts a local copy
+    size_t mi = values.size() / 2; // first index of the upper half
+
+    float sd0 = sd(values, 0, mi); // lower half: indices [0, mi)
+    float sd1 = sd(values, mi, values.size() - mi); // upper half: indices [mi, size)
+
+    std::cerr << "LogRange::useLogScale: sd0 = " // diagnostic output, written unconditionally to stderr
+              << sd0 << ", sd1 = " << sd1 << std::endl;
+
+    if (sd0 == 0 || sd1 == 0) return false; // also keeps the ratio below from dividing by zero
+
+    // I wonder what method of determining "one sd much bigger than
+    // the other" would be appropriate here...
+    if (std::max(sd0, sd1) / std::min(sd0, sd1) > 10.f) return true; // larger SD is more than 10x the smaller
+    else return false;
+}
+