Mercurial > hg > svcore
diff base/LogRange.cpp @ 478:1405f4a2caf3
* Add use-log-scale estimator. Not quite right yet: the model doesn't
actually have any data in it at the point where we want to make
this decision.
* Update changelog
author | Chris Cannam |
---|---|
date | Tue, 11 Nov 2008 13:54:47 +0000 |
parents | 9525c9d7e54d |
children | bdc9bb371a9f |
line wrap: on
line diff
--- a/base/LogRange.cpp Tue Nov 11 09:41:45 2008 +0000 +++ b/base/LogRange.cpp Tue Nov 11 13:54:47 2008 +0000 @@ -72,3 +72,45 @@ { return powf(10.0, value); } + +static float +sd(const std::vector<float> &values, size_t start, size_t n) +{ + float sum = 0.f, mean = 0.f, variance = 0.f; + for (size_t i = 0; i < n; ++i) { + sum += values[start + i]; + } + mean = sum / n; + for (size_t i = 0; i < n; ++i) { + float diff = values[start + i] - mean; + variance += diff * diff; + } + variance = variance / n; + return sqrtf(variance); +} + +bool +LogRange::useLogScale(std::vector<float> values) +{ + // Principle: Partition the data into two sets around the median; + // calculate the standard deviation of each set; if the two SDs + // are very different, it's likely that a log scale would be good. + + if (values.size() < 4) return false; + std::sort(values.begin(), values.end()); + size_t mi = values.size() / 2; + + float sd0 = sd(values, 0, mi); + float sd1 = sd(values, mi, values.size() - mi); + + std::cerr << "LogRange::useLogScale: sd0 = " + << sd0 << ", sd1 = " << sd1 << std::endl; + + if (sd0 == 0 || sd1 == 0) return false; + + // I wonder what method of determining "one sd much bigger than + // the other" would be appropriate here... + if (std::max(sd0, sd1) / std::min(sd0, sd1) > 10.f) return true; + else return false; +} +