# HG changeset patch
# User Chris Cannam
# Date 1226411687 0
# Node ID 1405f4a2caf32763144bc484140dbddcb85ab87b
# Parent e0784311a1037f0f741a6d50463c496fdc7ee183
* Add use-log-scale estimator. Not quite right yet; the model doesn't
  actually have any data in it yet at the point where we want to make
  this decision
* Update changelog

diff -r e0784311a103 -r 1405f4a2caf3 base/LogRange.cpp
--- a/base/LogRange.cpp Tue Nov 11 09:41:45 2008 +0000
+++ b/base/LogRange.cpp Tue Nov 11 13:54:47 2008 +0000
@@ -72,3 +72,48 @@
 {
     return powf(10.0, value);
 }
+
+/**
+ * Return the population standard deviation of the n values starting
+ * at index start in values, or zero if n is zero.
+ */
+static float
+sd(const std::vector<float> &values, size_t start, size_t n)
+{
+    if (n == 0) return 0.f; // avoid dividing by zero below
+    float sum = 0.f, mean = 0.f, variance = 0.f;
+    for (size_t i = 0; i < n; ++i) {
+        sum += values[start + i];
+    }
+    mean = sum / n;
+    for (size_t i = 0; i < n; ++i) {
+        float diff = values[start + i] - mean;
+        variance += diff * diff;
+    }
+    variance = variance / n;
+    return sqrtf(variance);
+}
+
+bool
+LogRange::useLogScale(std::vector<float> values)
+{
+    // Principle: Partition the data into two sets around the median;
+    // calculate the standard deviation of each set; if the two SDs
+    // are very different, it's likely that a log scale would be good.
+
+    if (values.size() < 4) return false;
+    std::sort(values.begin(), values.end());
+    size_t mi = values.size() / 2;
+
+    float sd0 = sd(values, 0, mi);
+    float sd1 = sd(values, mi, values.size() - mi);
+
+    // A half with no spread gives no usable ratio (and would divide
+    // by zero below), so fall back to a linear scale.
+    if (sd0 == 0 || sd1 == 0) return false;
+
+    // "Very different" is taken to mean a ratio of more than 10
+    // between the larger and smaller SD; this threshold is a guess.
+    return std::max(sd0, sd1) / std::min(sd0, sd1) > 10.f;
+}
+
diff -r e0784311a103 -r 1405f4a2caf3 base/LogRange.h
--- a/base/LogRange.h Tue Nov 11 09:41:45 2008 +0000
+++ b/base/LogRange.h Tue Nov 11 13:54:47 2008 +0000
@@ -16,6 +16,8 @@
 #ifndef _LOG_RANGE_H_
 #define _LOG_RANGE_H_
 
+#include <vector>
+
 class LogRange
 {
 public:
@@ -39,6 +41,15 @@
      * means taking the value'th power of ten.
      */
     static float unmap(float value);
+
+    /**
+     * Estimate whether a set of values would be more properly shown
+     * using a logarithmic than a linear scale. This is only ever
+     * going to be a rough guess. Returns false if there are fewer
+     * than four values.
+     */
+    static bool useLogScale(std::vector<float> values);
+
 };
 
 #endif
diff -r e0784311a103 -r 1405f4a2caf3 data/model/DenseThreeDimensionalModel.h
--- a/data/model/DenseThreeDimensionalModel.h Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/DenseThreeDimensionalModel.h Tue Nov 11 13:54:47 2008 +0000
@@ -80,6 +80,12 @@
     virtual QString getBinName(size_t n) const = 0;
 
     /**
+     * Estimate whether a logarithmic scale might be appropriate for
+     * the value scale.
+     */
+    virtual bool shouldUseLogValueScale() const = 0;
+
+    /**
      * Utility function to query whether a given bin is greater than
      * its (vertical) neighbours.
      */
diff -r e0784311a103 -r 1405f4a2caf3 data/model/EditableDenseThreeDimensionalModel.cpp
--- a/data/model/EditableDenseThreeDimensionalModel.cpp Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.cpp Tue Nov 11 13:54:47 2008 +0000
@@ -15,8 +15,9 @@
 
 #include "EditableDenseThreeDimensionalModel.h"
 
+#include "base/LogRange.h"
+
 #include <QTextStream>
-
 #include <iostream>
 
 #include <cmath>
@@ -247,6 +248,39 @@
     emit modelChanged();
 }
 
+bool
+EditableDenseThreeDimensionalModel::shouldUseLogValueScale() const
+{
+    // Build a representative column by averaging, bin by bin, up to
+    // ten columns taken at every tenth index from the start of the
+    // model, then ask LogRange whether that profile suits a log scale.
+
+    std::vector<float> sample;
+    std::vector<int> n;
+
+    for (int i = 0; i < 10; ++i) {
+        size_t index = i * 10;
+        if (index < m_data.size()) {
+            const Column &c = m_data[index];
+            while (c.size() > sample.size()) {
+                sample.push_back(0.f);
+                n.push_back(0);
+            }
+            for (int j = 0; j < c.size(); ++j) {
+                sample[j] += c[j];
+                ++n[j];
+            }
+        }
+    }
+
+    if (sample.empty()) return false;
+    for (size_t j = 0; j < sample.size(); ++j) {
+        if (n[j]) sample[j] /= n[j];
+    }
+
+    return LogRange::useLogScale(sample);
+}
+
 void
 EditableDenseThreeDimensionalModel::setCompletion(int completion, bool update)
 {
diff -r e0784311a103 -r 1405f4a2caf3 data/model/EditableDenseThreeDimensionalModel.h
--- a/data/model/EditableDenseThreeDimensionalModel.h Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.h Tue Nov 11 13:54:47 2008 +0000
@@ -106,6 +106,8 @@
     virtual void setBinName(size_t n, QString);
     virtual void setBinNames(std::vector<QString> names);
 
+    virtual bool shouldUseLogValueScale() const;
+
     virtual void setCompletion(int completion, bool update = true);
     virtual int getCompletion() const { return m_completion; }
 
diff -r e0784311a103 -r 1405f4a2caf3 data/model/FFTModel.h
--- a/data/model/FFTModel.h Tue Nov 11 09:41:45 2008 +0000
+++ b/data/model/FFTModel.h Tue Nov 11 13:54:47 2008 +0000
@@ -139,6 +139,10 @@
     virtual void getColumn(size_t x, Column &result) const;
     virtual QString getBinName(size_t n) const;
 
+    virtual bool shouldUseLogValueScale() const {
+        return true; // Although obviously it's up to the user...
+    }
+
     /**
      * Calculate an estimated frequency for a stable signal in this
      * bin, using phase unwrapping. This will be completely wrong if