annotate base/LogRange.cpp @ 1852:a454c7477b4f

Be more cautious about firing up an RDF file parser to identify a document - don't do it at all if the document is not apparently text
author Chris Cannam
date Thu, 30 Apr 2020 14:46:07 +0100
parents 7e3532d56abb
children
rev   line source
Chris@224 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@224 2
Chris@224 3 /*
Chris@224 4 Sonic Visualiser
Chris@224 5 An audio file viewer and annotation editor.
Chris@224 6 Centre for Digital Music, Queen Mary, University of London.
Chris@224 7 This file copyright 2006 Chris Cannam.
Chris@224 8
Chris@224 9 This program is free software; you can redistribute it and/or
Chris@224 10 modify it under the terms of the GNU General Public License as
Chris@224 11 published by the Free Software Foundation; either version 2 of the
Chris@224 12 License, or (at your option) any later version. See the file
Chris@224 13 COPYING included with this distribution for more information.
Chris@224 14 */
Chris@224 15
Chris@224 16 #include "LogRange.h"
Chris@573 17 #include "system/System.h"
Chris@224 18
Chris@224 19 #include <algorithm>
Chris@464 20 #include <iostream>
Chris@224 21 #include <cmath>
Chris@224 22
Chris@224 23 void
Chris@1044 24 LogRange::mapRange(double &min, double &max, double logthresh)
Chris@224 25 {
Chris@1395 26 static double eps = 1e-10;
Chris@1395 27
Chris@1385 28 // ensure that max > min:
Chris@224 29 if (min > max) std::swap(min, max);
Chris@224 30 if (max == min) max = min + 1;
Chris@224 31
Chris@1385 32 if (min >= 0.0) {
Chris@224 33
Chris@1385 34 // and max > min, so we know min >= 0 and max > 0
Chris@1385 35
Chris@1385 36 max = log10(max);
Chris@224 37
Chris@1385 38 if (min == 0.0) min = std::min(logthresh, max);
Chris@1044 39 else min = log10(min);
Chris@224 40
Chris@1385 41 } else if (max <= 0.0) {
Chris@1385 42
Chris@1385 43 // and max > min, so we know min < 0 and max <= 0
Chris@224 44
Chris@1385 45 min = log10(-min);
Chris@1385 46
Chris@1385 47 if (max == 0.0) max = std::min(logthresh, min);
Chris@1044 48 else max = log10(-max);
Chris@224 49
Chris@224 50 std::swap(min, max);
Chris@464 51
Chris@224 52 } else {
Chris@224 53
Chris@224 54 // min < 0 and max > 0
Chris@224 55
Chris@1044 56 max = log10(std::max(max, -min));
Chris@224 57 min = std::min(logthresh, max);
Chris@224 58 }
Chris@224 59
Chris@1395 60 if (fabs(max - min) < eps) min = max - 1;
Chris@224 61 }
Chris@224 62
Chris@1044 63 double
Chris@1044 64 LogRange::map(double value, double thresh)
Chris@224 65 {
Chris@1385 66 if (value == 0.0) return thresh;
Chris@1044 67 return log10(fabs(value));
Chris@224 68 }
Chris@224 69
Chris@1044 70 double
Chris@1044 71 LogRange::unmap(double value)
Chris@266 72 {
Chris@1044 73 return pow(10.0, value);
Chris@266 74 }
Chris@478 75
/**
 * Return the population standard deviation (i.e. dividing by n, not
 * n-1) of the n consecutive elements of values beginning at index
 * start.
 *
 * The caller must ensure that [start, start + n) lies within values.
 * An empty window (n <= 0) has no spread and yields 0.0, rather than
 * the NaN that dividing by a zero count would produce.
 */
static double
sd(const std::vector<double> &values, int start, int n)
{
    if (n <= 0) return 0.0;

    // First pass: arithmetic mean of the window
    double sum = 0.0;
    for (int i = 0; i < n; ++i) {
        sum += values[start + i];
    }
    const double mean = sum / n;

    // Second pass: mean squared deviation from that mean
    double variance = 0.0;
    for (int i = 0; i < n; ++i) {
        const double diff = values[start + i] - mean;
        variance += diff * diff;
    }
    variance = variance / n;

    return sqrt(variance);
}
Chris@478 91
Chris@478 92 bool
Chris@1392 93 LogRange::shouldUseLogScale(std::vector<double> values)
Chris@478 94 {
Chris@478 95 // Principle: Partition the data into two sets around the median;
Chris@478 96 // calculate the standard deviation of each set; if the two SDs
Chris@478 97 // are very different, it's likely that a log scale would be good.
Chris@478 98
Chris@1038 99 int n = int(values.size());
Chris@1038 100 if (n < 4) return false;
Chris@478 101 std::sort(values.begin(), values.end());
Chris@1038 102 int mi = n / 2;
Chris@478 103
Chris@1038 104 double sd0 = sd(values, 0, mi);
Chris@1038 105 double sd1 = sd(values, mi, n - mi);
Chris@478 106
Chris@690 107 SVDEBUG << "LogRange::useLogScale: sd0 = "
Chris@687 108 << sd0 << ", sd1 = " << sd1 << endl;
Chris@478 109
Chris@478 110 if (sd0 == 0 || sd1 == 0) return false;
Chris@478 111
Chris@478 112 // I wonder what method of determining "one sd much bigger than
Chris@478 113 // the other" would be appropriate here...
Chris@1038 114 if (std::max(sd0, sd1) / std::min(sd0, sd1) > 10.) return true;
Chris@478 115 else return false;
Chris@478 116 }
Chris@478 117