changeset 535:3ccf48fb81d6

* make compression optional in editable dense 3d model, and some tweaks
author Chris Cannam
date Fri, 23 Jan 2009 14:00:29 +0000
parents 6038cb6fcd30
children beb51f558e9c
files data/fileio/CSVFileReader.cpp data/model/EditableDenseThreeDimensionalModel.cpp data/model/EditableDenseThreeDimensionalModel.h rdf/RDFImporter.cpp transform/FeatureExtractionModelTransformer.cpp
diffstat 5 files changed, 79 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/CSVFileReader.cpp	Fri Jan 23 13:31:51 2009 +0000
+++ b/data/fileio/CSVFileReader.cpp	Fri Jan 23 14:00:29 2009 +0000
@@ -159,9 +159,11 @@
                     break;
 		
                 case CSVFormat::ThreeDimensionalModel:
-                    model3 = new EditableDenseThreeDimensionalModel(sampleRate,
-                                                                    windowSize,
-                                                                    list.size());
+                    model3 = new EditableDenseThreeDimensionalModel
+                        (sampleRate,
+                         windowSize,
+                         list.size(),
+                         EditableDenseThreeDimensionalModel::NoCompression);
                     model = model3;
                     break;
                 }
--- a/data/model/EditableDenseThreeDimensionalModel.cpp	Fri Jan 23 13:31:51 2009 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.cpp	Fri Jan 23 14:00:29 2009 +0000
@@ -28,10 +28,12 @@
 EditableDenseThreeDimensionalModel::EditableDenseThreeDimensionalModel(size_t sampleRate,
                                                                        size_t resolution,
                                                                        size_t yBinCount,
+                                                                       CompressionType compression,
                                                                        bool notifyOnAdd) :
     m_sampleRate(sampleRate),
     m_resolution(resolution),
     m_yBinCount(yBinCount),
+    m_compression(compression),
     m_minimum(0.0),
     m_maximum(0.0),
     m_haveExtents(false),
@@ -73,7 +75,7 @@
 
     EditableDenseThreeDimensionalModel *model =
         new EditableDenseThreeDimensionalModel
-	(m_sampleRate, m_resolution, m_yBinCount);
+	(m_sampleRate, m_resolution, m_yBinCount, m_compression);
 
     model->m_minimum = m_minimum;
     model->m_maximum = m_maximum;
@@ -152,11 +154,11 @@
 EditableDenseThreeDimensionalModel::getValueAt(size_t index, size_t n) const
 {
     Column c = getColumn(index);
-    if (n < c.size()) return s.at(n);
+    if (n < c.size()) return c.at(n);
     return m_minimum;
 }
 
-static int given = 0, stored = 0;
+//static int given = 0, stored = 0;
 
 void
 EditableDenseThreeDimensionalModel::truncateAndStore(size_t index,
@@ -166,19 +168,36 @@
 
     //std::cout << "truncateAndStore(" << index << ", " << values.size() << ")" << std::endl;
 
+    // The default case is to store the entire column at m_data[index]
+    // and place 0 at m_trunc[index] to indicate that it has not been
+    // truncated.  We only do clever stuff if one of the clever-stuff
+    // tests works out.
+
     m_trunc[index] = 0;
-    if (index == 0 || values.size() != m_yBinCount) {
-        given += values.size();
-        stored += values.size();
+    if (index == 0 ||
+        m_compression == NoCompression ||
+        values.size() != m_yBinCount) {
+//        given += values.size();
+//        stored += values.size();
         m_data[index] = values;
         return;
     }
 
-    static int maxdist = 120;
+    // Maximum distance between a column and the one we refer to as
+    // the source of its truncated values.  Limited by having to fit
+    // in a signed char, but in any case small values are usually
+    // better.
+    static int maxdist = 6;
 
-    bool known = false;
-    bool top = false;
+    bool known = false; // do we know whether to truncate at top or bottom?
+    bool top = false;   // if we do know, will we truncate at top?
 
+    // If the previous column is not truncated, then it is the only
+    // candidate for comparison.  If it is truncated, then the column
+    // that it refers to is the only candidate.  Either way, we only
+    // have one possible column to compare against here, and we are
+    // being careful to ensure it is not a truncated one (to avoid
+    // doing more work recursively when uncompressing).
     int tdist = 1;
     int ptrunc = m_trunc[index-1];
     if (ptrunc < 0) {
@@ -198,12 +217,14 @@
 
         int bcount = 0, tcount = 0;
         if (!known || !top) {
+            // count how many identical values there are at the bottom
             for (int i = 0; i < h; ++i) {
                 if (values.at(i) == p.at(i)) ++bcount;
                 else break;
             }
         }
         if (!known || top) {
+            // count how many identical values there are at the top
             for (int i = h; i > 0; --i) {
                 if (values.at(i-1) == p.at(i-1)) ++tcount;
                 else break;
@@ -211,41 +232,41 @@
         }
         if (!known) top = (tcount > bcount);
 
-        int limit = h / 4;
+        int limit = h / 4; // don't bother unless we have at least this many
         if ((top ? tcount : bcount) > limit) {
         
             if (!top) {
+                // create a new column with h - bcount values from bcount up
                 Column tcol(h - bcount);
-                given += values.size();
-                stored += h - bcount;
+//                given += values.size();
+//                stored += h - bcount;
                 for (int i = bcount; i < h; ++i) {
                     tcol[i - bcount] = values.at(i);
                 }
                 m_data[index] = tcol;
                 m_trunc[index] = -tdist;
-                //std::cout << "bottom " << bcount << " as col at " << -tdist << std::endl;
                 return;
             } else {
+                // create a new column with h - tcount values from 0 up
                 Column tcol(h - tcount);
-                given += values.size();
-                stored += h - tcount;
+//                given += values.size();
+//                stored += h - tcount;
                 for (int i = 0; i < h - tcount; ++i) {
                     tcol[i] = values.at(i);
                 }
                 m_data[index] = tcol;
                 m_trunc[index] = tdist;
-                //std::cout << "top " << tcount << " as col at " << -tdist << std::endl;
                 return;
             }
         }
     }                
 
-    given += values.size();
-    stored += values.size();
-
+//    given += values.size();
+//    stored += values.size();
 //    std::cout << "given: " << given << ", stored: " << stored << " (" 
 //              << ((float(stored) / float(given)) * 100.f) << "%)" << std::endl;
 
+    // default case if nothing wacky worked out
     m_data[index] = values;
     return;
 }
@@ -253,6 +274,8 @@
 EditableDenseThreeDimensionalModel::Column
 EditableDenseThreeDimensionalModel::expandAndRetrieve(size_t index) const
 {
+    // See comment above m_trunc declaration in header
+
     assert(index < m_data.size());
     Column c = m_data.at(index);
     if (index == 0) {
--- a/data/model/EditableDenseThreeDimensionalModel.h	Fri Jan 23 13:31:51 2009 +0000
+++ b/data/model/EditableDenseThreeDimensionalModel.h	Fri Jan 23 14:00:29 2009 +0000
@@ -25,9 +25,25 @@
     Q_OBJECT
 
 public:
+
+    // EditableDenseThreeDimensionalModel supports a basic compression
+    // method that reduces the size of multirate data (e.g. wavelet
+    // transform outputs) that are stored as plain 3d grids by about
+    // 60%.  However, it can only be used for models
+    // whose columns are set in order from 0 and never subsequently
+    // changed.  If the model is going to be actually edited, it must
+    // have NoCompression.
+
+    enum CompressionType
+    {
+        NoCompression,
+        BasicMultirateCompression
+    };
+
     EditableDenseThreeDimensionalModel(size_t sampleRate,
 				       size_t resolution,
 				       size_t yBinCount,
+                                       CompressionType compression,
 				       bool notifyOnAdd = true);
 
     virtual bool isOK() const;
@@ -125,7 +141,15 @@
     typedef QVector<Column> ValueMatrix;
     ValueMatrix m_data;
 
-    std::vector<signed char> m_trunc; // +ve -> top is truncated, -ve -> bottom
+    // m_trunc is used for simple compression.  If at least the top N
+    // elements of column x (for N = some proportion of the column
+    // height) are equal to those of an earlier column x', then
+    // m_trunc[x] will contain x-x' and column x will be truncated so
+    // as to remove the duplicate elements.  If the equal elements are
+    // at the bottom, then m_trunc[x] will contain x'-x (a negative
+    // value).  If m_trunc[x] is 0 then the whole of column x is
+    // stored.
+    std::vector<signed char> m_trunc;
     void truncateAndStore(size_t index, const Column & values);
     Column expandAndRetrieve(size_t index) const;
 
@@ -134,6 +158,7 @@
     size_t m_sampleRate;
     size_t m_resolution;
     size_t m_yBinCount;
+    CompressionType m_compression;
     float m_minimum;
     float m_maximum;
     bool m_haveExtents;
--- a/rdf/RDFImporter.cpp	Fri Jan 23 13:31:51 2009 +0000
+++ b/rdf/RDFImporter.cpp	Fri Jan 23 14:00:29 2009 +0000
@@ -342,8 +342,9 @@
         } else {
 
             EditableDenseThreeDimensionalModel *m =
-                new EditableDenseThreeDimensionalModel(sampleRate, hopSize,
-                                                       height, false);
+                new EditableDenseThreeDimensionalModel
+                (sampleRate, hopSize, height, 
+                 EditableDenseThreeDimensionalModel::NoCompression, false);
             
             EditableDenseThreeDimensionalModel::Column column;
 
--- a/transform/FeatureExtractionModelTransformer.cpp	Fri Jan 23 13:31:51 2009 +0000
+++ b/transform/FeatureExtractionModelTransformer.cpp	Fri Jan 23 14:00:29 2009 +0000
@@ -327,7 +327,9 @@
 
         EditableDenseThreeDimensionalModel *model =
             new EditableDenseThreeDimensionalModel
-            (modelRate, modelResolution, binCount, false);
+            (modelRate, modelResolution, binCount,
+             EditableDenseThreeDimensionalModel::BasicMultirateCompression,
+             false);
 
 	if (!m_descriptor->binNames.empty()) {
 	    std::vector<QString> names;