changeset 1450:a12fd0456f0c streaming-csv-writer

Merge from default branch
author Chris Cannam
date Tue, 17 Apr 2018 10:35:42 +0100
parents deabf9fd3d28 (diff) 48e9f538e6e9 (current diff)
children b40f67578976
files data/fileio/CSVFileWriter.cpp data/fileio/test/svcore-data-fileio-test.cpp data/model/test/MockWaveModel.cpp
diffstat 8 files changed, 530 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/CSVFileWriter.cpp	Thu Mar 01 18:02:22 2018 +0000
+++ b/data/fileio/CSVFileWriter.cpp	Tue Apr 17 10:35:42 2018 +0100
@@ -14,6 +14,7 @@
 */
 
 #include "CSVFileWriter.h"
+#include "CSVStreamWriter.h"
 
 #include "model/Model.h"
 #include "model/SparseOneDimensionalModel.h"
@@ -27,6 +28,7 @@
 
 #include <QFile>
 #include <QTextStream>
+#include <exception>
 
 CSVFileWriter::CSVFileWriter(QString path,
                              Model *model,
@@ -59,26 +61,13 @@
 void
 CSVFileWriter::write()
 {
-    try {
-        TempWriteFile temp(m_path);
-
-        QFile file(temp.getTemporaryFilename());
-        if (!file.open(QIODevice::WriteOnly | QIODevice::Text)) {
-            m_error = tr("Failed to open file %1 for writing")
-                .arg(temp.getTemporaryFilename());
-            return;
-        }
-    
-        QTextStream out(&file);
-        out << m_model->toDelimitedDataStringWithOptions
-            (m_delimiter, m_options);
-
-        file.close();
-        temp.moveToTarget();
-
-    } catch (FileOperationFailed &f) {
-        m_error = f.what();
-    }
+    Selection all { // a single region spanning the model's start and end frames
+        m_model->getStartFrame(),
+        m_model->getEndFrame()
+    };
+    MultiSelection selections;
+    selections.addSelection(all);
+    writeSelection(&selections); // the whole-model export now goes through writeSelection()
 }
 
 void
@@ -96,22 +85,23 @@
     
         QTextStream out(&file);
 
-        for (MultiSelection::SelectionList::iterator i =
-                 selection->getSelections().begin();
-             i != selection->getSelections().end(); ++i) {
-        
-            sv_frame_t f0(i->getStartFrame()), f1(i->getEndFrame());
-            out << m_model->toDelimitedDataStringSubsetWithOptions
-                (m_delimiter, m_options, f0, f1);
-        }
+        bool completed = CSVStreamWriter::writeInChunks(
+            out,
+            *m_model,
+            *selection,
+            m_reporter,
+            m_delimiter,
+            m_options
+        );
 
         file.close();
-        temp.moveToTarget();
+        if (completed) {
+            temp.moveToTarget();
+        }
 
     } catch (FileOperationFailed &f) {
         m_error = f.what();
+    } catch (const std::exception &e) { // ProgressReporter could throw
+        m_error = e.what();
     }
 }
-
-
-
--- a/data/fileio/CSVFileWriter.h	Thu Mar 01 18:02:22 2018 +0000
+++ b/data/fileio/CSVFileWriter.h	Tue Apr 17 10:35:42 2018 +0100
@@ -23,6 +23,7 @@
 
 class Model;
 class MultiSelection;
+class ProgressReporter;
 
 class CSVFileWriter : public QObject
 {
@@ -33,6 +34,16 @@
                   Model *model,
                   QString delimiter = ",",
                   DataExportOptions options = DataExportDefaults);
+
+    CSVFileWriter(QString path, // overload that additionally takes a ProgressReporter
+                  Model *model,
+                  ProgressReporter *reporter,
+                  QString delimiter = ",",
+                  DataExportOptions options = DataExportDefaults) // same defaults as the constructor declared above
+    : CSVFileWriter(path, model, delimiter, options) // delegate to the constructor without a reporter
+    {
+        m_reporter = reporter; // set in the body: a delegating constructor may not list other member initialisers
+    }
     virtual ~CSVFileWriter();
 
     virtual bool isOK() const;
@@ -47,6 +58,7 @@
     QString m_error;
     QString m_delimiter;
     DataExportOptions m_options;
+    ProgressReporter *m_reporter = nullptr;
 };
 
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/CSVStreamWriter.h	Tue Apr 17 10:35:42 2018 +0100
@@ -0,0 +1,149 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2017 Queen Mary, University of London.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef CSV_STREAM_WRITER_H
+#define CSV_STREAM_WRITER_H
+
+#include "base/BaseTypes.h"
+#include "base/Selection.h"
+#include "base/ProgressReporter.h"
+#include "base/DataExportOptions.h"
+#include "data/model/Model.h"
+#include <QString>
+#include <algorithm>
+#include <numeric>
+
+namespace CSVStreamWriter
+{
+
+// Stream the model's delimited text for each selection in regions to
+// oss, at most blockSize frames per chunk. Before every chunk the
+// reporter (if any) is polled for cancellation; progress is reported as
+// a whole percentage each time it rises. Returns false if blockSize <= 0,
+// if regions holds no selections or if cancelled; true otherwise.
+template <class OutStream>
+bool
+writeInChunks(OutStream& oss,
+              const Model& model,
+              const MultiSelection& regions,
+              ProgressReporter* reporter = nullptr,
+              QString delimiter = ",",
+              DataExportOptions options = DataExportDefaults,
+              const sv_frame_t blockSize = 16384)
+{
+    const auto selections = regions.getSelections();
+    if (blockSize <= 0 || selections.empty()) return false;
+
+    // TODO: validate the selections (nothing is checked here yet).
+    // The initial value must be an sv_frame_t: a plain 0 would make
+    // std::accumulate keep its running total in an int.
+    const sv_frame_t nFramesToWrite = std::accumulate(
+        selections.begin(), selections.end(), sv_frame_t(0),
+        [](sv_frame_t acc, const Selection& current) -> sv_frame_t {
+            return acc + (current.getEndFrame() - current.getStartFrame());
+        });
+    const auto finalFrameOfLastRegion = (*selections.crbegin()).getEndFrame();
+
+    const auto wasCancelled = [reporter]() {
+        return reporter && reporter->wasCancelled();
+    };
+
+    sv_frame_t nFramesWritten = 0;
+    int previousProgress = 0;
+
+    for (const auto& extents : selections) {
+        const auto endFrame = extents.getEndFrame();
+        auto readPtr = extents.getStartFrame();
+        while (readPtr < endFrame) {
+            if (wasCancelled()) return false;
+
+            const auto start = readPtr;
+            const auto end = std::min(start + blockSize, endFrame);
+            const auto data = model.toDelimitedDataStringSubsetWithOptions(
+                delimiter, options, start, end).trimmed();
+
+            // Newline only while this chunk ends before the last region's end.
+            if ( data != "" ) {
+                oss << data << (end < finalFrameOfLastRegion ? "\n" : "");
+            }
+
+            nFramesWritten += end - start;
+            const int currentProgress =
+                static_cast<int>(100 * nFramesWritten / nFramesToWrite);
+            if (currentProgress > previousProgress) {
+                if (reporter) reporter->setProgress(currentProgress);
+                previousProgress = currentProgress;
+            }
+            readPtr = end;
+        }
+    }
+    return !wasCancelled(); // setProgress could process event loop
+}
+
+// Convenience overload for a single region. An empty selection means
+// "the model's full extent". Returns false without writing when the
+// resolved start is negative or the end is not after the start.
+template <class OutStream>
+bool
+writeInChunks(OutStream& oss,
+              const Model& model,
+              const Selection& extents,
+              ProgressReporter* reporter = nullptr,
+              QString delimiter = ",",
+              DataExportOptions options = DataExportDefaults,
+              const sv_frame_t blockSize = 16384)
+{
+    const bool wholeModel = extents.isEmpty();
+    const auto firstFrame =
+        wholeModel ? model.getStartFrame() : extents.getStartFrame();
+    const auto lastFrame =
+        wholeModel ? model.getEndFrame() : extents.getEndFrame();
+    if (firstFrame < 0 || lastFrame <= firstFrame) return false;
+
+    // Wrap the single range so the multi-region overload does the work.
+    MultiSelection single;
+    single.addSelection(Selection { firstFrame, lastFrame });
+    return CSVStreamWriter::writeInChunks(
+        oss,
+        model,
+        single,
+        reporter,
+        delimiter,
+        options,
+        blockSize);
+}
+
+// Convenience overload taking no region: forwards a default-constructed
+// Selection (expected to be empty) to the single-Selection overload.
+template <class OutStream>
+bool
+writeInChunks(OutStream& oss,
+              const Model& model,
+              ProgressReporter* reporter = nullptr,
+              QString delimiter = ",",
+              DataExportOptions options = DataExportDefaults,
+              const sv_frame_t blockSize = 16384)
+{
+    return CSVStreamWriter::writeInChunks(
+        oss,
+        model,
+        Selection(),
+        reporter,
+        delimiter,
+        options,
+        blockSize);
+}
+} // namespace CSVStreamWriter
+#endif
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/fileio/test/CSVStreamWriterTest.h	Tue Apr 17 10:35:42 2018 +0100
@@ -0,0 +1,328 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Sonic Visualiser
+    An audio file viewer and annotation editor.
+    Centre for Digital Music, Queen Mary, University of London.
+    This file copyright 2017 Queen Mary, University of London.
+    
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+*/
+
+#ifndef TEST_CSV_STREAM_H
+#define TEST_CSV_STREAM_H
+
+#include <QtTest>
+#include <QObject>
+#include <sstream>
+#include <functional>
+
+#include "base/ProgressReporter.h"
+#include "base/DataExportOptions.h"
+#include "base/Selection.h"
+#include "data/model/NoteModel.h"
+#include "../CSVStreamWriter.h"
+#include "../../model/test/MockWaveModel.h"
+
+// Test double for ProgressReporter: counts and logs setProgress() calls
+// and answers wasCancelled() from a caller-supplied predicate.
+class StubReporter : public ProgressReporter
+{
+public:
+    StubReporter( std::function<bool()> isCancelled )
+        : m_isCancelled(isCancelled) {}
+    bool isDefinite() const override { return true; }
+    void setDefinite(bool) override {}
+    bool wasCancelled() const override { return m_isCancelled(); }
+    void setMessage(QString) override {}
+    void setProgress(int p) override { ++m_calls; m_percentageLog.push_back(p); }
+
+    size_t getCallCount() const { return m_calls; }
+    std::vector<int> getPercentageLog() const { return m_percentageLog; }
+    // Forget everything recorded so far, the percentage log included, so
+    // the call count and the log never disagree after a reset.
+    void reset() { m_calls = 0; m_percentageLog.clear(); }
+private:
+    size_t m_calls = 0;                   // setProgress() calls since reset()
+    std::function<bool()> m_isCancelled;  // consulted by wasCancelled()
+    std::vector<int> m_percentageLog;     // values passed to setProgress()
+};
+
+class CSVStreamWriterTest : public QObject
+{
+    Q_OBJECT
+public:
+    std::string getExpectedString() // Reference CSV text shared by the tests below
+    {
+        return
+        {
+          "0,0,0\n"
+          "1,0,0\n"
+          "2,0,0\n"
+          "3,0,0\n"
+          "4,1,1\n"
+          "5,1,1\n"
+          "6,1,1\n"
+          "7,1,1\n"
+          "8,1,1\n"
+          "9,1,1\n"
+          "10,1,1\n"
+          "11,1,1\n"
+          "12,1,1\n"
+          "13,1,1\n"
+          "14,1,1\n"
+          "15,1,1\n"
+          "16,1,1\n"
+          "17,1,1\n"
+          "18,1,1\n"
+          "19,1,1\n"
+          "20,0,0\n"
+          "21,0,0\n"
+          "22,0,0\n"
+          "23,0,0" // no newline after the final row
+        };
+    }
+
+private slots:
+    void simpleValidOutput() // Whole-model write with default arguments gives the reference text
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+
+        std::ostringstream oss;
+        const auto result = CSVStreamWriter::writeInChunks(oss, mwm); // no reporter; default delimiter, options and block size
+        QVERIFY( oss.str() == getExpectedString() );
+        QVERIFY( result );
+    }
+
+    void callsReporterCorrectTimes() // Checks how many setProgress() calls each block size produces
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+        StubReporter reporter { []() -> bool { return false; } }; // never reports cancellation
+        const auto expected = getExpectedString();
+
+        std::ostringstream oss;
+        const auto writeStreamWithBlockSize = [&](int blockSize) {
+            return CSVStreamWriter::writeInChunks(
+                oss,
+                mwm,
+                &reporter,
+                ",",
+                DataExportDefaults,
+                blockSize
+            );
+        };
+
+        const auto reset = [&]() { // clear the stream and the reporter between runs
+            oss.str({});
+            reporter.reset();
+        };
+
+        const auto nonIntegerMultipleResult = writeStreamWithBlockSize(5);
+        QVERIFY( nonIntegerMultipleResult );
+        QVERIFY( reporter.getCallCount() == 5 /* 4.8 rounded up */ );
+        QVERIFY( oss.str() == expected );
+        reset();
+
+        const auto integerMultiple = writeStreamWithBlockSize(2);
+        QVERIFY( integerMultiple );
+        QVERIFY( reporter.getCallCount() == 12 ); // 24 rows in blocks of 2
+        QVERIFY( oss.str() == expected );
+        reset();
+
+        const auto largerThanNumberOfSamples = writeStreamWithBlockSize(100);
+        QVERIFY( largerThanNumberOfSamples );
+        QVERIFY( reporter.getCallCount() == 1 ); // everything fits in one block
+        QVERIFY( oss.str() == expected );
+        reset();
+
+        const auto zero = writeStreamWithBlockSize(0);
+        QVERIFY( zero == false ); // a zero block size is refused
+        QVERIFY( reporter.getCallCount() == 0 );
+    }
+
+    void isCancellable() // Cancelling via the reporter stops the write and yields false
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+        StubReporter reporter { []() -> bool { return true; } }; // reports cancellation from the start
+
+        std::ostringstream oss;
+        const auto cancelImmediately = CSVStreamWriter::writeInChunks(
+            oss,
+            mwm,
+            &reporter,
+            ",",
+            DataExportDefaults,
+            4
+        );
+        QVERIFY( cancelImmediately == false );
+        QVERIFY( reporter.getCallCount() == 0 ); // no progress was ever reported
+
+        StubReporter cancelMidway { // the predicate below refers to the reporter being constructed
+            [&]() { return cancelMidway.getCallCount() == 3; } // true once three progress reports have been made
+        };
+        const auto cancelledMidway = CSVStreamWriter::writeInChunks(
+            oss,
+            mwm,
+            &cancelMidway,
+            ",",
+            DataExportDefaults,
+            4
+        );
+        QVERIFY( cancelMidway.getCallCount() == 3 );
+        QVERIFY( cancelledMidway == false );
+    }
+
+    void zeroStartTimeReportsPercentageCorrectly() // Whole-model write in blocks of 4: checks the reported percentages
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+        StubReporter reporter { []() -> bool { return false; } };
+        std::ostringstream oss;
+        const auto succeeded = CSVStreamWriter::writeInChunks(
+            oss,
+            mwm,
+            &reporter,
+            ",",
+            DataExportDefaults,
+            4
+        );
+        QVERIFY( succeeded == true );
+        QVERIFY( reporter.getCallCount() == 6 );
+        const std::vector<int> expectedCallLog { // whole-number percentages, e.g. 100 * 4 / 24 == 16
+            16,
+            33,
+            50,
+            66,
+            83,
+            100
+        };
+        QVERIFY( reporter.getPercentageLog() == expectedCallLog );
+        QVERIFY( oss.str() == getExpectedString() );
+    }
+
+    void nonZeroStartTimeReportsPercentageCorrectly() // Percentages are relative to the selection, not the whole model
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+        StubReporter reporter { []() -> bool { return false; } };
+        std::ostringstream oss;
+        const auto writeSubSection = CSVStreamWriter::writeInChunks(
+            oss,
+            mwm,
+            {4, 20}, // a Selection from frame 4 up to frame 20
+            &reporter,
+            ",",
+            DataExportDefaults,
+            4
+        );
+        QVERIFY( reporter.getCallCount() == 4 );
+        const std::vector<int> expectedCallLog { // 16 frames in blocks of 4
+            25,
+            50,
+            75,
+            100
+        };
+        QVERIFY( reporter.getPercentageLog() == expectedCallLog );
+        QVERIFY( writeSubSection == true );
+        const std::string expectedOutput {
+          "4,1,1\n"
+          "5,1,1\n"
+          "6,1,1\n"
+          "7,1,1\n"
+          "8,1,1\n"
+          "9,1,1\n"
+          "10,1,1\n"
+          "11,1,1\n"
+          "12,1,1\n"
+          "13,1,1\n"
+          "14,1,1\n"
+          "15,1,1\n"
+          "16,1,1\n"
+          "17,1,1\n"
+          "18,1,1\n"
+          "19,1,1"
+        };
+        QVERIFY( oss.str() == expectedOutput );
+    }
+
+    void multipleSelectionOutput() // Several disjoint regions are written back to back
+    {
+        MockWaveModel mwm({ DC, DC }, 16, 4);
+        StubReporter reporter { []() -> bool { return false; } };
+        std::ostringstream oss;
+        MultiSelection regions;
+        regions.addSelection({0, 2});
+        regions.addSelection({4, 6});
+        regions.addSelection({16, 18});
+        qDebug("End frame: %lld", mwm.getEndFrame());
+        const std::string expectedOutput { // rows for the three regions only, no trailing newline
+          "0,0,0\n"
+          "1,0,0\n"
+          "4,1,1\n"
+          "5,1,1\n"
+          "16,1,1\n"
+          "17,1,1"
+        };
+        const auto wroteMultiSection = CSVStreamWriter::writeInChunks(
+            oss,
+            mwm,
+            regions,
+            &reporter,
+            ",",
+            DataExportDefaults,
+            2
+        );
+        QVERIFY( wroteMultiSection == true );
+        QVERIFY( reporter.getCallCount() == 3 ); // one block of 2 frames per region
+        const std::vector<int> expectedCallLog { 33, 66, 100 };
+        QVERIFY( reporter.getPercentageLog() == expectedCallLog );
+        qDebug("%s", oss.str().c_str());
+        QVERIFY( oss.str() == expectedOutput );
+    }
+
+    void writeSparseModel() // Chunked output of a NoteModel must equal its one-shot toDelimitedDataString()
+    {
+        const auto pentatonicFromRoot = [](float midiPitch) { // five values offset from midiPitch by 0, 2, 4, 7 and 9
+            return std::vector<float> {
+                0 + midiPitch,
+                2 + midiPitch,
+                4 + midiPitch,
+                7 + midiPitch,
+                9 + midiPitch
+            };
+        };
+        const auto cMajorPentatonic = pentatonicFromRoot(60.0);
+        NoteModel notes(8 /* sampleRate */, 4 /* resolution */);
+        sv_frame_t startFrame = 0;
+        for (const auto& note : cMajorPentatonic) {
+            notes.addPoint({startFrame, note, 4, 1.f, ""}); // presumably {frame, value, duration, level, label} - confirm against NoteModel
+            startFrame += 8; // successive points start 8 frames apart
+        }
+        qDebug("Create Expected Output\n");
+
+        // NB. removed end line break
+        const auto expectedOutput = notes.toDelimitedDataString(",").trimmed();
+
+        StubReporter reporter { []() -> bool { return false; } };
+        std::ostringstream oss;
+        qDebug("End frame: %lld", notes.getEndFrame());
+        qDebug("Write streaming\n");
+        const auto wroteSparseModel = CSVStreamWriter::writeInChunks(
+            oss,
+            notes,
+            &reporter,
+            ",",
+            DataExportDefaults,
+            2
+        );
+
+        qDebug("\n%s\n", expectedOutput.toLocal8Bit().data());
+        qDebug("\n%s\n", oss.str().c_str());
+        QVERIFY( wroteSparseModel == true );
+        QVERIFY( oss.str() == expectedOutput.toStdString() );
+    }
+};
+
+#endif
\ No newline at end of file
--- a/data/fileio/test/files.pri	Thu Mar 01 18:02:22 2018 +0000
+++ b/data/fileio/test/files.pri	Tue Apr 17 10:35:42 2018 +0100
@@ -1,10 +1,13 @@
 
 TEST_HEADERS += \
-	     AudioFileReaderTest.h \
-	     AudioFileWriterTest.h \
-	     AudioTestData.h \
-             EncodingTest.h \
-             MIDIFileReaderTest.h
-	     
+	../../model/test/MockWaveModel.h \
+	AudioFileReaderTest.h \
+	AudioFileWriterTest.h \
+	AudioTestData.h \
+	EncodingTest.h \
+	MIDIFileReaderTest.h \
+	CSVStreamWriterTest.h
+     
 TEST_SOURCES += \
-	     svcore-data-fileio-test.cpp
+	../../model/test/MockWaveModel.cpp \
+	svcore-data-fileio-test.cpp
--- a/data/fileio/test/svcore-data-fileio-test.cpp	Thu Mar 01 18:02:22 2018 +0000
+++ b/data/fileio/test/svcore-data-fileio-test.cpp	Tue Apr 17 10:35:42 2018 +0100
@@ -16,6 +16,7 @@
 #include "AudioFileWriterTest.h"
 #include "EncodingTest.h"
 #include "MIDIFileReaderTest.h"
+#include "CSVStreamWriterTest.h"
 
 #include <QtTest>
 
@@ -70,6 +71,12 @@
         else ++bad;
     }
 
+    {
+        CSVStreamWriterTest t;
+        if (QTest::qExec(&t, argc, argv) == 0) ++good;
+        else ++bad;
+    }
+
     if (bad > 0) {
     SVCERR << "\n********* " << bad << " test suite(s) failed!\n" << endl;
         return 1;
@@ -78,4 +85,3 @@
         return 0;
     }
 }
-
--- a/data/model/test/MockWaveModel.cpp	Thu Mar 01 18:02:22 2018 +0000
+++ b/data/model/test/MockWaveModel.cpp	Tue Apr 17 10:35:42 2018 +0100
@@ -55,7 +55,7 @@
     vector<floatvec_t> data(tochannel - fromchannel + 1);
     
     for (int c = fromchannel; c <= tochannel; ++c) {
-        data.push_back(getData(c, start, count));
+        data[c] = getData(c, start, count);
     }
 
     return data;
--- a/files.pri	Thu Mar 01 18:02:22 2018 +0000
+++ b/files.pri	Tue Apr 17 10:35:42 2018 +0100
@@ -51,6 +51,7 @@
            data/fileio/CSVFileReader.h \
            data/fileio/CSVFileWriter.h \
            data/fileio/CSVFormat.h \
+           data/fileio/CSVStreamWriter.h \
            data/fileio/DataFileReader.h \
            data/fileio/DataFileReaderFactory.h \
            data/fileio/FileFinder.h \