changeset 1305:9f9f55a8af92 mp3-gapless

Add gapless flag to MP3FileReader, and implement trimming the delay samples from the start (padding is not yet trimmed from end)
author Chris Cannam
date Tue, 29 Nov 2016 11:35:56 +0000
parents 7cff8367d9b1
children b325e91505b5
files data/fileio/AudioFileReaderFactory.cpp data/fileio/CodedAudioFileReader.cpp data/fileio/CodedAudioFileReader.h data/fileio/MP3FileReader.cpp data/fileio/MP3FileReader.h
diffstat 5 files changed, 160 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/data/fileio/AudioFileReaderFactory.cpp	Tue Nov 29 08:58:50 2016 +0000
+++ b/data/fileio/AudioFileReaderFactory.cpp	Tue Nov 29 11:35:56 2016 +0000
@@ -165,7 +165,8 @@
 #ifdef HAVE_MAD
     if (!reader && MP3FileReader::supports(source)) {
         reader = new MP3FileReader
-            (source, decodeMode, cacheMode, targetRate, normalised, reporter);
+            (source, decodeMode, cacheMode, MP3FileReader::Gapless,
+             targetRate, normalised, reporter);
         CHECK(reader);
     }
 #endif
@@ -234,7 +235,8 @@
 #ifdef HAVE_MAD
     if (!reader) {
         reader = new MP3FileReader
-            (source, decodeMode, cacheMode, targetRate, normalised, reporter);
+            (source, decodeMode, cacheMode, MP3FileReader::Gapless,
+             targetRate, normalised, reporter);
         CHECK(reader);
     }
 #endif
--- a/data/fileio/CodedAudioFileReader.cpp	Tue Nov 29 08:58:50 2016 +0000
+++ b/data/fileio/CodedAudioFileReader.cpp	Tue Nov 29 11:35:56 2016 +0000
@@ -48,6 +48,8 @@
     m_normalised(normalised),
     m_max(0.f),
     m_gain(1.f),
+    m_trimFromStart(0),
+    m_trimFromEnd(0),
     m_clippedCount(0),
     m_firstNonzero(0),
     m_lastNonzero(0)
@@ -94,6 +96,13 @@
 }
 
 void
+CodedAudioFileReader::setSamplesToTrim(sv_frame_t fromStart, sv_frame_t fromEnd)
+{ // Store the amounts to be discarded from the start and end of the decoded audio
+    m_trimFromStart = fromStart; // decremented later as leading input is skipped
+    m_trimFromEnd = fromEnd; // NOTE(review): stored only; no code in the hunks shown reads it
+}
+
+void
 CodedAudioFileReader::startSerialised(QString id)
 {
     SVDEBUG << "CodedAudioFileReader(" << this << ")::startSerialised: id = " << id << endl;
@@ -223,6 +232,11 @@
     if (!m_initialised) return;
 
     for (sv_frame_t i = 0; i < nframes; ++i) {
+
+        if (m_trimFromStart > 0) {
+            --m_trimFromStart;
+            continue;
+        }
         
         for (int c = 0; c < m_channelCount; ++c) {
 
@@ -253,6 +267,11 @@
     if (!m_initialised) return;
 
     for (sv_frame_t i = 0; i < nframes; ++i) {
+
+        if (m_trimFromStart > 0) {
+            --m_trimFromStart;
+            continue;
+        }
         
         for (int c = 0; c < m_channelCount; ++c) {
 
@@ -283,6 +302,11 @@
     if (!m_initialised) return;
 
     for (float sample: samples) {
+
+        if (m_trimFromStart > 0) {
+            --m_trimFromStart;
+            continue;
+        }
         
         m_cacheWriteBuffer[m_cacheWriteBufferIndex++] = sample;
 
@@ -376,6 +400,7 @@
     float clip = 1.0;
     sv_frame_t count = sz * m_channelCount;
 
+    // statistics
     for (sv_frame_t j = 0; j < sz; ++j) {
         for (int c = 0; c < m_channelCount; ++c) {
             sv_frame_t i = j * m_channelCount + c;
--- a/data/fileio/CodedAudioFileReader.h	Tue Nov 29 08:58:50 2016 +0000
+++ b/data/fileio/CodedAudioFileReader.h	Tue Nov 29 11:35:56 2016 +0000
@@ -62,6 +62,9 @@
 
     void initialiseDecodeCache(); // samplerate, channels must have been set
 
+    // compensation for encoder delays: amounts to drop from the start and end of the decoded audio
+    void setSamplesToTrim(sv_frame_t fromStart, sv_frame_t fromEnd);
+    
     // may throw InsufficientDiscSpace:
     void addSamplesToDecodeCache(float **samples, sv_frame_t nframes);
     void addSamplesToDecodeCache(float *samplesInterleaved, sv_frame_t nframes);
@@ -104,6 +107,9 @@
     float m_max;
     float m_gain;
 
+    sv_frame_t m_trimFromStart; // leading amount still to be discarded; decremented as input is skipped
+    sv_frame_t m_trimFromEnd;   // trailing amount passed to setSamplesToTrim; not read in the hunks shown
+    // NOTE(review): m_trimFromStart is decremented per frame in the nframes loops but per float in the loop over 'samples' -- confirm units
     sv_frame_t m_clippedCount;
     sv_frame_t m_firstNonzero;
     sv_frame_t m_lastNonzero;
--- a/data/fileio/MP3FileReader.cpp	Tue Nov 29 08:58:50 2016 +0000
+++ b/data/fileio/MP3FileReader.cpp	Tue Nov 29 11:35:56 2016 +0000
@@ -39,13 +39,19 @@
 #define open _open
 #endif
 
+using std::string;
+
+static const sv_frame_t DEFAULT_DECODER_DELAY = 529; // fixed decoder delay, in samples; never modified
+
 MP3FileReader::MP3FileReader(FileSource source, DecodeMode decodeMode, 
-                             CacheMode mode, sv_samplerate_t targetRate,
+                             CacheMode mode, GaplessMode gaplessMode,
+                             sv_samplerate_t targetRate,
                              bool normalised,
                              ProgressReporter *reporter) :
     CodedAudioFileReader(mode, targetRate, normalised),
     m_source(source),
     m_path(source.getLocalFilename()),
+    m_gaplessMode(gaplessMode),
     m_decodeErrorShown(false),
     m_decodeThread(0)
 {
@@ -65,6 +71,10 @@
     m_done = false;
     m_reporter = reporter;
 
+    if (m_gaplessMode == Gapless) {
+        CodedAudioFileReader::setSamplesToTrim(DEFAULT_DECODER_DELAY, 0);
+    }
+    
     struct stat stat;
     if (::stat(m_path.toLocal8Bit().data(), &stat) == -1 || stat.st_size == 0) {
 	m_error = QString("File %1 does not exist.").arg(m_path);
@@ -365,26 +375,92 @@
     return data->reader->filter(stream, frame);
 }
 
+static string toMagic(uint32_t fourcc) // render a 32-bit code as 4 chars, most significant byte first
+{
+    string magic("....");
+    for (int i = 0; i < 4; ++i) {
+        magic[3-i] = char((fourcc >> (8*i)) & 0xff); // byte i (counting from the low end) lands at index 3-i
+    }
+    return magic;
+}
+// Filter for the first mp3 frame: in gapless mode, read LAME delay/padding from a Xing/Info frame and drop that frame
 enum mad_flow
 MP3FileReader::filter(struct mad_stream const *stream,
                       struct mad_frame *)
 {
     if (m_mp3FrameCount > 0) {
+        // only handle info frame if it appears as first mp3 frame
         return MAD_FLOW_CONTINUE;
+    }
+
+    if (m_gaplessMode == Gappy) {
+        // Our non-gapless mode does not even filter out the Xing/LAME
+        // frame. That's because the main reason non-gapless mode
+        // exists is for backward compatibility with MP3FileReader
+        // behaviour before the gapless support was added, so we even
+        // need to keep the spurious 1152 samples resulting from
+        // feeding Xing/LAME frame to the decoder as otherwise we'd
+        // have different output from before.
+        SVDEBUG << "MP3FileReader: Not gapless mode, not checking Xing/LAME frame"
+                << endl;
+        return MAD_FLOW_CONTINUE;
+    }
+    if (stream->anc_bitlen < 32) return MAD_FLOW_CONTINUE; // ancillary data too short to hold a tag magic
+    struct mad_bitptr ptr = stream->anc_ptr;
+    string magic = toMagic(mad_bit_read(&ptr, 32));
+
+    if (magic == "Xing" || magic == "Info") {
+
+        SVDEBUG << "MP3FileReader: Found Xing/LAME metadata frame (magic = \""
+                << magic << "\")" << endl;
+
+        // All we want at this point is the LAME encoder delay and
+        // padding values. We expect to see the Xing/Info magic (which
+        // we've already read), then 116 bytes of Xing data, then LAME
+        // magic, 5 byte version string, 12 bytes of LAME data that we
+        // aren't currently interested in, then the delays encoded as
+        // two 12-bit numbers into three bytes.
+        //
+        // (See gabriel.mp3-tech.org/mp3infotag.html)
+        if (stream->anc_bitlen < 144 * 8) return MAD_FLOW_IGNORE; // 144 = 4+116+4+5+12+3 bytes; tag truncated, keep existing trim
+        for (int skip = 0; skip < 116; ++skip) {
+            (void)mad_bit_read(&ptr, 8);
+        }
+
+        magic = toMagic(mad_bit_read(&ptr, 32));
+
+        if (magic == "LAME") {
+
+            SVDEBUG << "MP3FileReader: Found LAME-specific metadata" << endl;
+
+            for (int skip = 0; skip < 5 + 12; ++skip) {
+                (void)mad_bit_read(&ptr, 8);
+            }
+
+            uint32_t delay = mad_bit_read(&ptr, 12);
+            uint32_t padding = mad_bit_read(&ptr, 12);
+
+            sv_frame_t delayToDrop = DEFAULT_DECODER_DELAY + delay;
+            sv_frame_t paddingToDrop = padding - DEFAULT_DECODER_DELAY;
+            if (paddingToDrop < 0) paddingToDrop = 0; // padding shorter than the decoder delay: nothing to trim
+
+            SVDEBUG << "MP3FileReader: LAME encoder delay = " << delay
+                    << ", padding = " << padding << endl;
+
+            SVDEBUG << "MP3FileReader: Will be trimming " << delayToDrop
+                    << " samples at start and " << paddingToDrop
+                    << " at end" << endl;
+
+            CodedAudioFileReader::setSamplesToTrim(delayToDrop, paddingToDrop);
+            
+        } else {
+            SVDEBUG << "MP3FileReader: Xing frame has no LAME metadata" << endl;
+        }
+            
+        return MAD_FLOW_IGNORE; // metadata frame: do not pass it on as audio
+        
     } else {
-        struct mad_bitptr ptr = stream->anc_ptr;
-        unsigned long fourcc = mad_bit_read(&ptr, 32);
-        std::string magic("....");
-        for (int i = 0; i < 4; ++i) {
-            magic[3-i] = char((fourcc >> (8*i)) & 0xff);
-        }
-        if (magic == "Xing" || magic == "Info" || magic == "LAME") {
-            SVDEBUG << "MP3FileReader: Discarding metadata frame (magic = \""
-                    << magic << "\")" << endl;
-            return MAD_FLOW_IGNORE;
-        } else {
-            return MAD_FLOW_CONTINUE;
-        }
+        return MAD_FLOW_CONTINUE;
     }
 }
 
--- a/data/fileio/MP3FileReader.h	Tue Nov 29 08:58:50 2016 +0000
+++ b/data/fileio/MP3FileReader.h	Tue Nov 29 11:35:56 2016 +0000
@@ -32,9 +32,43 @@
     Q_OBJECT
 
 public:
+    /**
+     * How the MP3FileReader should handle leading and trailing gaps.
+     * See http://lame.sourceforge.net/tech-FAQ.txt for a technical
+     * explanation of the numbers here.
+     */
+    enum GaplessMode {
+        /**
+         * Trim unwanted samples from the start and end of the decoded
+         * audio. From the start, trim the decoder delay (a fixed 529
+         * samples) plus any encoder delay given in Xing/LAME metadata.
+         * From the end, trim any padding given in Xing/LAME metadata,
+         * less the fixed decoder delay. The aim is "gapless" audio with
+         * no spurious zero padding at either end. NOTE(review): the
+         * commit message says end padding is not yet actually trimmed.
+         */
+        Gapless,
+
+        /**
+         * Do not trim any samples. Also do not suppress any frames
+         * from being passed to the mp3 decoder, even Xing/LAME
+         * metadata frames. This will result in the audio being padded
+         * with zeros at either end: at the start, typically
+         * 529+576+1152 = 2257 samples for LAME-encoded mp3s; at the
+         * end an unknown number depending on the fill ratio of the
+         * final coded frame, but typically less than 1152-529 = 623.
+         *
+         * This mode produces the same output as produced by older
+         * versions of this code before the gapless option was added,
+         * and is present mostly for backward compatibility.
+         */
+        Gappy
+    };
+    
     MP3FileReader(FileSource source,
                   DecodeMode decodeMode,
                   CacheMode cacheMode,
+                  GaplessMode gaplessMode,
                   sv_samplerate_t targetRate = 0,
                   bool normalised = false,
                   ProgressReporter *reporter = 0);
@@ -68,6 +102,7 @@
     QString m_title;
     QString m_maker;
     TagMap m_tags;
+    GaplessMode m_gaplessMode;
     sv_frame_t m_fileSize;
     double m_bitrateNum;
     int m_bitrateDenom;