Mercurial > hg > svcore
changeset 1311:90ac1df228aa 3.0-integration
Merge from branch mp3-gapless
author | Chris Cannam |
---|---|
date | Tue, 29 Nov 2016 16:45:53 +0000 |
parents | 7cff8367d9b1 (current diff) aa1b1fc2d018 (diff) |
children | 079e553dc16e |
files | |
diffstat | 8 files changed, 277 insertions(+), 88 deletions(-) [+] |
line wrap: on
line diff
--- a/data/fileio/AudioFileReaderFactory.cpp Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/AudioFileReaderFactory.cpp Tue Nov 29 16:45:53 2016 +0000 @@ -165,7 +165,8 @@ #ifdef HAVE_MAD if (!reader && MP3FileReader::supports(source)) { reader = new MP3FileReader - (source, decodeMode, cacheMode, targetRate, normalised, reporter); + (source, decodeMode, cacheMode, MP3FileReader::Gapless, + targetRate, normalised, reporter); CHECK(reader); } #endif @@ -234,7 +235,8 @@ #ifdef HAVE_MAD if (!reader) { reader = new MP3FileReader - (source, decodeMode, cacheMode, targetRate, normalised, reporter); + (source, decodeMode, cacheMode, MP3FileReader::Gapless, + targetRate, normalised, reporter); CHECK(reader); } #endif
--- a/data/fileio/CodedAudioFileReader.cpp Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/CodedAudioFileReader.cpp Tue Nov 29 16:45:53 2016 +0000 @@ -41,13 +41,15 @@ m_cacheFileReader(0), m_cacheWriteBuffer(0), m_cacheWriteBufferIndex(0), - m_cacheWriteBufferSize(16384), + m_cacheWriteBufferSize(65536), m_resampler(0), m_resampleBuffer(0), m_fileFrameCount(0), m_normalised(normalised), m_max(0.f), m_gain(1.f), + m_trimFromStart(0), + m_trimFromEnd(0), m_clippedCount(0), m_firstNonzero(0), m_lastNonzero(0) @@ -94,6 +96,13 @@ } void +CodedAudioFileReader::setFramesToTrim(sv_frame_t fromStart, sv_frame_t fromEnd) +{ + m_trimFromStart = fromStart; + m_trimFromEnd = fromEnd; +} + +void CodedAudioFileReader::startSerialised(QString id) { SVDEBUG << "CodedAudioFileReader(" << this << ")::startSerialised: id = " << id << endl; @@ -118,6 +127,11 @@ SVDEBUG << "CodedAudioFileReader::initialiseDecodeCache: file rate = " << m_fileRate << endl; + if (m_channelCount == 0) { + SVCERR << "CodedAudioFileReader::initialiseDecodeCache: No channel count set!" << endl; + throw std::logic_error("No channel count set"); + } + if (m_fileRate == 0) { SVDEBUG << "CodedAudioFileReader::initialiseDecodeCache: ERROR: File sample rate unknown (bug in subclass implementation?)" << endl; throw FileOperationFailed("(coded file)", "File sample rate unknown (bug in subclass implementation?)"); @@ -212,6 +226,11 @@ m_data.clear(); } + if (m_trimFromEnd >= (m_cacheWriteBufferSize * m_channelCount)) { + SVCERR << "WARNING: CodedAudioFileReader::setSamplesToTrim: Can't handle trimming more frames from end (" << m_trimFromEnd << ") than can be stored in cache-write buffer (" << (m_cacheWriteBufferSize * m_channelCount) << "), won't trim anything from the end after all"; + m_trimFromEnd = 0; + } + m_initialised = true; } @@ -223,25 +242,20 @@ if (!m_initialised) return; for (sv_frame_t i = 0; i < nframes; ++i) { + + if (m_trimFromStart > 0) { + --m_trimFromStart; + continue; + } for (int c = 0; c < m_channelCount; ++c) { float sample = samples[c][i]; - m_cacheWriteBuffer[m_cacheWriteBufferIndex++] = sample; - if (m_cacheWriteBufferIndex == - m_cacheWriteBufferSize * m_channelCount) { + } - pushBuffer(m_cacheWriteBuffer, m_cacheWriteBufferSize, false); - m_cacheWriteBufferIndex = 0; - } - - if (m_cacheWriteBufferIndex % 10240 == 0 && - m_cacheFileReader) { - m_cacheFileReader->updateFrameCount(); - } - } + pushCacheWriteBufferMaybe(false); } } @@ -253,25 +267,20 @@ if (!m_initialised) return; for (sv_frame_t i = 0; i < nframes; ++i) { + + if (m_trimFromStart > 0) { + --m_trimFromStart; + continue; + } for (int c = 0; c < m_channelCount; ++c) { float sample = samples[i * m_channelCount + c]; m_cacheWriteBuffer[m_cacheWriteBufferIndex++] = sample; + } - if (m_cacheWriteBufferIndex == - m_cacheWriteBufferSize * m_channelCount) { - - pushBuffer(m_cacheWriteBuffer, m_cacheWriteBufferSize, false); - m_cacheWriteBufferIndex = 0; - } - - if (m_cacheWriteBufferIndex % 10240 == 0 && - m_cacheFileReader) { - m_cacheFileReader->updateFrameCount(); - } - } + pushCacheWriteBufferMaybe(false); } } @@ -283,20 +292,15 @@ if (!m_initialised) return; for (float sample: samples) { + + if (m_trimFromStart > 0) { + --m_trimFromStart; + continue; + } m_cacheWriteBuffer[m_cacheWriteBufferIndex++] = sample; - if (m_cacheWriteBufferIndex == - m_cacheWriteBufferSize * m_channelCount) { - - pushBuffer(m_cacheWriteBuffer, m_cacheWriteBufferSize, false); - m_cacheWriteBufferIndex = 0; - } - - if (m_cacheWriteBufferIndex % 10240 == 0 && - m_cacheFileReader) { - m_cacheFileReader->updateFrameCount(); - } + pushCacheWriteBufferMaybe(false); } } @@ -312,9 +316,7 @@ return; } - pushBuffer(m_cacheWriteBuffer, - m_cacheWriteBufferIndex / m_channelCount, - true); + pushCacheWriteBufferMaybe(true); delete[] m_cacheWriteBuffer; m_cacheWriteBuffer = 0; @@ -354,6 +356,48 @@ } void +CodedAudioFileReader::pushCacheWriteBufferMaybe(bool final) +{ + if (final || + (m_cacheWriteBufferIndex == + m_cacheWriteBufferSize * m_channelCount)) { + + if (m_trimFromEnd > 0) { + + sv_frame_t framesToPush = + (m_cacheWriteBufferIndex / m_channelCount) - m_trimFromEnd; + + if (framesToPush <= 0 && !final) { + // This won't do, the buffer is full so we have to push + // something. Should have checked for this earlier + throw std::logic_error("Buffer full but nothing to push"); + } + + pushBuffer(m_cacheWriteBuffer, framesToPush, final); + + m_cacheWriteBufferIndex -= framesToPush * m_channelCount; + + for (sv_frame_t i = 0; i < m_cacheWriteBufferIndex; ++i) { + m_cacheWriteBuffer[i] = + m_cacheWriteBuffer[framesToPush * m_channelCount + i]; + } + + } else { + + pushBuffer(m_cacheWriteBuffer, + m_cacheWriteBufferIndex / m_channelCount, + final); + + m_cacheWriteBufferIndex = 0; + } + + if (m_cacheFileReader) { + m_cacheFileReader->updateFrameCount(); + } + } +} + +sv_frame_t CodedAudioFileReader::pushBuffer(float *buffer, sv_frame_t sz, bool final) { m_fileFrameCount += sz; @@ -368,6 +412,8 @@ } else { pushBufferNonResampling(buffer, sz); } + + return sz; } void @@ -376,6 +422,7 @@ float clip = 1.0; sv_frame_t count = sz * m_channelCount; + // statistics for (sv_frame_t j = 0; j < sz; ++j) { for (int c = 0; c < m_channelCount; ++c) { sv_frame_t i = j * m_channelCount + c; @@ -419,16 +466,6 @@ case CacheInMemory: m_dataLock.lock(); - /* - if (m_data.size() < 5120) { - for (int i = 0; i < count && i < 5120; ++i) { - if (i % 8 == 0) cerr << i << ": "; - cerr << buffer[i] << " "; - if (i % 8 == 7) cerr << endl; - } - } - cerr << endl; - */ m_data.insert(m_data.end(), buffer, buffer + count); m_dataLock.unlock(); break; @@ -439,7 +476,7 @@ CodedAudioFileReader::pushBufferResampling(float *buffer, sv_frame_t sz, double ratio, bool final) { - SVDEBUG << "pushBufferResampling: ratio = " << ratio << ", sz = " << sz << ", final = " << final << endl; +// SVDEBUG << "pushBufferResampling: ratio = " << ratio << ", sz = " << sz << ", final = " << final << endl; if (sz > 0) { @@ -462,7 +499,7 @@ sv_frame_t padSamples = padFrames * m_channelCount; - SVDEBUG << "frameCount = " << m_frameCount << ", equivFileFrames = " << double(m_frameCount) / ratio << ", m_fileFrameCount = " << m_fileFrameCount << ", padFrames= " << padFrames << ", padSamples = " << padSamples << endl; + SVDEBUG << "CodedAudioFileReader::pushBufferResampling: frameCount = " << m_frameCount << ", equivFileFrames = " << double(m_frameCount) / ratio << ", m_fileFrameCount = " << m_fileFrameCount << ", padFrames = " << padFrames << ", padSamples = " << padSamples << endl; float *padding = new float[padSamples]; for (sv_frame_t i = 0; i < padSamples; ++i) padding[i] = 0.f;
--- a/data/fileio/CodedAudioFileReader.h Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/CodedAudioFileReader.h Tue Nov 29 16:45:53 2016 +0000 @@ -62,6 +62,9 @@ void initialiseDecodeCache(); // samplerate, channels must have been set + // compensation for encoder delays: + void setFramesToTrim(sv_frame_t fromStart, sv_frame_t fromEnd); + // may throw InsufficientDiscSpace: void addSamplesToDecodeCache(float **samples, sv_frame_t nframes); void addSamplesToDecodeCache(float *samplesInterleaved, sv_frame_t nframes); @@ -76,8 +79,14 @@ void endSerialised(); private: - void pushBuffer(float *interleaved, sv_frame_t sz, bool final); + void pushCacheWriteBufferMaybe(bool final); + + sv_frame_t pushBuffer(float *interleaved, sv_frame_t sz, bool final); + + // to be called only by pushBuffer void pushBufferResampling(float *interleaved, sv_frame_t sz, double ratio, bool final); + + // to be called only by pushBuffer and pushBufferResampling void pushBufferNonResampling(float *interleaved, sv_frame_t sz); protected: @@ -93,7 +102,7 @@ SNDFILE *m_cacheFileWritePtr; WavFileReader *m_cacheFileReader; float *m_cacheWriteBuffer; - sv_frame_t m_cacheWriteBufferIndex; + sv_frame_t m_cacheWriteBufferIndex; // samples sv_frame_t m_cacheWriteBufferSize; // frames Resampler *m_resampler; @@ -104,6 +113,9 @@ float m_max; float m_gain; + sv_frame_t m_trimFromStart; + sv_frame_t m_trimFromEnd; + sv_frame_t m_clippedCount; sv_frame_t m_firstNonzero; sv_frame_t m_lastNonzero;
--- a/data/fileio/MP3FileReader.cpp Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/MP3FileReader.cpp Tue Nov 29 16:45:53 2016 +0000 @@ -39,13 +39,19 @@ #define open _open #endif +using std::string; + +static sv_frame_t DEFAULT_DECODER_DELAY = 529; + MP3FileReader::MP3FileReader(FileSource source, DecodeMode decodeMode, - CacheMode mode, sv_samplerate_t targetRate, + CacheMode mode, GaplessMode gaplessMode, + sv_samplerate_t targetRate, bool normalised, ProgressReporter *reporter) : CodedAudioFileReader(mode, targetRate, normalised), m_source(source), m_path(source.getLocalFilename()), + m_gaplessMode(gaplessMode), m_decodeErrorShown(false), m_decodeThread(0) { @@ -65,6 +71,10 @@ m_done = false; m_reporter = reporter; + if (m_gaplessMode == Gapless) { + CodedAudioFileReader::setFramesToTrim(DEFAULT_DECODER_DELAY, 0); + } + struct stat stat; if (::stat(m_path.toLocal8Bit().data(), &stat) == -1 || stat.st_size == 0) { m_error = QString("File %1 does not exist.").arg(m_path); @@ -307,6 +317,7 @@ data.start = (unsigned char const *)mm; data.length = sz; + data.finished = false; data.reader = this; mad_decoder_init(&decoder, // decoder to initialise @@ -333,7 +344,10 @@ { DecoderData *data = (DecoderData *)dp; - if (!data->length) return MAD_FLOW_STOP; + if (!data->length) { + data->finished = true; + return MAD_FLOW_STOP; + } unsigned char const *start = data->start; sv_frame_t length = data->length; @@ -365,26 +379,92 @@ return data->reader->filter(stream, frame); } +static string toMagic(unsigned long fourcc) +{ + string magic("...."); + for (int i = 0; i < 4; ++i) { + magic[3-i] = char((fourcc >> (8*i)) & 0xff); + } + return magic; +} + enum mad_flow MP3FileReader::filter(struct mad_stream const *stream, struct mad_frame *) { if (m_mp3FrameCount > 0) { + // only handle info frame if it appears as first mp3 frame return MAD_FLOW_CONTINUE; + } + + if (m_gaplessMode == Gappy) { + // Our non-gapless mode does not even filter out the Xing/LAME + // frame. That's because the main reason non-gapless mode + // exists is for backward compatibility with MP3FileReader + // behaviour before the gapless support was added, so we even + // need to keep the spurious 1152 samples resulting from + // feeding Xing/LAME frame to the decoder as otherwise we'd + // have different output from before. + SVDEBUG << "MP3FileReader: Not gapless mode, not checking Xing/LAME frame" + << endl; + return MAD_FLOW_CONTINUE; + } + + struct mad_bitptr ptr = stream->anc_ptr; + string magic = toMagic(mad_bit_read(&ptr, 32)); + + if (magic == "Xing" || magic == "Info") { + + SVDEBUG << "MP3FileReader: Found Xing/LAME metadata frame (magic = \"" + << magic << "\")" << endl; + + // All we want at this point is the LAME encoder delay and + // padding values. We expect to see the Xing/Info magic (which + // we've already read), then 116 bytes of Xing data, then LAME + // magic, 5 byte version string, 12 bytes of LAME data that we + // aren't currently interested in, then the delays encoded as + // two 12-bit numbers into three bytes. + // + // (See gabriel.mp3-tech.org/mp3infotag.html) + + for (int skip = 0; skip < 116; ++skip) { + (void)mad_bit_read(&ptr, 8); + } + + magic = toMagic(mad_bit_read(&ptr, 32)); + + if (magic == "LAME") { + + SVDEBUG << "MP3FileReader: Found LAME-specific metadata" << endl; + + for (int skip = 0; skip < 5 + 12; ++skip) { + (void)mad_bit_read(&ptr, 8); + } + + auto delay = mad_bit_read(&ptr, 12); + auto padding = mad_bit_read(&ptr, 12); + + sv_frame_t delayToDrop = DEFAULT_DECODER_DELAY + delay; + sv_frame_t paddingToDrop = padding - DEFAULT_DECODER_DELAY; + if (paddingToDrop < 0) paddingToDrop = 0; + + SVDEBUG << "MP3FileReader: LAME encoder delay = " << delay + << ", padding = " << padding << endl; + + SVDEBUG << "MP3FileReader: Will be trimming " << delayToDrop + << " samples from start and " << paddingToDrop + << " from end" << endl; + + CodedAudioFileReader::setFramesToTrim(delayToDrop, paddingToDrop); + + } else { + SVDEBUG << "MP3FileReader: Xing frame has no LAME metadata" << endl; + } + + return MAD_FLOW_IGNORE; + } else { - struct mad_bitptr ptr = stream->anc_ptr; - unsigned long fourcc = mad_bit_read(&ptr, 32); - std::string magic("...."); - for (int i = 0; i < 4; ++i) { - magic[3-i] = char((fourcc >> (8*i)) & 0xff); - } - if (magic == "Xing" || magic == "Info" || magic == "LAME") { - SVDEBUG << "MP3FileReader: Discarding metadata frame (magic = \"" - << magic << "\")" << endl; - return MAD_FLOW_IGNORE; - } else { - return MAD_FLOW_CONTINUE; - } + return MAD_FLOW_CONTINUE; } } @@ -493,8 +573,9 @@ DecoderData *data = (DecoderData *)dp; if (stream->error == MAD_ERROR_LOSTSYNC && - data->length == 0) { - // We are at end of file, losing sync is expected behaviour + data->finished) { + // We are at end of file, losing sync is expected behaviour, + // don't report it return MAD_FLOW_CONTINUE; }
--- a/data/fileio/MP3FileReader.h Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/MP3FileReader.h Tue Nov 29 16:45:53 2016 +0000 @@ -32,9 +32,43 @@ Q_OBJECT public: + /** + * How the MP3FileReader should handle leading and trailing gaps. + * See http://lame.sourceforge.net/tech-FAQ.txt for a technical + * explanation of the numbers here. + */ + enum GaplessMode { + /** + * Trim unwanted samples from the start and end of the decoded + * audio. From the start, trim a number of samples equal to + * the decoder delay (a fixed 529 samples) plus any encoder + * delay that may be specified in Xing/LAME metadata. From the + * end, trim any padding specified in Xing/LAME metadata, less + * the fixed decoder delay. This usually results in "gapless" + * audio, i.e. with no spurious zero padding at either end. + */ + Gapless, + + /** + * Do not trim any samples. Also do not suppress any frames + * from being passed to the mp3 decoder, even Xing/LAME + * metadata frames. This will result in the audio being padded + * with zeros at either end: at the start, typically + * 529+576+1152 = 2257 samples for LAME-encoded mp3s; at the + * end an unknown number depending on the fill ratio of the + * final coded frame, but typically less than 1152-529 = 623. + * + * This mode produces the same output as produced by older + * versions of this code before the gapless option was added, + * and is present mostly for backward compatibility. + */ + Gappy + }; + MP3FileReader(FileSource source, DecodeMode decodeMode, CacheMode cacheMode, + GaplessMode gaplessMode, sv_samplerate_t targetRate = 0, bool normalised = false, ProgressReporter *reporter = 0); @@ -68,6 +102,7 @@ QString m_title; QString m_maker; TagMap m_tags; + GaplessMode m_gaplessMode; sv_frame_t m_fileSize; double m_bitrateNum; int m_bitrateDenom; @@ -89,6 +124,7 @@ struct DecoderData { unsigned char const *start; sv_frame_t length; + bool finished; MP3FileReader *reader; };
--- a/data/fileio/test/AudioFileReaderTest.h Tue Nov 29 08:58:50 2016 +0000 +++ b/data/fileio/test/AudioFileReaderTest.h Tue Nov 29 16:45:53 2016 +0000 @@ -112,19 +112,25 @@ // Our limits are pretty relaxed -- we're not testing decoder // or resampler quality here, just whether the results are - // plainly wrong (e.g. at wrong samplerate or with an offset) + // plainly wrong (e.g. at wrong samplerate or with an offset). - double limit = 0.01; - double edgeLimit = limit * 10; // in first or final edgeSize frames + double maxLimit = 0.01; + double meanLimit = 0.001; + double edgeLimit = maxLimit * 10; // in first or final edgeSize frames int edgeSize = 100; if (nominalDepth < 16) { - limit = 0.02; - } - if (extension == "ogg" || extension == "mp3" || - extension == "aac" || extension == "m4a") { - limit = 0.1; - edgeLimit = limit * 3; + maxLimit = 0.02; + meanLimit = 0.02; + } else if (extension == "ogg" || extension == "mp3") { + maxLimit = 0.1; + meanLimit = 0.035; + edgeLimit = maxLimit * 3; + } else if (extension == "aac" || extension == "m4a") { + maxLimit = 0.3; // seems max diff can be quite large here + // even when mean is fairly small + meanLimit = 0.01; + edgeLimit = maxLimit * 3; } // And we ignore completely the last few frames when upsampling @@ -135,7 +141,8 @@ if (extension == "aac" || extension == "m4a") { // our m4a file appears to have a fixed offset of 1024 (at // file sample rate) - offset = int(round((1024 / nominalRate) * readRate)); + // offset = int(round((1024 / nominalRate) * readRate)); + offset = 0; } if (extension == "mp3") { @@ -157,13 +164,15 @@ } for (int c = 0; c < channels; ++c) { + float maxdiff = 0.f; int maxAt = 0; float totdiff = 0.f; + for (int i = 0; i < refFrames; ++i) { int ix = i + offset; if (ix >= read) { - cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward are lost)" << endl; + cerr << "ERROR: audiofile " << audiofile << " reads truncated (read-rate reference frames " << i << " onward, of " << refFrames << ", are lost)" << endl; QVERIFY(ix < read); } if (ix + discard >= read) { @@ -171,7 +180,7 @@ // resampling (discard > 0) continue; } - float diff = fabsf(test[(ix) * channels + c] - + float diff = fabsf(test[ix * channels + c] - reference[i * channels + c]); totdiff += diff; // in edge areas, record this only if it exceeds edgeLimit @@ -187,16 +196,28 @@ } } } + + // check for spurious material at end + for (sv_frame_t i = refFrames; i + offset < read; ++i) { + sv_frame_t ix = i + offset; + float quiet = 1e-6f; + float mag = fabsf(test[ix * channels + c]); + if (mag > quiet) { + cerr << "ERROR: audiofile " << audiofile << " contains spurious data after end of reference (found sample " << test[ix * channels + c] << " at index " << ix << " of channel " << c << ")" << endl; + QVERIFY(mag < quiet); + } + } + float meandiff = totdiff / float(read); // cerr << "meandiff on channel " << c << ": " << meandiff << endl; // cerr << "maxdiff on channel " << c << ": " << maxdiff << " at " << maxAt << endl; - if (meandiff >= limit) { + if (meandiff >= meanLimit) { cerr << "ERROR: for audiofile " << audiofile << ": mean diff = " << meandiff << " for channel " << c << endl; - QVERIFY(meandiff < limit); + QVERIFY(meandiff < meanLimit); } - if (maxdiff >= limit) { + if (maxdiff >= maxLimit) { cerr << "ERROR: for audiofile " << audiofile << ": max diff = " << maxdiff << " at frame " << maxAt << " of " << read << " on channel " << c << " (mean diff = " << meandiff << ")" << endl; - QVERIFY(maxdiff < limit); + QVERIFY(maxdiff < maxLimit); } } }