# HG changeset patch # User Chris Cannam # Date 1223583028 0 # Node ID 3e0f1f7bec850def607e051089243dfa87a75b05 # Parent ba7aaacb7211663780b9409a3a0c8088adf4e484 * Fix a nasty and long-standing race condition in MatrixFile's use of FileReadThread that was causing crashes sometimes diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fft/FFTDataServer.cpp --- a/data/fft/FFTDataServer.cpp Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fft/FFTDataServer.cpp Thu Oct 09 20:10:28 2008 +0000 @@ -27,7 +27,7 @@ #include "base/Profiler.h" #include "base/Thread.h" // for debug mutex locker -#define DEBUG_FFT_SERVER 1 +//#define DEBUG_FFT_SERVER 1 //#define DEBUG_FFT_SERVER_FILL 1 #ifdef DEBUG_FFT_SERVER_FILL diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fft/FFTFileCache.cpp --- a/data/fft/FFTFileCache.cpp Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fft/FFTFileCache.cpp Thu Oct 09 20:10:28 2008 +0000 @@ -19,6 +19,7 @@ #include "base/Profiler.h" #include "base/Thread.h" +#include "base/Exceptions.h" #include @@ -308,13 +309,19 @@ if (!m_readbuf) { m_readbuf = new char[m_mfc->getHeight() * 2 * m_mfc->getCellSize()]; } - m_mfc->getColumnAt(x, m_readbuf); - if (m_mfc->haveSetColumnAt(x + 1)) { - m_mfc->getColumnAt - (x + 1, m_readbuf + m_mfc->getCellSize() * m_mfc->getHeight()); - m_readbufWidth = 2; - } else { - m_readbufWidth = 1; + try { + m_mfc->getColumnAt(x, m_readbuf); + if (m_mfc->haveSetColumnAt(x + 1)) { + m_mfc->getColumnAt + (x + 1, m_readbuf + m_mfc->getCellSize() * m_mfc->getHeight()); + m_readbufWidth = 2; + } else { + m_readbufWidth = 1; + } + } catch (FileReadFailed f) { + std::cerr << "ERROR: FFTFileCache::populateReadBuf: File read failed: " + << f.what() << std::endl; + memset(m_readbuf, 0, m_mfc->getHeight() * 2 * m_mfc->getCellSize()); } m_readbufCol = x; } diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fileio/FileReadThread.cpp --- a/data/fileio/FileReadThread.cpp Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fileio/FileReadThread.cpp Thu Oct 09 20:10:28 2008 +0000 @@ -21,7 +21,7 @@ #include #include -#define DEBUG_FILE_READ_THREAD 1 +//#define DEBUG_FILE_READ_THREAD 1 FileReadThread::FileReadThread() : m_nextToken(0), @@ -141,6 +141,24 @@ } bool +FileReadThread::haveRequest(int token) +{ + MutexLocker locker(&m_mutex, "FileReadThread::haveRequest::m_mutex"); + + bool found = false; + + if (m_queue.find(token) != m_queue.end()) { + found = true; + } else if (m_cancelledRequests.find(token) != m_cancelledRequests.end()) { + found = true; + } else if (m_readyRequests.find(token) != m_readyRequests.end()) { + found = true; + } + + return found; +} + +bool FileReadThread::getRequest(int token, Request &request) { MutexLocker locker(&m_mutex, "FileReadThread::getRequest::m_mutex"); @@ -244,6 +262,9 @@ } else { if (r < 0) { ::perror("ERROR: FileReadThread::process: Read failed"); + std::cerr << "ERROR: FileReadThread::process: read of " + << request.size << " at " + << request.start << " failed" << std::endl; request.size = 0; } else if (r < ssize_t(request.size)) { std::cerr << "WARNING: FileReadThread::process: read " @@ -267,11 +288,15 @@ m_queue.erase(token); m_readyRequests[token] = request; #ifdef DEBUG_FILE_READ_THREAD - std::cerr << "FileReadThread::process: done, marking as ready" << std::endl; + std::cerr << "FileReadThread::process: done, marking as ready (success = " << m_readyRequests[token].successful << ")" << std::endl; #endif } else { #ifdef DEBUG_FILE_READ_THREAD - std::cerr << "FileReadThread::process: request disappeared or exiting" << std::endl; + if (m_exiting) { + std::cerr << "FileReadThread::process: exiting" << std::endl; + } else { + std::cerr << "FileReadThread::process: request disappeared" << std::endl; + } #endif } } diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fileio/FileReadThread.h --- a/data/fileio/FileReadThread.h Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fileio/FileReadThread.h Thu Oct 09 20:10:28 2008 +0000 @@ -50,6 +50,7 @@ virtual bool isReady(int token); virtual bool isCancelled(int token); // and safe to delete + virtual bool haveRequest(int token); virtual bool getRequest(int token, Request &request); virtual void done(int token); diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fileio/MatrixFile.cpp --- a/data/fileio/MatrixFile.cpp Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fileio/MatrixFile.cpp Thu Oct 09 20:10:28 2008 +0000 @@ -35,7 +35,7 @@ #include #include -#define DEBUG_MATRIX_FILE 1 +//#define DEBUG_MATRIX_FILE 1 //#define DEBUG_MATRIX_FILE_READ_SET 1 #ifdef DEBUG_MATRIX_FILE_READ_SET @@ -521,8 +521,6 @@ #endif } -static int alloc = 0; - void MatrixFile::primeCache(size_t x, bool goingLeft) { @@ -565,16 +563,30 @@ MutexLocker locker(&m_cacheMutex, "MatrixFile::primeCache::m_cacheMutex"); - FileReadThread::Request request; + // Check for the existence of the request first; if it exists, + // check whether it's ready. Only when we know it's ready do we + // retrieve the actual request, because the reason we need the + // request is to check whether it was successful or not and + // extract data from it, and none of that can be relied upon if we + // retrieve the request before it's ready. (There used to be a + // race condition here, where we retrieved the request and only + // afterwards checked the ready status, pulling data from the + // request if it was found to be ready then.) if (m_requestToken >= 0 && - m_readThread->getRequest(m_requestToken, request)) { + m_readThread->haveRequest(m_requestToken)) { if (x >= m_requestingX && x < m_requestingX + m_requestingWidth) { if (m_readThread->isReady(m_requestToken)) { + FileReadThread::Request request; + if (!m_readThread->getRequest(m_requestToken, request)) { + std::cerr << "ERROR: MatrixFile::primeCache: File read thread has lost our request!" << std::endl; + throw FileReadFailed(m_fileName); + } + if (!request.successful) { std::cerr << "ERROR: MatrixFile::primeCache: Last request was unsuccessful" << std::endl; throw FileReadFailed(m_fileName); @@ -610,25 +622,32 @@ return; } - // the current request is no longer of any use - m_readThread->cancel(m_requestToken); + FileReadThread::Request dud; - // crude way to avoid leaking the data - while (!m_readThread->isCancelled(m_requestToken)) { - usleep(10000); - } + if (!m_readThread->getRequest(m_requestToken, dud)) { + + std::cerr << "ERROR: MatrixFile::primeCache: Inconsistent replies from FileReadThread" << std::endl; + + } else { + + // current request is for the wrong area, so no longer of any use + m_readThread->cancel(m_requestToken); + + // crude way to avoid leaking the data + while (!m_readThread->isCancelled(m_requestToken)) { + usleep(10000); + } #ifdef DEBUG_MATRIX_FILE_READ_SET - std::cerr << "cancelled " << m_requestToken << std::endl; + std::cerr << "cancelled " << m_requestToken << std::endl; #endif - if (m_spareData) { -// std::cerr << this << ": Freeing spare data" << std::endl; - free(m_spareData); + if (m_spareData) { + free(m_spareData); + } + m_spareData = dud.data; + m_readThread->done(m_requestToken); } -// std::cerr << this << ": Moving request data to spare" << std::endl; - m_spareData = request.data; - m_readThread->done(m_requestToken); m_requestToken = -1; } @@ -638,6 +657,8 @@ if (m_fd < 0) resume(); } + FileReadThread::Request request; + request.fd = m_fd; request.mutex = &m_fdMutex; request.start = m_headerSize + rx * m_height * m_cellSize; diff -r ba7aaacb7211 -r 3e0f1f7bec85 data/fileio/MatrixFile.h --- a/data/fileio/MatrixFile.h Thu Oct 09 13:13:33 2008 +0000 +++ b/data/fileio/MatrixFile.h Thu Oct 09 20:10:28 2008 +0000 @@ -68,7 +68,7 @@ void reset(); bool haveSetColumnAt(size_t x) const { return m_columnBitset->get(x); } - void getColumnAt(size_t x, void *data); + void getColumnAt(size_t x, void *data); // may throw FileReadFailed void setColumnAt(size_t x, const void *data); void suspend();