annotate dsp/tempotracking/DownBeat.h @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 05 Jun 2009 15:12:39 +0000
parents befe5aa6b450
children e5907ae6de17
rev   line source
c@279 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
c@279 2
c@279 3 /*
c@279 4 QM DSP Library
c@279 5
c@279 6 Centre for Digital Music, Queen Mary, University of London.
c@279 7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@279 8 All rights reserved.
c@279 9 */
c@279 10
c@279 11 #ifndef DOWNBEAT_H
c@279 12 #define DOWNBEAT_H
c@279 13
c@279 14 #include <vector>
c@279 15
c@279 16 #include "dsp/rateconversion/Decimator.h"
c@279 17
c@279 18 using std::vector;
c@279 19
c@289 20 class FFTReal;
c@289 21
c@279 22 /**
c@279 23 * This class takes an input audio signal and a sequence of beat
c@279 24 * locations (calculated e.g. by TempoTrackV2) and estimates which of
c@279 25 * the beat locations are downbeats (first beat of the bar).
c@279 26 *
c@279 27 * The input audio signal is expected to have been downsampled to a
c@279 28 * very low sampling rate (e.g. 2700Hz). A utility function for
c@279 29 * downsampling and buffering incoming block-by-block audio is
c@279 30 * provided (see pushAudioBlock and getBufferedAudio).
c@279 31 */
c@279 32 class DownBeat
c@279 33 {
c@279 34 public:
c@279 35 /**
c@279 36 * Construct a downbeat locator that will operate on audio
c@279 37 * downsampled by the given decimation factor from the given
c@279 38 * original sample rate, plus beats extracted from the same audio
c@279 39 * at the given original sample rate with the given frame
c@279 40 * increment.
c@279 41 *
c@279 42 * decimationFactor must be a power of two no greater than 64, and
c@279 43 * dfIncrement must be a multiple of decimationFactor.
c@279 44 */
c@279 45 DownBeat(float originalSampleRate,
c@279 46 size_t decimationFactor,
c@279 47 size_t dfIncrement);
c@279 48 ~DownBeat();
c@279 49
c@280 50 void setBeatsPerBar(int bpb); // NOTE(review): presumably sets the beats-per-bar count assumed by findDownBeats -- confirm in DownBeat.cpp
c@280 51
c@279 52 /**
c@279 53 * Estimate which beats are down-beats.
c@279 54 *
c@279 55 * audio contains the input audio stream after downsampling, and
c@279 56 * audioLength contains the number of samples in this downsampled
c@279 57 * stream.
c@279 58 *
c@279 59 * beats contains a series of beat positions expressed in
c@279 60 * multiples of the df increment at the audio's original sample
c@279 61 * rate, as described to the constructor.
c@279 62 *
c@279 63 * The returned downbeat array contains a series of indices to the
c@279 64 * beats array.
c@279 65 */
c@280 66 void findDownBeats(const float *audio, // downsampled
c@279 67 size_t audioLength, // after downsampling
c@279 68 const vector<double> &beats,
c@279 69 vector<int> &downbeats);
c@281 70
c@281 71 /**
c@281 72 * Return the beat spectral difference function. This is
c@281 73 * calculated during findDownBeats, so this function can only be
c@281 74 * meaningfully called after that has completed. The returned
c@281 75 * vector contains one value for each of the beat times passed in
c@281 76 * to findDownBeats, less one. Each value contains the spectral
c@281 77 * difference between the region prior to the beat's nominal
c@281 78 * position and the region following it.
c@281 79 */
c@281 80 void getBeatSD(vector<double> &beatsd) const;
c@279 81
c@279 82 /**
c@279 83 * For your downsampling convenience: call this function
c@279 84 * repeatedly with input audio blocks containing dfIncrement
c@279 85 * samples at the original sample rate, to decimate them to the
c@279 86 * downsampled rate and buffer them within the DownBeat class.
c@279 87 *
c@279 88 * Call getBufferedAudio() to retrieve the results after all
c@279 89 * blocks have been processed.
c@279 90 */
c@280 91 void pushAudioBlock(const float *audio);
c@279 92
c@279 93 /**
c@279 94 * Retrieve the accumulated audio produced by pushAudioBlock calls.
c@279 95 */
c@280 96 const float *getBufferedAudio(size_t &length) const; // NOTE(review): length is presumably set to the number of buffered samples -- confirm in DownBeat.cpp
c@280 97
c@280 98 /**
c@280 99 * Clear any buffered downsampled audio data.
c@280 100 */
c@280 101 void resetAudioBuffer();
c@279 102
c@279 103 private:
c@279 104 typedef vector<int> i_vec_t;
c@279 105 typedef vector<vector<int> > i_mat_t;
c@279 106 typedef vector<double> d_vec_t;
c@279 107 typedef vector<vector<double> > d_mat_t;
c@279 108
c@279 109 void makeDecimators();
c@279 110 double measureSpecDiff(d_vec_t oldspec, d_vec_t newspec); // NOTE(review): both vectors are taken by value, so each call copies them
c@279 111
c@280 112 int m_bpb; // presumably the value given to setBeatsPerBar -- confirm
c@279 113 float m_rate; // presumably the constructor's originalSampleRate -- confirm
c@279 114 size_t m_factor; // presumably the constructor's decimationFactor -- confirm
c@279 115 size_t m_increment; // presumably the constructor's dfIncrement -- confirm
c@279 116 Decimator *m_decimator1; // NOTE(review): raw pointer members, a declared destructor, and no copy constructor/assignment declared here; verify ownership and copy safety in DownBeat.cpp
c@279 117 Decimator *m_decimator2;
c@280 118 float *m_buffer;
c@280 119 float *m_decbuf;
c@279 120 size_t m_bufsiz;
c@279 121 size_t m_buffill;
c@279 122 size_t m_beatframesize;
c@279 123 double *m_beatframe;
c@289 124 FFTReal *m_fft; // FFTReal is only forward-declared in this header
c@279 125 double *m_fftRealOut;
c@279 126 double *m_fftImagOut;
c@281 127 d_vec_t m_beatsd; // presumably the data returned by getBeatSD -- confirm
c@279 128 };
c@279 129
c@279 130 #endif