qm-dsp: dsp/tempotracking/DownBeat.h annotate

annotate dsp/tempotracking/DownBeat.h @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Fri, 05 Jun 2009 15:12:39 +0000
parents	befe5aa6b450
children	e5907ae6de17

rev	line source
c@279	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@279	2
c@279	3 /*
c@279	4 QM DSP Library
c@279	5
c@279	6 Centre for Digital Music, Queen Mary, University of London.
c@279	7 This file copyright 2008-2009 Matthew Davies and QMUL.
c@279	8 All rights reserved.
c@279	9 */
c@279	10
c@279	11 #ifndef DOWNBEAT_H
c@279	12 #define DOWNBEAT_H
c@279	13
c@279	14 #include <vector>
c@279	15
c@279	16 #include "dsp/rateconversion/Decimator.h"
c@279	17
c@279	18 using std::vector;
c@279	19
c@289	20 class FFTReal;
c@289	21
c@279	22 /**
c@279	23 * This class takes an input audio signal and a sequence of beat
c@279	24 * locations (calculated e.g. by TempoTrackV2) and estimates which of
c@279	25 * the beat locations are downbeats (first beat of the bar).
c@279	26 *
c@279	27 * The input audio signal is expected to have been downsampled to a
c@279	28 * very low sampling rate (e.g. 2700Hz). A utility function for
c@279	29 * downsampling and buffering incoming block-by-block audio is
c@279	30 * provided.
c@279	31 */
c@279	32 class DownBeat
c@279	33 {
c@279	34 public:
c@279	35 /**
c@279	36 * Construct a downbeat locator that will operate on audio
c@279	37 * downsampled by the given decimation factor from the given
c@279	38 * original sample rate, plus beats extracted from the same audio
c@279	39 * at the given original sample rate with the given frame
c@279	40 * increment.
c@279	41 *
c@279	42 * decimationFactor must be a power of two no greater than 64, and
c@279	43 * dfIncrement must be a multiple of decimationFactor.
c@279	44 */
c@279	45 DownBeat(float originalSampleRate,
c@279	46 size_t decimationFactor,
c@279	47 size_t dfIncrement);
c@279	48 ~DownBeat(); // NOTE(review): no copy constructor/assignment is declared, so a copied DownBeat would share the raw pointer members below
c@279	49
c@280	50 void setBeatsPerBar(int bpb); // NOTE(review): presumably the bar length in beats used by findDownBeats -- confirm in DownBeat.cpp
c@280	51
c@279	52 /**
c@279	53 * Estimate which beats are down-beats.
c@279	54 *
c@279	55 * audio contains the input audio stream after downsampling, and
c@279	56 * audioLength contains the number of samples in this downsampled
c@279	57 * stream.
c@279	58 *
c@279	59 * beats contains a series of beat positions expressed in
c@279	60 * multiples of the df increment at the audio's original sample
c@279	61 * rate, as described to the constructor.
c@279	62 *
c@279	63 * The returned downbeat array contains a series of indices to the
c@279	64 * beats array.
c@279	65 */
c@280	66 void findDownBeats(const float *audio, // downsampled
c@279	67 size_t audioLength, // after downsampling
c@279	68 const vector<double> &beats,
c@279	69 vector<int> &downbeats);
c@281	70
c@281	71 /**
c@281	72 * Return the beat spectral difference function. This is
c@281	73 * calculated during findDownBeats, so this function can only be
c@281	74 * meaningfully called after that has completed. The returned
c@281	75 * vector contains one value for each of the beat times passed in
c@281	76 * to findDownBeats, less one. Each value contains the spectral
c@281	77 * difference between the region prior to the beat's nominal position
c@281	78 * and the region following it.
c@281	79 */
c@281	80 void getBeatSD(vector<double> &beatsd) const;
c@279	81
c@279	82 /**
c@279	83 * For your downsampling convenience: call this function
c@279	84 * repeatedly with input audio blocks containing dfIncrement
c@279	85 * samples at the original sample rate, to decimate them to the
c@279	86 * downsampled rate and buffer them within the DownBeat class.
c@279	87 *
c@279	88 * Call getBufferedAudio() to retrieve the results after all
c@279	89 * blocks have been processed.
c@279	90 */
c@280	91 void pushAudioBlock(const float *audio);
c@279	92
c@279	93 /**
c@279	94 * Retrieve the accumulated audio produced by pushAudioBlock calls.
c@279	95 */
c@280	96 const float *getBufferedAudio(size_t &length) const;
c@280	97
c@280	98 /**
c@280	99 * Clear any buffered downsampled audio data.
c@280	100 */
c@280	101 void resetAudioBuffer();
c@279	102
c@279	103 private:
c@279	104 typedef vector<int> i_vec_t;
c@279	105 typedef vector<vector<int> > i_mat_t;
c@279	106 typedef vector<double> d_vec_t;
c@279	107 typedef vector<vector<double> > d_mat_t;
c@279	108
c@279	109 void makeDecimators();
c@279	110 double measureSpecDiff(d_vec_t oldspec, d_vec_t newspec); // both vectors are taken by value, so each call copies them
c@279	111
c@280	112 int m_bpb; // NOTE(review): presumably set by setBeatsPerBar() -- confirm
c@279	113 float m_rate;
c@279	114 size_t m_factor;
c@279	115 size_t m_increment;
c@279	116 Decimator *m_decimator1; // NOTE(review): raw pointer members from here on; allocation and cleanup are not visible in this header
c@279	117 Decimator *m_decimator2;
c@280	118 float *m_buffer;
c@280	119 float *m_decbuf;
c@279	120 size_t m_bufsiz;
c@279	121 size_t m_buffill;
c@279	122 size_t m_beatframesize;
c@279	123 double *m_beatframe;
c@289	124 FFTReal *m_fft; // FFTReal is only forward-declared in this header
c@279	125 double *m_fftRealOut;
c@279	126 double *m_fftImagOut;
c@281	127 d_vec_t m_beatsd; // NOTE(review): presumably the data handed back by getBeatSD() -- confirm
c@279	128 };
c@279	129
c@279	130 #endif

Mercurial > hg > qm-dsp

annotate dsp/tempotracking/DownBeat.h @ 298:255e431ae3d4