annotate src/MatchPipeline.h @ 246:aac9ad4064ea subsequence tip

Fix incorrect handling of silent tail in the non-subsequence MATCH phase; some debug output changes
author Chris Cannam
date Fri, 24 Jul 2020 14:29:55 +0100
parents 2f3ecf5d2651
children
rev   line source
Chris@105 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@105 2 /*
Chris@105 3 Vamp feature extraction plugin using the MATCH audio alignment
Chris@105 4 algorithm.
Chris@105 5
Chris@105 6 Centre for Digital Music, Queen Mary, University of London.
Chris@236 7 Copyright (c) 2007-2020 Simon Dixon, Chris Cannam, and Queen Mary
Chris@230 8 University of London, Copyright (c) 2014-2015 Tido GmbH.
Chris@105 9
Chris@105 10 This program is free software; you can redistribute it and/or
Chris@105 11 modify it under the terms of the GNU General Public License as
Chris@105 12 published by the Free Software Foundation; either version 2 of the
Chris@105 13 License, or (at your option) any later version. See the file
Chris@105 14 COPYING included with this distribution for more information.
Chris@105 15 */
Chris@105 16
Chris@105 17 #ifndef MATCH_PIPELINE_H
Chris@105 18 #define MATCH_PIPELINE_H
Chris@105 19
Chris@105 20 #include "Matcher.h"
Chris@105 21 #include "Finder.h"
Chris@105 22 #include "FeatureExtractor.h"
Chris@105 23 #include "FeatureConditioner.h"
Chris@105 24 #include "MatchFeatureFeeder.h"
Chris@105 25
Chris@105 26 class MatchPipeline // Takes two input sources through feature extraction, conditioning and matching; the stage order is described on the constructor below.
Chris@105 27 {
Chris@105 28 public:
Chris@105 29 /**
Chris@105 30 * Pipeline consisting of two Matchers, two FeatureConditioners,
Chris@105 31 * two FeatureExtractors, and a Finder. Features may be inserted
Chris@105 32 * at any point in the pipeline.
Chris@105 33 *
Chris@105 34 * The pipeline goes:
Chris@105 35 * Frequency-domain audio
Chris@105 36 * -> Features
Chris@105 37 * -> Conditioned features
Chris@105 38 * -> Matcher
Chris@160 39 *
Chris@160 40 * Only one set of FeatureExtractor::Parameters is provided; this
Chris@160 41 * contains a single reference frequency, but it's possible the
Chris@160 42 * two input streams may have different tuning frequencies. A
Chris@160 43 * separate frequency for the second input can be provided here as
Chris@160 44 * an optional parameter if needed.
Chris@105 45 */
Chris@105 46 MatchPipeline(FeatureExtractor::Parameters feParams,
Chris@105 47 FeatureConditioner::Parameters fcParams,
Chris@143 48 DistanceMetric::Parameters dParams,
Chris@160 49 Matcher::Parameters matchParams,
Chris@160 50 double secondReferenceFrequency = 0.0); // NOTE(review): the default 0.0 presumably means "no separate frequency, use the one in feParams" -- confirm in the implementation
Chris@105 51
Chris@105 52 ~MatchPipeline();
Chris@105 53
Chris@105 54 /**
Chris@105 55 * Feed in data at the first pipeline stage. The input arrays
Chris@105 56 * represent frames of audio from the two different sources. Each
Chris@105 57 * is provided as a single array of alternating real and imaginary
Chris@105 58 * components.
Chris@105 59 *
Chris@105 60 * Input arrays must have at least 2 * (feParams.fftSize/2 + 1)
Chris@105 61 * elements. The arrays will be passed to FeatureExtractor and
Chris@105 62 * then on into the rest of the pipeline.
Chris@105 63 */
Chris@105 64 void feedFrequencyDomainAudio(const float *arr1, const float *arr2);
Chris@105 65
Chris@105 66 /**
Chris@105 67 * Feed in data at the second pipeline stage. The vectors
Chris@105 68 * represent feature frames from two different sources. They will
Chris@105 69 * be passed in to FeatureConditioner and then on to the rest of
Chris@105 70 * the pipeline.
Chris@105 71 */
Chris@183 72 void feedFeatures(const feature_t &f1, const feature_t &f2);
Chris@106 73
Chris@105 74 /**
Chris@105 75 * Feed in data at the third pipeline stage. The vectors represent
Chris@105 76 * conditioned feature frames from two different sources. They
Chris@105 77 * will be passed to MatchFeatureFeeder for feeding to the two
Chris@105 78 * matchers.
Chris@105 79 */
Chris@183 80 void feedConditionedFeatures(const feature_t &f1, const feature_t &f2);
Chris@105 81
Chris@105 82 /**
Chris@106 83 * If a frame was just fed in at the first or second pipeline
Chris@106 84 * stage, it can be retrieved from the second stage here. That is,
Chris@106 85 * if you provided frequency-domain audio, extractFeatures will
Chris@106 86 * give you back the FeatureExtractor's features.
Chris@106 87 */
Chris@183 88 void extractFeatures(feature_t &f1, feature_t &f2); // f1 and f2 are output parameters, one per source
Chris@106 89
Chris@106 90 /**
Chris@106 91 * Retrieve the conditioned features from the third pipeline stage.
Chris@106 92 */
Chris@183 93 void extractConditionedFeatures(feature_t &f1, feature_t &f2); // f1 and f2 are output parameters, one per source
Chris@106 94
Chris@106 95 /**
Chris@105 96 * Indicate that both inputs have come to an end.
Chris@105 97 */
Chris@105 98 void finish();
Chris@105 99
Chris@155 100 /**
Chris@155 101 * Retrieve the final path. Only valid once all the features have
Chris@155 102 * been supplied and finish() has been called.
Chris@155 103 *
Chris@155 104 * See Finder::retrievePath for more details.
Chris@155 105 */
Chris@155 106 int retrievePath(bool smooth, std::vector<int> &pathx, std::vector<int> &pathy); // NOTE(review): the meaning of the int return value is not stated in this header; check Finder::retrievePath
Chris@155 107
Chris@155 108 /**
Chris@155 109 * Retrieve the forward path resulting from the online search.
Chris@155 110 *
Chris@155 111 * See MatchFeatureFeeder::retrieveForwardPath for more details.
Chris@155 112 */
Chris@155 113 void retrieveForwardPath(std::vector<int> &pathx, std::vector<int> &pathy);
Chris@173 114
Chris@173 115 /**
Chris@173 116 * Get the path cost for the overall path to the end of both
Chris@173 117 * sources.
Chris@173 118 *
Chris@173 119 * See Finder::getOverallCost for more details.
Chris@173 120 */
Chris@173 121 double getOverallCost();
Chris@173 122
Chris@237 123 /**
Chris@237 124 * Return true if the feature's level is above a low threshold,
Chris@237 125 * intended to determine when either of the input streams has
Chris@237 126 * ended (the last frame for a stream is considered to be the last
Chris@237 127 * one that was above the threshold). This is different from the
Chris@237 128 * silence threshold in FeatureConditioner.
Chris@237 129 *
Chris@237 130 * Users of this class do not normally need to call this function
Chris@237 131 * explicitly: it's used internally when processing the
Chris@237 132 * streams. It is exposed here in case other code wants to perform
Chris@237 133 * a similar test in a consistent way.
Chris@237 134 */
Chris@237 135 static bool isAboveEndingThreshold(const feature_t &f);
Chris@237 136
Chris@105 137 private:
Chris@105 138 FeatureExtractor m_fe1; // feature extractor, presumably for the first source -- confirm
Chris@105 139 FeatureExtractor m_fe2; // feature extractor, presumably for the second source (the one secondReferenceFrequency applies to) -- confirm
Chris@105 140 FeatureConditioner m_fc1; // feature conditioner, presumably for the first source -- confirm
Chris@105 141 FeatureConditioner m_fc2; // feature conditioner, presumably for the second source -- confirm
Chris@105 142 Matcher m_pm1; // one of the two matchers named in the constructor comment
Chris@105 143 Matcher m_pm2; // the other of the two matchers
Chris@105 144 MatchFeatureFeeder m_feeder; // feeds conditioned features to the two matchers (see feedConditionedFeatures)
Chris@105 145 int m_lastFrameIn1; // NOTE(review): presumably the last frame of source 1 that was above the ending threshold -- confirm
Chris@105 146 int m_lastFrameIn2; // NOTE(review): presumably the same for source 2 -- confirm
Chris@105 147 int m_frameNo; // NOTE(review): presumably a running count/index of frames fed in -- confirm
Chris@183 148 feature_t m_f1; // presumably the most recent features for source 1, as returned by extractFeatures -- confirm
Chris@183 149 feature_t m_f2; // presumably the most recent features for source 2 -- confirm
Chris@183 150 feature_t m_c1; // presumably the most recent conditioned features for source 1, as returned by extractConditionedFeatures -- confirm
Chris@183 151 feature_t m_c2; // presumably the most recent conditioned features for source 2 -- confirm
Chris@183 152 bool aboveThreshold(const feature_t &f); // NOTE(review): relationship to the public static isAboveEndingThreshold is not visible in this header -- confirm whether both are still needed
Chris@166 153 FeatureExtractor::Parameters paramsWithFreq(FeatureExtractor::Parameters,
Chris@166 154 double); // NOTE(review): presumably returns the given parameters with the reference frequency replaced by the double argument -- confirm
Chris@105 155 };
Chris@105 156
Chris@105 157 #endif