annotate xtract/xtract_vector.h @ 114:f5040ed4e555

Added new extraction function: xtract_subbands()
author Jamie Bullock <jamie@postlude.co.uk>
date Fri, 15 Feb 2008 15:49:49 +0000
parents 72a9a393d5bd
children 859495925633
rev   line source
jamie@1 1 /* libxtract feature extraction library
jamie@1 2 *
jamie@1 3 * Copyright (C) 2006 Jamie Bullock
jamie@1 4 *
jamie@1 5 * This program is free software; you can redistribute it and/or modify
jamie@1 6 * it under the terms of the GNU General Public License as published by
jamie@1 7 * the Free Software Foundation; either version 2 of the License, or
jamie@1 8 * (at your option) any later version.
jamie@1 9 *
jamie@1 10 * This program is distributed in the hope that it will be useful,
jamie@1 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
jamie@1 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
jamie@1 13 * GNU General Public License for more details.
jamie@1 14 *
jamie@1 15 * You should have received a copy of the GNU General Public License
jamie@1 16 * along with this program; if not, write to the Free Software
jamie@1 17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
jamie@1 18 * USA.
jamie@1 19 */
jamie@1 20
jamie@1 21 /* xtract_vector.h: declares functions that extract a feature as a vector from an input vector */
jamie@1 22
jamie@56 23 #ifndef XTRACT_VECTOR_H
jamie@56 24 #define XTRACT_VECTOR_H
jamie@1 25
jamie@1 26 #ifdef __cplusplus
jamie@1 27 extern "C" {
jamie@1 28 #endif
jamie@20 29
jamie@20 30 /**
jamie@83 31 * \defgroup vector vector extraction functions
jamie@20 32 *
jamie@83 33 * Defines vector extraction functions, and their parameters.
jamie@20 34 * @{
jamie@20 35 */
jamie@1 36
jamie@105 37 /** \brief Extract frequency domain spectrum from time domain signal
jamie@2 38 *
jamie@2 39 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@2 40 * \param N: the number of array elements to be considered
jamie@105 41 * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second will be cast to an integer and determines the spectrum type (e.g. XTRACT_MAGNITUDE_SPECTRUM, XTRACT_LOG_POWER_SPECTRUM). The third argument determines whether or not the DC component is included in the output. If argv[2] == 1, then the DC component is included in which case the size of the array pointed to by *result must be N+2. For any further use of the array pointed to by *result, the value of N must reflect the (larger) array size. The fourth argument determines whether the magnitude/power coefficients are to be normalised. If argv[3] == 1, then the coefficients are normalised.
jamie@105 42 * \param *result: a pointer to an array of size N containing N/2 magnitude/power/log magnitude/log power coefficients and N/2 bin frequencies.
jamie@105 43 *
jamie@105 44 * The magnitude/power coefficients are scaled to the range 0-1 so that for a given coefficient x, 0 <= x <= 1
jamie@105 45 *
jamie@113 46 * \note Before calling xtract_spectrum(), the FFT must be initialised by calling xtract_init_fft(N, XTRACT_SPECTRUM)
jamie@113 47 *
jamie@2 48 */
jamie@54 49 int xtract_spectrum(const float *data, const int N, const void *argv, float *result);
jamie@1 50
jamie@30 51 /** \brief Extract autocorrelation from time domain signal using FFT based method
jamie@30 52 *
jamie@30 53 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@30 54 * \param N: the number of array elements to be considered
jamie@30 55 * \param *argv: a pointer to NULL
jamie@30 56 * \param *result: the autocorrelation of N values from the array pointed to by *data
jamie@30 57 */
jamie@43 58 int xtract_autocorrelation_fft(const float *data, const int N, const void *argv, float *result);
jamie@30 59
jamie@30 60 /** \brief Extract Mel Frequency Cepstral Coefficients based on a method described by Rabiner
jamie@30 61 *
jamie@72 62 * \param *data: a pointer to the first element in an array of spectral magnitudes, e.g. the first half of the array pointed to by *result from xtract_spectrum()
jamie@30 63 * \param N: the number of array elements to be considered
jamie@30 64 * \param *argv: a pointer to a data structure of type xtract_mel_filter, containing n_filters coefficient tables to make up a mel-spaced filterbank
jamie@30 65 * \param *result: a pointer to an array containing the resultant MFCC
jamie@30 66 *
jamie@30 67 * The data structure pointed to by *argv must be obtained by first calling xtract_init_mfcc
jamie@30 68 */
jamie@43 69 int xtract_mfcc(const float *data, const int N, const void *argv, float *result);
jamie@30 70
jamie@30 71 /** \brief Extract the Discrete Cosine transform of a time domain signal
jamie@30 72 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@30 73 * \param N: the number of array elements to be considered
jamie@30 74 * \param *argv: a pointer to NULL
jamie@30 75 * \param *result: a pointer to an array containing resultant dct coefficients
jamie@30 76 */
jamie@43 77 int xtract_dct(const float *data, const int N, const void *argv, float *result);
jamie@1 78
jamie@2 79 /** \brief Extract autocorrelation from time domain signal using time-domain autocorrelation technique
jamie@2 80 *
jamie@2 81 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@2 82 * \param N: the number of array elements to be considered
jamie@2 83 * \param *argv: a pointer to NULL
jamie@2 84 * \param *result: the autocorrelation of N values from the array pointed to by *data
jamie@2 85 */
jamie@43 86 int xtract_autocorrelation(const float *data, const int N, const void *argv, float *result);
jamie@1 87
jamie@2 88 /** \brief Extract Average Magnitude Difference Function from time domain signal
jamie@2 89 *
jamie@47 90 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@2 91 * \param N: the number of array elements to be considered
jamie@2 92 * \param *argv: a pointer to NULL
jamie@2 93 * \param *result: the AMDF of N values from the array pointed to by *data
jamie@2 94 */
jamie@43 95 int xtract_amdf(const float *data, const int N, const void *argv, float *result);
jamie@1 96
jamie@2 97 /** \brief Extract Average Squared Difference Function from time domain signal
jamie@2 98 *
jamie@2 99 * \param *data: a pointer to the first element in an array of floats representing an audio vector
jamie@2 100 * \param N: the number of array elements to be considered
jamie@2 101 * \param *argv: a pointer to NULL
jamie@2 102 * \param *result: the ASDF of N values from the array pointed to by *data
jamie@2 103 */
jamie@43 104 int xtract_asdf(const float *data, const int N, const void *argv, float *result);
jamie@1 105
jamie@2 106 /** \brief Extract Bark band coefficients
jamie@54 107 * \param *data: a pointer to the first element in an array of floats representing the magnitude coefficients from the magnitude spectrum of an audio vector (e.g. the first half of the array pointed to by *result from xtract_spectrum()).
jamie@2 108 * \param N: the number of array elements to be considered
jamie@42 109 * \param *argv: a pointer to an array of ints representing the limits of each bark band. This can be obtained by calling xtract_init_bark.
jamie@2 110 * \param *result: a pointer to an array containing resultant bark coefficients
jamie@2 111 *
jamie@2 112 * The limits array pointed to by *argv must be obtained by first calling xtract_init_bark
jamie@2 113 *
jamie@2 114 */
jamie@43 115 int xtract_bark_coefficients(const float *data, const int N, const void *argv, float *result);
jamie@1 116
jamie@52 117 /** \brief Extract the amplitude and frequency of spectral peaks from a magnitude spectrum
jamie@59 118 * \param *data: a pointer to an array of size N containing N magnitude/power/log magnitude/log power coefficients (e.g. the first half of the array pointed to by *result from xtract_spectrum()).
jamie@59 119 * \param N: the size of the input array (note: it is assumed that enough memory has been allocated for an output array twice the size)
jamie@55 120 * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second representing the peak threshold as percentage of the magnitude of the maximum peak found
jamie@59 121 * \param *result: a pointer to an array of size N * 2 containing N magnitude/power/log magnitude/log power coefficients and N bin frequencies.
jamie@45 122 *
jamie@2 123 */
jamie@52 124 int xtract_peak_spectrum(const float *data, const int N, const void *argv, float *result);
jamie@20 125
jamie@38 126 /** \brief Extract the harmonic spectrum from a peak spectrum
jamie@52 127 * \param *data: a pointer to the first element in an array of floats representing the peak spectrum of an audio vector (e.g. *result from xtract_peak_spectrum()). It is expected that the first half of the array pointed to by *data will contain amplitudes for each peak considered, and the second half will contain the respective frequencies
jamie@38 128 * \param N: the size of the array pointed to by *data
jamie@38 129 * \param *argv: a pointer to an array containing the fundamental (f0) of the spectrum, and a threshold (t) where 0<=t<=1.0, and t determines the distance from the nearest harmonic number within which a partial can be considered harmonic.
jamie@52 130 * \param *result: a pointer to an array of size N containing N/2 magnitude coefficients and N/2 bin frequencies.
jamie@38 131 */
jamie@52 132 int xtract_harmonic_spectrum(const float *data, const int N, const void *argv, float *result);
jamie@38 133
jamie@104 134 /** \brief Extract Linear Predictive Coding Coefficients
jamie@104 135 *
jamie@104 136 * Based on algorithm in Rabiner and Juang as implemented by Jutta Degener in Dr. Dobb's Journal December, 1994.
jamie@104 137 *
jamie@104 138 * Returns N-1 reflection (PARCOR) coefficients and N-1 LPC coefficients via *result
jamie@104 139 *
jamie@104 140 * \param *data: N autocorrelation values, e.g. the data pointed to by *result from xtract_autocorrelation()
jamie@104 141 * \param N: the number of autocorrelation values to be considered
jamie@104 142 * \param *argv: a pointer to NULL
jamie@104 143 * \param *result: a pointer to an array containing N-1 reflection coefficients and N-1 LPC coefficients.
jamie@104 144 *
jamie@104 145 * An array of size 2 * (N - 1) must be allocated, and *result must point to its first element.
jamie@104 146 */
jamie@104 147 int xtract_lpc(const float *data, const int N, const void *argv, float *result);
jamie@104 148
jamie@104 149 /** \brief Extract Linear Predictive Coding Cepstral Coefficients
jamie@104 150 *
jamie@104 151 * \param *data: a pointer to the first element in an array of LPC coefficients e.g. a pointer to the second half of the array pointed to by *result from xtract_lpc()
jamie@104 152 * \param N: the number of LPC coefficients to be considered
jamie@104 153 * \param *argv: a pointer to a float representing the order of the result vector. This must be a whole number. According to Rabiner and Juang the ratio between the number (p) of LPC coefficients and the order (Q) of the LPC cepstrum is given by Q ~ (3/2)p where Q > p.
jamie@104 154 * \param *result: a pointer to an array containing the resultant LPCC.
jamie@104 155 *
jamie@104 156 * An array of size Q, where Q is given by argv[0] must be allocated, and *result must point to its first element.
jamie@104 157 *
jamie@104 158 */
jamie@104 159 int xtract_lpcc(const float *data, const int N, const void *argv, float *result);
jamie@104 160
jamie@114 161 /** \brief Extract subbands from a spectrum
jamie@114 162 *
jamie@114 163 * \param *data: a pointer to an array of size N containing N magnitude/power/log magnitude/log power coefficients (e.g. the first half of the array pointed to by *result from xtract_spectrum()).
jamie@114 164 * \param N: the number of elements from the array pointed to by *data to be considered
jamie@114 165 * \param *argv: A pointer to an array containing four integers. The first represents the extraction function to be applied to each subband e.g. XTRACT_SUM or XTRACT_MEAN, the second represents the number of subbands required, and the third represents the frequency scale to be used for the subband bounds as defined in the enumeration xtract_subband_scales_ (libxtract.h). The fourth integer represents the start point of the subbands as a location in the input array as pointed to by *data (e.g. a value of 5 would start the subband extraction at bin 5)
jamie@114 166 * \param *result: A pointer to an array containing the resultant subband values. The calling function is responsible for allocating and freeing memory for *result. xtract_subbands() assumes that at least argv[1] * sizeof(float) bytes have been allocated. If the requested nbands extends the subband range beyond N, then the remaining bands will be set to 0. If the array pointed to by *result has more than argv[1] elements, the superfluous elements will be unchanged.
jamie@114 167 *
jamie@114 168 * xtract_subbands() divides a spectrum into subbands and applies the function given by argv[0] to the values in each subband to give a 'reduced' representation of the spectrum as *result
jamie@114 169 *
jamie@114 170 * Specifying XTRACT_OCTAVE_SUBBANDS will extract subbands at each octave from the start bin until argv[1] is reached or N is reached
jamie@114 171 * Specifying XTRACT_LINEAR_SUBBANDS will extract argv[1] equal sized subbands between the start bin and N
jamie@114 172 *
jamie@114 173 *
jamie@114 174 * It is assumed that a sensible function will be given in argv[0]; the argv passed to that function will always be NULL. Sensible values for argv[0] are XTRACT_MEAN and XTRACT_SUM, although something like XTRACT_IRREGULARITY_K might yield interesting results.
jamie@114 175 *
jamie@114 176 */
jamie@114 177 int xtract_subbands(const float *data, const int N, const void *argv, float *result);
jamie@20 178 /** @} */
jamie@20 179
jamie@1 180 #ifdef __cplusplus
jamie@1 181 }
jamie@1 182 #endif
jamie@1 183
jamie@1 184 #endif