Mercurial > hg > libxtract
changeset 105:f2af1c75e3ed
- Added extra argument to xtract_spectrum to give the option of normalising the magnitude/power coefficients
- Removed duplicate code block (argc assignment) from descriptors.c
- Added some extra documentation to libxtract.h
author | Jamie Bullock <jamie@postlude.co.uk> |
---|---|
date | Thu, 27 Dec 2007 17:51:07 +0000 |
parents | a32738e9d955 |
children | 3693573a07fa |
files | src/delta.c src/descriptors.c src/vector.c xtract/libxtract.h xtract/xtract_vector.h |
diffstat | 5 files changed, 69 insertions(+), 90 deletions(-) [+] |
line wrap: on
line diff
--- a/src/delta.c Mon Dec 24 13:21:13 2007 +0000 +++ b/src/delta.c Thu Dec 27 17:51:07 2007 +0000 @@ -24,6 +24,7 @@ int xtract_flux(const float *data, const int N, const void *argv , float *result){ + return XTRACT_FEATURE_NOT_IMPLEMENTED; }
--- a/src/descriptors.c Mon Dec 24 13:21:13 2007 +0000 +++ b/src/descriptors.c Thu Dec 27 17:51:07 2007 +0000 @@ -48,79 +48,6 @@ argc = &d->argc; argv_type = &d->argv.type; - switch(f){ - - case XTRACT_VARIANCE: - case XTRACT_STANDARD_DEVIATION: - case XTRACT_AVERAGE_DEVIATION: - case XTRACT_SPECTRAL_VARIANCE: - case XTRACT_SPECTRAL_STANDARD_DEVIATION: - case XTRACT_SPECTRAL_AVERAGE_DEVIATION: - case XTRACT_SPECTRAL_INHARMONICITY: - case XTRACT_LOWEST_VALUE: - case XTRACT_F0: - case XTRACT_FAILSAFE_F0: - case XTRACT_TONALITY: - *argc = 1; - *argv_type = XTRACT_FLOAT; - break; - case XTRACT_SKEWNESS: - case XTRACT_KURTOSIS: - case XTRACT_SPECTRAL_SKEWNESS: - case XTRACT_SPECTRAL_KURTOSIS: - case XTRACT_SPECTRUM: - case XTRACT_PEAK_SPECTRUM: - case XTRACT_HARMONIC_SPECTRUM: - case XTRACT_NOISINESS: - case XTRACT_CREST: - case XTRACT_ROLLOFF: - *argc = 2; - *argv_type = XTRACT_FLOAT; - break; - case XTRACT_MFCC: - *argc = 1; - *argv_type = XTRACT_MEL_FILTER; - break; - case XTRACT_BARK_COEFFICIENTS: - *argc = XTRACT_BARK_BANDS; - *argv_type = XTRACT_INT; - break; - case XTRACT_MEAN: - case XTRACT_SPECTRAL_MEAN: - case XTRACT_SPECTRAL_CENTROID: - case XTRACT_IRREGULARITY_K: - case XTRACT_IRREGULARITY_J: - case XTRACT_TRISTIMULUS_1: - case XTRACT_TRISTIMULUS_2: - case XTRACT_TRISTIMULUS_3: - case XTRACT_SMOOTHNESS: - case XTRACT_FLATNESS: - case XTRACT_SPREAD: - case XTRACT_ZCR: - case XTRACT_LOUDNESS: - case XTRACT_HIGHEST_VALUE: - case XTRACT_SUM: - case XTRACT_RMS_AMPLITUDE: - case XTRACT_POWER: - case XTRACT_SHARPNESS: - case XTRACT_SPECTRAL_SLOPE: - case XTRACT_HPS: - case XTRACT_FLUX: - case XTRACT_ATTACK_TIME: - case XTRACT_DECAY_TIME: - case XTRACT_DELTA_FEATURE: - case XTRACT_AUTOCORRELATION_FFT: - case XTRACT_DCT: - case XTRACT_AUTOCORRELATION: - case XTRACT_AMDF: - case XTRACT_ASDF: - case XTRACT_NONZERO_COUNT: - case XTRACT_ODD_EVEN_RATIO: - default: - *argc = 0; - break; - } - argv_min = &d->argv.min[0]; argv_max = &d->argv.max[0]; argv_def = 
&d->argv.def[0]; @@ -169,14 +96,22 @@ *(argv_unit + 1) = XTRACT_PERCENT; break; case XTRACT_SPECTRUM: - *argv_min = XTRACT_SR_LOWER_LIMIT / 2; - *argv_max = XTRACT_SR_UPPER_LIMIT / 2; - *argv_def = XTRACT_SR_DEFAULT / 2; + *argv_min = XTRACT_SR_LOWER_LIMIT / XTRACT_FFT_BANDS_MIN; + *argv_max = XTRACT_SR_UPPER_LIMIT / XTRACT_FFT_BANDS_MAX; + *argv_def = XTRACT_SR_DEFAULT / XTRACT_FFT_BANDS_DEF; *argv_unit = XTRACT_HERTZ; *(argv_min + 1) = 0; *(argv_max + 1) = 3 ; *(argv_def + 1) = 0; *(argv_unit + 1) = XTRACT_NONE; + *(argv_min + 2) = 0; + *(argv_max + 2) = 1; + *(argv_def + 2) = 0; + *(argv_unit + 2) = XTRACT_NONE; + *(argv_min + 3) = 0; + *(argv_max + 3) = 1; + *(argv_def + 3) = 0; + *(argv_unit + 3) = XTRACT_NONE; break; case XTRACT_PEAK_SPECTRUM: *argv_min = XTRACT_SR_LOWER_LIMIT / 2; @@ -260,7 +195,6 @@ *argv_donor = XTRACT_INIT_MFCC; break; /* argc = 2 */; - case XTRACT_SPECTRUM: case XTRACT_ROLLOFF: case XTRACT_PEAK_SPECTRUM: *argv_donor = XTRACT_ANY; @@ -288,7 +222,14 @@ *argv_donor = XTRACT_HIGHEST_VALUE; *(argv_donor + 1) = XTRACT_MEAN; break; - /* argc = BARK_BANDS */ + /* argc = 4 */ + case XTRACT_SPECTRUM: + *argv_donor = XTRACT_ANY; + *(argv_donor + 1) = XTRACT_ANY; + *(argv_donor + 2) = XTRACT_ANY; + *(argv_donor + 3) = XTRACT_ANY; + break; + /* BARK_BANDS */ case XTRACT_BARK_COEFFICIENTS: *argv_donor = XTRACT_INIT_BARK; break; @@ -951,7 +892,6 @@ case XTRACT_KURTOSIS: case XTRACT_SPECTRAL_SKEWNESS: case XTRACT_SPECTRAL_KURTOSIS: - case XTRACT_SPECTRUM: case XTRACT_PEAK_SPECTRUM: case XTRACT_HARMONIC_SPECTRUM: case XTRACT_NOISINESS: @@ -960,6 +900,10 @@ *argc = 2; *argv_type = XTRACT_FLOAT; break; + case XTRACT_SPECTRUM: + *argc = 4; + *argv_type = XTRACT_FLOAT; + break; case XTRACT_MFCC: *argc = 1; *argv_type = XTRACT_MEL_FILTER;
--- a/src/vector.c Mon Dec 24 13:21:13 2007 +0000 +++ b/src/vector.c Thu Dec 27 17:51:07 2007 +0000 @@ -45,12 +45,17 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *result){ - float *input, *rfft, q, temp; + float *input, *rfft, q, temp, max; size_t bytes; - int n , NxN, M, vector, withDC, argc; - //fftwf_plan plan; + int n, + NxN, + M, + vector, + withDC, + argc, + normalise; - vector = argc = withDC = 0; + vector = argc = withDC = normalise = 0; M = N >> 1; NxN = XTRACT_SQ(N); @@ -62,13 +67,14 @@ q = *(float *)argv; vector = (int)*((float *)argv+1); withDC = (int)*((float *)argv+2); + normalise = (int)*((float *)argv+3); + + temp = 0.f; + max = 0.f; XTRACT_CHECK_q; if(fft_plans.spectrum_plan == NULL){ - /* FIX: Not sure this should really be here. Might introduce - * DEBUG_POST macro, or some kind of error handler, or leave it to the - * caller... */ fprintf(stderr, "libxtract: Error: xtract_spectrum() has uninitialised plan\n"); return XTRACT_NO_RESULT; @@ -98,6 +104,7 @@ XTRACT_DB_SCALE_OFFSET; result[M + n - 1] = n * q; } + max = result[n] > max ? result[n] : max; } break; @@ -113,6 +120,7 @@ (XTRACT_SQ(rfft[n]) + XTRACT_SQ(rfft[N - n])) / NxN; result[M + n - 1] = n * q; } + max = result[n] > max ? result[n] : max; } break; @@ -133,6 +141,7 @@ XTRACT_DB_SCALE_OFFSET; result[M + n - 1] = n * q; } + max = result[n] > max ? result[n] : max; } break; @@ -149,6 +158,7 @@ XTRACT_SQ(rfft[N - n])) / N; result[M + n - 1] = n * q; } + max = result[n] > max ? result[n] : max; } break; } @@ -157,16 +167,26 @@ /* The DC component */ result[0] = XTRACT_SQ(rfft[0]); result[M + 1] = 0.f; + max = result[0] > max ? result[0] : max; /* The Nyquist */ result[M] = XTRACT_SQ(rfft[M]); result[N + 1] = q * M; + max = result[M] > max ? result[M] : max; + M++; /* So we normalise the Nyquist (below) */ } else { /* The Nyquist */ result[M - 1] = (float)XTRACT_SQ(rfft[M]); result[N - 1] = q * M; + max = result[M - 1] > max ? 
result[M - 1] : max; } + + if(normalise){ + for(n = 0; n < M; n++) + result[n] /= max; + } + fftwf_free(rfft); free(input);
--- a/xtract/libxtract.h Mon Dec 24 13:21:13 2007 +0000 +++ b/xtract/libxtract.h Thu Dec 27 17:51:07 2007 +0000 @@ -25,8 +25,19 @@ * * This philosophy of 'cascading' features is followed throughout the library, for example with features that operate on the magnitude spectrum of a signal vector (e.g. 'irregularity'), the magnitude spectrum is not calculated 'inside' the respective function, instead, a pointer to the first element in an array containing the magnitude spectrum is passed in as an argument. * - * Hopefully this not only makes the library more efficient when computing large numbers of features, but also makes it more flexible because extraction functions can be combined arbitrarily (one can take the irregularility of the Mel Frequency Cepstral Coefficients for example). + * Hopefully this not only makes the library more efficient when computing large numbers of features, but also makes it more flexible because extraction functions can be combined arbitrarily (one can take the irregularility of the Mel Frequency Cepstral Coefficients for example). * + * All feature extraction functions follow the same prototype: + * +int xtract_function_name(const float *data, const int N, const void *argv, float *result){ + * + * \param const float *data points to an array of floats representing the input data + * \param const int N represents the number of elementes from *data to be considered in the calculation + * \param const void *argv represents an arbitrary list of arguments. Used to pass in values required by the feature calculation + * \param float *result points to an array of floats, or a single float represnting the result of the calculation + * + * + * It is up to the calling function to allocate enough memory for the *data, *argv, and *result, and to free it when required. Some feature extraction functions may also require an _init() function to be called in order to perform some initialisation. 
The struct xtract_function_descriptor_t is used to give an indication of recommended default values, and argc for the *argv array. * * LibXtract can be downloaded from http://www.sf.net/projects/libxtract *
--- a/xtract/xtract_vector.h Mon Dec 24 13:21:13 2007 +0000 +++ b/xtract/xtract_vector.h Thu Dec 27 17:51:07 2007 +0000 @@ -34,12 +34,15 @@ * @{ */ -/** \brief Extract normalized (0-1) frequency domain spectrum from time domain signal +/** \brief Extract frequency domain spectrum from time domain signal * * \param *data: a pointer to the first element in an array of floats representing an audio vector * \param N: the number of array elements to be considered - * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second will be cast to an integer and determines the spectrum type (e.g. XTRACT_MAGNITUDE_SPECTRUM, XTRACT_LOG_POWER_SPECTRUM). The third argument determines whether or not the DC component is included in the output. If argv[2] == 1, then the DC component is included in which case the size of the array pointed to by *result must be N+2. For any further use of the array pointed to by *result, the value of N must reflect the (larger) array size. - * \param *result: a pointer to an array of size N containing N/2 magnitude/power/log magnitude/log power coefficients and N/2 bin frequencies. + * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second will be cast to an integer and determines the spectrum type (e.g. XTRACT_MAGNITUDE_SPECTRUM, XTRACT_LOG_POWER_SPECTRUM). The third argument determines whether or not the DC component is included in the output. If argv[2] == 1, then the DC component is included in which case the size of the array pointed to by *result must be N+2. For any further use of the array pointed to by *result, the value of N must reflect the (larger) array size. The fourth argument determines whether the magnitude/power coefficients are to be normalised. If argv[3] == 1, then the coefficients are normalised. + * \param *result: a pointer to an array of size N containing N/2 magnitude/power/log magnitude/log power coefficients and N/2 bin frequencies. 
+ * + * The magnitude/power coefficients are scaled to the range 0-1 so that for a given coefficient x, 0 <= x <= 1 + * */ int xtract_spectrum(const float *data, const int N, const void *argv, float *result);