jamie@1: /* libxtract feature extraction library jamie@1: * jamie@1: * Copyright (C) 2006 Jamie Bullock jamie@1: * jamie@1: * This program is free software; you can redistribute it and/or modify jamie@1: * it under the terms of the GNU General Public License as published by jamie@1: * the Free Software Foundation; either version 2 of the License, or jamie@1: * (at your option) any later version. jamie@1: * jamie@1: * This program is distributed in the hope that it will be useful, jamie@1: * but WITHOUT ANY WARRANTY; without even the implied warranty of jamie@1: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the jamie@1: * GNU General Public License for more details. jamie@1: * jamie@1: * You should have received a copy of the GNU General Public License jamie@1: * along with this program; if not, write to the Free Software jamie@1: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, jamie@1: * USA. jamie@1: */ jamie@1: jamie@1: jamie@1: /* xtract_vector.c: defines functions that extract a feature as a single value from an input vector */ jamie@1: jamie@1: #include jamie@43: #include jamie@43: #include jamie@30: jamie@98: #include "xtract/libxtract.h" jamie@98: #include "xtract_macros_private.h" jamie@98: jamie@85: #ifndef roundf jamie@85: float roundf(float f){ jamie@120: if (f - (int)f >= 0.5) jamie@120: return (float)((int)f + 1); jamie@120: else jamie@120: return (float)((int)f); jamie@85: } jamie@85: #endif jamie@85: jamie@113: #ifndef powf jamie@120: #define powf pow jamie@113: #endif jamie@113: jamie@113: #ifndef expf jamie@120: #define expf exp jamie@113: #endif jamie@113: jamie@113: #ifndef sqrtf jamie@120: #define sqrtf sqrt jamie@113: #endif jamie@113: jamie@113: #ifndef fabsf jamie@120: #define fabsf fabs jamie@113: #endif jamie@113: jamie@30: #ifdef XTRACT_FFT jamie@30: jamie@1: #include jamie@98: #include "xtract_globals_private.h" jamie@98: #include "xtract_macros_private.h" jamie@1: jamie@54: int xtract_spectrum(const float *data, const int N, const void *argv, float *result){ jamie@1: jamie@113: float *input, *rfft, q, temp, max, NxN; jamie@43: size_t bytes; jamie@111: int n, jamie@111: m, jamie@105: M, jamie@105: vector, jamie@105: withDC, jamie@105: argc, jamie@105: normalise; jamie@98: jamie@105: vector = argc = withDC = normalise = 0; jamie@1: jamie@54: M = N >> 1; jamie@56: NxN = XTRACT_SQ(N); jamie@54: jamie@54: rfft = (float *)fftwf_malloc(N * sizeof(float)); jamie@43: input = (float *)malloc(bytes = N * sizeof(float)); jamie@43: input = memcpy(input, data, bytes); jamie@1: jamie@56: q = *(float *)argv; jamie@54: vector = (int)*((float *)argv+1); jamie@70: withDC = (int)*((float *)argv+2); jamie@105: normalise = (int)*((float *)argv+3); jamie@105: jamie@105: temp = 0.f; jamie@105: max = 0.f; jamie@46: jamie@56: XTRACT_CHECK_q; jamie@46: jamie@102: if(fft_plans.spectrum_plan == NULL){ jamie@98: fprintf(stderr, jamie@98: "libxtract: Error: xtract_spectrum() has uninitialised plan\n"); jamie@98: return XTRACT_NO_RESULT; jamie@98: } jamie@98: jamie@102: fftwf_execute_r2r(fft_plans.spectrum_plan, input, rfft); jamie@54: jamie@54: switch(vector){ jamie@67: jamie@120: case XTRACT_LOG_MAGNITUDE_SPECTRUM: jamie@120: for(n = 0, m = 0; m < M; ++n, ++m){ jamie@120: if(!withDC && n == 0){ jamie@120: ++n; jamie@120: } jamie@120: if ((temp = XTRACT_SQ(rfft[n]) + jamie@120: XTRACT_SQ(rfft[N - n])) > XTRACT_LOG_LIMIT) jamie@120: temp = logf(sqrtf(temp) / (float)N); jamie@120: else jamie@120: temp = XTRACT_LOG_LIMIT_DB; jamie@111: jamie@120: result[m] = jamie@111: /* Scaling */ jamie@120: (temp + XTRACT_DB_SCALE_OFFSET) / jamie@120: XTRACT_DB_SCALE_OFFSET; jamie@67: jamie@120: XTRACT_SET_FREQUENCY; jamie@120: XTRACT_GET_MAX; jamie@120: } jamie@120: break; jamie@120: jamie@120: case XTRACT_POWER_SPECTRUM: jamie@120: for(n = 0, m = 0; m < M; ++n, ++m){ jamie@120: if(!withDC && n == 0){ jamie@120: ++n; jamie@111: } jamie@111: result[m] = (XTRACT_SQ(rfft[n]) + XTRACT_SQ(rfft[N - n])) / NxN; jamie@120: XTRACT_SET_FREQUENCY; jamie@120: XTRACT_GET_MAX; jamie@120: } jamie@120: break; jamie@67: jamie@120: case XTRACT_LOG_POWER_SPECTRUM: jamie@120: for(n = 0, m = 0; m < M; ++n, ++m){ jamie@120: if(!withDC && n == 0){ jamie@120: ++n; jamie@120: } jamie@120: if ((temp = XTRACT_SQ(rfft[n]) + XTRACT_SQ(rfft[N - n])) > jamie@120: XTRACT_LOG_LIMIT) jamie@120: temp = logf(temp / NxN); jamie@120: else jamie@120: temp = XTRACT_LOG_LIMIT_DB; jamie@111: jamie@120: result[m] = (temp + XTRACT_DB_SCALE_OFFSET) / jamie@120: XTRACT_DB_SCALE_OFFSET; jamie@120: XTRACT_SET_FREQUENCY; jamie@120: XTRACT_GET_MAX; jamie@120: } jamie@120: break; jamie@120: jamie@120: default: jamie@120: /* MAGNITUDE_SPECTRUM */ jamie@120: for(n = 0, m = 0; m < M; ++n, ++m){ jamie@120: if(!withDC && n == 0){ jamie@120: ++n; jamie@111: } jamie@120: result[m] = sqrtf(XTRACT_SQ(rfft[n]) + jamie@120: XTRACT_SQ(rfft[N - n])) / (float)N; jamie@120: XTRACT_SET_FREQUENCY; jamie@120: XTRACT_GET_MAX; jamie@120: } jamie@120: break; jamie@111: jamie@70: } jamie@105: jamie@105: if(normalise){ jamie@105: for(n = 0; n < M; n++) jamie@105: result[n] /= max; jamie@105: } jamie@105: jamie@54: fftwf_free(rfft); jamie@43: free(input); jamie@120: jamie@56: return XTRACT_SUCCESS; jamie@1: } jamie@1: jamie@43: int xtract_autocorrelation_fft(const float *data, const int N, const void *argv, float *result){ jamie@120: jamie@75: float *freq, *time; jamie@75: int n, M; jamie@98: //fftwf_plan plan; jamie@1: jamie@75: M = N << 1; jamie@43: jamie@75: freq = (float *)fftwf_malloc(M * sizeof(float)); jamie@75: /* Zero pad the input vector */ jamie@75: time = (float *)calloc(M, sizeof(float)); jamie@75: time = memcpy(time, data, N * sizeof(float)); jamie@75: jamie@102: fftwf_execute_r2r(fft_plans.autocorrelation_fft_plan_1, time, freq); jamie@98: //plan = fftwf_plan_r2r_1d(M, time, freq, FFTW_R2HC, FFTW_ESTIMATE); jamie@1: jamie@98: //fftwf_execute(plan); jamie@75: jamie@76: for(n = 1; n < N; n++){ jamie@75: freq[n] = XTRACT_SQ(freq[n]) + XTRACT_SQ(freq[M - n]); jamie@120: freq[M - n] = 0.f; jamie@75: } jamie@120: jamie@75: freq[0] = XTRACT_SQ(freq[0]); jamie@75: freq[N] = XTRACT_SQ(freq[N]); jamie@75: jamie@98: //plan = fftwf_plan_r2r_1d(M, freq, time, FFTW_HC2R, FFTW_ESTIMATE); jamie@75: jamie@98: //fftwf_execute(plan); jamie@98: jamie@102: fftwf_execute_r2r(fft_plans.autocorrelation_fft_plan_2, freq, time); jamie@120: jamie@75: /* Normalisation factor */ jamie@75: M = M * N; jamie@75: jamie@75: for(n = 0; n < N; n++) jamie@120: result[n] = time[n] / (float)M; jamie@120: /* result[n] = time[n+1] / (float)M; */ jamie@75: jamie@98: //fftwf_destroy_plan(plan); jamie@75: fftwf_free(freq); jamie@75: free(time); jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@1: } jamie@1: jamie@43: int xtract_mfcc(const float *data, const int N, const void *argv, float *result){ jamie@30: jamie@30: xtract_mel_filter *f; jamie@30: int n, filter; jamie@30: jamie@30: f = (xtract_mel_filter *)argv; jamie@120: jamie@30: for(filter = 0; filter < f->n_filters; filter++){ danstowell@68: result[filter] = 0.f; jamie@30: for(n = 0; n < N; n++){ jamie@71: result[filter] += data[n] * f->filters[filter][n]; jamie@30: } jamie@113: result[filter] = logf(result[filter] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : result[filter]); jamie@30: } jamie@30: jamie@30: xtract_dct(result, f->n_filters, NULL, result); jamie@120: jamie@56: return XTRACT_SUCCESS; jamie@30: } jamie@30: jamie@43: int xtract_dct(const float *data, const int N, const void *argv, float *result){ jamie@120: jamie@98: //fftwf_plan plan; jamie@120: jamie@98: //plan = jamie@120: // fftwf_plan_r2r_1d(N, (float *) data, result, FFTW_REDFT00, FFTW_ESTIMATE); jamie@120: jamie@102: fftwf_execute_r2r(fft_plans.dct_plan, (float *)data, result); jamie@98: //fftwf_execute(plan); jamie@98: //fftwf_destroy_plan(plan); jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@30: } jamie@30: jamie@30: #else jamie@30: jamie@67: int xtract_spectrum(const float *data, const int N, const void *argv, float *result){ jamie@30: danstowell@66: XTRACT_NEEDS_FFTW; danstowell@66: return XTRACT_NO_RESULT; jamie@30: jamie@30: } jamie@30: jamie@43: int xtract_autocorrelation_fft(const float *data, const int N, const void *argv, float *result){ jamie@30: danstowell@66: XTRACT_NEEDS_FFTW; danstowell@66: return XTRACT_NO_RESULT; jamie@30: jamie@30: } jamie@30: jamie@43: int xtract_mfcc(const float *data, const int N, const void *argv, float *result){ jamie@30: danstowell@66: XTRACT_NEEDS_FFTW; danstowell@66: return XTRACT_NO_RESULT; jamie@30: jamie@30: } jamie@30: jamie@43: int xtract_dct(const float *data, const int N, const void *argv, float *result){ jamie@30: danstowell@66: XTRACT_NEEDS_FFTW; danstowell@66: return XTRACT_NO_RESULT; jamie@30: jamie@30: } jamie@30: jamie@30: #endif jamie@30: jamie@43: int xtract_autocorrelation(const float *data, const int N, const void *argv, float *result){ jamie@30: jamie@30: /* Naive time domain implementation */ jamie@120: jamie@30: int n = N, i; jamie@120: jamie@30: float corr; jamie@30: jamie@30: while(n--){ jamie@120: corr = 0; jamie@30: for(i = 0; i < N - n; i++){ jamie@30: corr += data[i] * data[i + n]; jamie@30: } jamie@30: result[n] = corr / N; jamie@30: } jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@30: } jamie@30: jamie@43: int xtract_amdf(const float *data, const int N, const void *argv, float *result){ jamie@1: jamie@1: int n = N, i; jamie@120: jamie@6: float md, temp; jamie@1: jamie@1: while(n--){ jamie@120: md = 0.f; jamie@1: for(i = 0; i < N - n; i++){ jamie@6: temp = data[i] - data[i + n]; jamie@120: temp = (temp < 0 ? -temp : temp); jamie@120: md += temp; jamie@1: } jamie@113: result[n] = md / (float)N; jamie@1: } jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@1: } jamie@1: jamie@43: int xtract_asdf(const float *data, const int N, const void *argv, float *result){ jamie@120: jamie@1: int n = N, i; jamie@120: jamie@1: float sd; jamie@1: jamie@1: while(n--){ jamie@120: sd = 0.f; jamie@1: for(i = 0; i < N - n; i++){ jamie@6: /*sd = 1;*/ jamie@56: sd += XTRACT_SQ(data[i] - data[i + n]); jamie@1: } jamie@113: result[n] = sd / (float)N; jamie@1: } jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@1: } jamie@1: jamie@43: int xtract_bark_coefficients(const float *data, const int N, const void *argv, float *result){ jamie@1: jamie@1: int *limits, band, n; jamie@1: jamie@1: limits = (int *)argv; jamie@120: jamie@59: for(band = 0; band < XTRACT_BARK_BANDS - 1; band++){ jamie@110: result[band] = 0.f; jamie@1: for(n = limits[band]; n < limits[band + 1]; n++) jamie@1: result[band] += data[n]; jamie@1: } jamie@38: jamie@56: return XTRACT_SUCCESS; jamie@1: } jamie@1: jamie@52: int xtract_peak_spectrum(const float *data, const int N, const void *argv, float *result){ jamie@1: jamie@56: float threshold, max, y, y2, y3, p, q, *input = NULL; jamie@43: size_t bytes; jamie@59: int n = N, rv = XTRACT_SUCCESS; jamie@49: jamie@56: threshold = max = y = y2 = y3 = p = q = 0.f; jamie@120: jamie@1: if(argv != NULL){ jamie@56: q = ((float *)argv)[0]; jamie@55: threshold = ((float *)argv)[1]; jamie@1: } jamie@49: else jamie@56: rv = XTRACT_BAD_ARGV; jamie@49: jamie@55: if(threshold < 0 || threshold > 100){ jamie@55: threshold = 0; jamie@56: rv = XTRACT_BAD_ARGV; jamie@1: } jamie@1: jamie@56: XTRACT_CHECK_q; jamie@49: jamie@98: input = (float *)calloc(N, sizeof(float)); jamie@98: jamie@98: bytes = N * sizeof(float); jamie@43: jamie@43: if(input != NULL) jamie@120: input = memcpy(input, data, bytes); jamie@43: else jamie@120: return XTRACT_MALLOC_FAILED; jamie@43: jamie@45: while(n--) jamie@56: max = XTRACT_MAX(max, input[n]); jamie@120: jamie@55: threshold *= .01 * max; jamie@1: jamie@1: result[0] = 0; jamie@59: result[N] = 0; jamie@1: jamie@59: for(n = 1; n < N; n++){ jamie@55: if(input[n] >= threshold){ jamie@119: if(input[n] > input[n - 1] && n + 1 < N && input[n] > input[n + 1]){ jamie@117: result[N + n] = q * (n + (p = .5 * ((y = input[n-1]) - jamie@120: (y3 = input[n+1])) / (input[n - 1] - 2 * jamie@120: (y2 = input[n]) + input[n + 1]))); jamie@52: result[n] = y2 - .25 * (y - y3) * p; jamie@1: } jamie@1: else{ jamie@1: result[n] = 0; jamie@59: result[N + n] = 0; jamie@1: } jamie@1: } jamie@1: else{ jamie@1: result[n] = 0; jamie@59: result[N + n] = 0; jamie@1: } jamie@1: } jamie@120: jamie@43: free(input); jamie@56: return (rv ? rv : XTRACT_SUCCESS); jamie@1: } jamie@120: jamie@52: int xtract_harmonic_spectrum(const float *data, const int N, const void *argv, float *result){ jamie@120: jamie@38: int n = (N >> 1), M = n; jamie@38: jamie@43: const float *freqs, *amps; jamie@55: float f0, threshold, ratio, nearest, distance; jamie@38: jamie@52: amps = data; jamie@52: freqs = data + n; jamie@38: f0 = *((float *)argv); jamie@55: threshold = *((float *)argv+1); jamie@38: jamie@38: ratio = nearest = distance = 0.f; jamie@38: jamie@38: while(n--){ jamie@120: if(freqs[n]){ jamie@120: ratio = freqs[n] / f0; jamie@120: nearest = roundf(ratio); jamie@120: distance = fabs(nearest - ratio); jamie@120: if(distance > threshold) jamie@120: result[n] = result[M + n] = 0.f; jamie@120: else { jamie@120: result[n] = amps[n]; jamie@120: result[M + n] = freqs[n]; jamie@120: } jamie@120: } jamie@120: else jamie@120: result[n] = result[M + n] = 0.f; jamie@38: } jamie@56: return XTRACT_SUCCESS; jamie@38: } jamie@120: jamie@104: int xtract_lpc(const float *data, const int N, const void *argv, float *result){ jamie@104: jamie@104: int i, j, k, M, L; jamie@104: float r = 0.f, jamie@104: error = 0.f; jamie@104: jamie@104: float *ref = NULL, jamie@104: *lpc = NULL ; jamie@104: jamie@104: error = data[0]; jamie@104: k = N; /* The length of *data */ jamie@104: L = N - 1; /* The number of LPC coefficients */ jamie@104: M = L * 2; /* The length of *result */ jamie@104: ref = result; jamie@104: lpc = result+L; jamie@113: jamie@104: if(error == 0.0){ jamie@113: memset(result, 0, M * sizeof(float)); jamie@104: return XTRACT_NO_RESULT; jamie@104: } jamie@113: jamie@104: memset(result, 0, M * sizeof(float)); jamie@104: jamie@104: for (i = 0; i < L; i++) { jamie@104: jamie@104: /* Sum up this iteration's reflection coefficient. */ jamie@104: r = -data[i + 1]; jamie@104: for (j = 0; j < i; j++) jamie@104: r -= lpc[j] * data[i - j]; jamie@104: ref[i] = r /= error; jamie@104: jamie@104: /* Update LPC coefficients and total error. */ jamie@104: lpc[i] = r; jamie@104: for (j = 0; j < i / 2; j++) { jamie@104: float tmp = lpc[j]; jamie@104: lpc[j] = r * lpc[i - 1 - j]; jamie@104: lpc[i - 1 - j] += r * tmp; jamie@104: } jamie@104: if (i % 2) lpc[j] += lpc[j] * r; jamie@104: jamie@104: error *= 1 - r * r; jamie@104: } jamie@104: jamie@104: return XTRACT_SUCCESS; jamie@104: } jamie@104: jamie@104: int xtract_lpcc(const float *data, const int N, const void *argv, float *result){ jamie@104: jamie@104: /* Given N lpc coefficients extract an LPC cepstrum of size argv[0] */ jamie@104: /* Based on an an algorithm by rabiner and Juang */ jamie@104: jamie@104: int n, k; jamie@104: float sum; jamie@104: int order = N - 1; /* Eventually change this to Q = 3/2 p as suggested in Rabiner */ jamie@104: int cep_length; jamie@120: jamie@104: if(argv == NULL) jamie@115: cep_length = N - 1; /* FIX: if we're going to have default values, they should come from the descriptor */ jamie@104: else jamie@115: cep_length = *(int *)argv; jamie@120: //cep_length = (int)((float *)argv)[0]; jamie@104: jamie@104: memset(result, 0, cep_length * sizeof(float)); jamie@104: jamie@104: for (n = 1; n <= order && n <= cep_length; n++){ jamie@104: sum = 0.f; jamie@104: for (k = 1; k < n; k++) jamie@104: sum += k * result[k-1] * data[n - k]; jamie@104: result[n-1] = data[n] + sum / n; jamie@104: } jamie@104: jamie@104: /* be wary of these interpolated values */ jamie@104: for(n = order + 1; n <= cep_length; n++){ jamie@104: sum = 0.f; jamie@104: for (k = n - (order - 1); k < n; k++) jamie@104: sum += k * result[k-1] * data[n - k]; jamie@104: result[n-1] = sum / n; jamie@104: } jamie@104: jamie@104: return XTRACT_SUCCESS; jamie@104: jamie@104: } jamie@104: //int xtract_lpcc_s(const float *data, const int N, const void *argv, float *result){ jamie@104: // return XTRACT_SUCCESS; jamie@104: //} jamie@104: jamie@114: int xtract_subbands(const float *data, const int N, const void *argv, float *result){ jamie@104: jamie@114: int n, bw, xtract_func, nbands, scale, start, lower, *argi, rv; jamie@114: jamie@114: argi = (int *)argv; jamie@114: jamie@114: xtract_func = argi[0]; jamie@114: nbands = argi[1]; jamie@114: scale = argi[2]; jamie@114: start = argi[3]; jamie@114: jamie@114: if(scale == XTRACT_LINEAR_SUBBANDS) jamie@114: bw = floorf((N - start) / nbands); jamie@114: else jamie@114: bw = start; jamie@114: jamie@114: lower = start; jamie@115: rv = XTRACT_SUCCESS; jamie@114: jamie@114: for(n = 0; n < nbands; n++){ jamie@114: jamie@114: /* Bounds sanity check */ jamie@115: if(lower >= N || lower + bw >= N){ jamie@120: // printf("n: %d\n", n); jamie@115: result[n] = 0.f; jamie@114: continue; jamie@115: } jamie@114: jamie@114: rv = xtract[xtract_func](data+lower, bw, NULL, &result[n]); jamie@114: jamie@114: if(rv != XTRACT_SUCCESS) jamie@114: return rv; jamie@114: jamie@114: switch(scale){ jamie@114: case XTRACT_OCTAVE_SUBBANDS: jamie@114: lower += bw; jamie@114: bw = lower; jamie@114: break; jamie@114: case XTRACT_LINEAR_SUBBANDS: jamie@114: lower += bw; jamie@114: break; jamie@114: } jamie@114: jamie@114: } jamie@114: jamie@114: return rv; jamie@114: jamie@114: } jamie@114: jamie@114: jamie@114: