view src/init.c @ 98:ca40a0dc29d6

Removed fftw_plan from xtraction functions. Created new init function xtract_init_fft() for creating plans, which have global scope. Updated examples to reflect the change. New configure option: --with-fft_optimisation (0 = FFTW_ESTIMATE, 1 = FFTW_MEASURE, 2 = FFTW_PATIENT).
author Jamie Bullock <jamie@postlude.co.uk>
date Sat, 06 Oct 2007 16:36:00 +0000
parents 432d97ee02ef
children 068c8c526e51
line wrap: on
line source
/* libxtract feature extraction library
 *  
 * Copyright (C) 2006 Jamie Bullock
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
 * USA.
 */

/* init.c: defines functions that extract a feature as a single value from an input vector */

#ifdef HAVE_CONFIG_H
#    include <config.h>
#endif

#include <math.h>
#include <stdlib.h>

#include "xtract/libxtract.h"
#include "xtract_globals_private.h"

#ifdef XTRACT_FFT
#include <fftw3.h>

#ifndef XTRACT_FFT_OPTIMISATION_LEVEL
/* This should never happen */
#define XTRACT_FFT_OPTIMISATION_LEVEL 1
#endif

int xtract_init_mfcc(int N, float nyquist, int style, float freq_min, float freq_max, int freq_bands, float **fft_tables){

    int n, i, k, *fft_peak, M, next_peak; 
    float norm, mel_freq_max, mel_freq_min, norm_fact, height, inc, val, 
        freq_bw_mel, *mel_peak, *height_norm, *lin_peak;

    mel_peak = height_norm = lin_peak = NULL;
    fft_peak = NULL;
    norm = 1; 

    mel_freq_max = 1127 * log(1 + freq_max / 700);
    mel_freq_min = 1127 * log(1 + freq_min / 700);
    freq_bw_mel = (mel_freq_max - mel_freq_min) / freq_bands;

    mel_peak = (float *)malloc((freq_bands + 2) * sizeof(float)); 
    /* +2 for zeros at start and end */
    lin_peak = (float *)malloc((freq_bands + 2) * sizeof(float));
    fft_peak = (int *)malloc((freq_bands + 2) * sizeof(int));
    height_norm = (float *)malloc(freq_bands * sizeof(float));

    if(mel_peak == NULL || height_norm == NULL || 
                    lin_peak == NULL || fft_peak == NULL)
                    return XTRACT_MALLOC_FAILED;
    
    M = N >> 1;

    mel_peak[0] = mel_freq_min;
    lin_peak[0] = freq_min; // === 700 * (exp(mel_peak[0] / 1127) - 1);
    fft_peak[0] = lin_peak[0] / nyquist * M;


    for (n = 1; n <= freq_bands; n++){	
    /*roll out peak locations - mel, linear and linear on fft window scale */
        mel_peak[n] = mel_peak[n - 1] + freq_bw_mel;
        lin_peak[n] = 700 * (exp(mel_peak[n] / 1127) -1);
        fft_peak[n] = lin_peak[n] / nyquist * M;
    }

    for (n = 0; n < freq_bands; n++){
        /*roll out normalised gain of each peak*/
        if (style == XTRACT_EQUAL_GAIN){
            height = 1;	
            norm_fact = norm;
        }
        else{
            height = 2 / (lin_peak[n + 2] - lin_peak[n]);
            norm_fact = norm / (2 / (lin_peak[2] - lin_peak[0]));
        }
        height_norm[n] = height * norm_fact;
    }

    i = 0;
   
    for(n = 0; n < freq_bands; n++){
	
		// calculate the rise increment
        if(n==0)
            inc = height_norm[n] / fft_peak[n];
        else
            inc = height_norm[n] / (fft_peak[n] - fft_peak[n - 1]);
        val = 0;	
	
		// zero the start of the array
		for(k = 0; k < i; k++)
		   fft_tables[n][k] = 0.f;
	
		// fill in the rise
        for(; i <= fft_peak[n]; i++){ 
            fft_tables[n][i] = val;
            val += inc;
        }
	
        // calculate the fall increment
        inc = height_norm[n] / (fft_peak[n + 1] - fft_peak[n]);
	
        val = 0;
		next_peak = fft_peak[n + 1];
	
		// reverse fill the 'fall' 
        for(i = next_peak; i > fft_peak[n]; i--){ 
            fft_tables[n][i] = val;
            val += inc;
        }

		// zero the rest of the array
		for(k = next_peak + 1; k < N; k++)
			fft_tables[n][k] = 0.f;
    }


    /* Initialise the fft_plan for the DCT */
    xtract_init_fft(freq_bands, XTRACT_MFCC);

    free(mel_peak);
    free(lin_peak);
    free(height_norm);
    free(fft_peak);

    return XTRACT_SUCCESS;

}

int xtract_init_fft(int N, int feature_name){

    float *input, *output;
    int optimisation;
    
    input = output = NULL;
    
    fprintf(stderr, "Optimisation level: %d\n", XTRACT_FFT_OPTIMISATION_LEVEL);

    if(XTRACT_FFT_OPTIMISATION_LEVEL == 0)
        optimisation = FFTW_ESTIMATE;
    else if(XTRACT_FFT_OPTIMISATION_LEVEL == 1)
        optimisation = FFTW_MEASURE;
    else if(XTRACT_FFT_OPTIMISATION_LEVEL == 2)
        optimisation = FFTW_PATIENT;
    else
        optimisation = FFTW_MEASURE; /* The default */

    if(feature_name == XTRACT_AUTOCORRELATION_FFT)
        N <<= 1;

    input = malloc(N * sizeof(float));
    output = malloc(N * sizeof(float));

    switch(feature_name){
        case XTRACT_SPECTRUM:
            if(spectrum_plan != NULL)
                fftwf_destroy_plan(spectrum_plan);
            spectrum_plan = 
                fftwf_plan_r2r_1d(N, input, output, FFTW_R2HC, optimisation);
            break;
        case XTRACT_AUTOCORRELATION_FFT:
            if(autocorrelation_fft_plan_1 != NULL)
                fftwf_destroy_plan(autocorrelation_fft_plan_1);
            if(autocorrelation_fft_plan_2 != NULL)
                fftwf_destroy_plan(autocorrelation_fft_plan_2);
            autocorrelation_fft_plan_1 =
                fftwf_plan_r2r_1d(N, input, output, FFTW_R2HC, optimisation);
            autocorrelation_fft_plan_2 =
                fftwf_plan_r2r_1d(N, input, output, FFTW_HC2R, optimisation);
            break;
        case XTRACT_DCT:
            if(dct_plan != NULL)
                fftwf_destroy_plan(dct_plan);
            dct_plan =
                fftwf_plan_r2r_1d(N, input, output, FFTW_REDFT00, optimisation);
        case XTRACT_MFCC:
            if(dct_plan != NULL)
                fftwf_destroy_plan(dct_plan);
            dct_plan =
                fftwf_plan_r2r_1d(N, output, output, FFTW_REDFT00, optimisation);
            break;
    }

    free(input);
    free(output);

    return XTRACT_SUCCESS;

}

#endif

int xtract_init_bark(int N, float sr, int *band_limits){

    float  edges[] = {0, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500, 20500, 27000}; /* Takes us up to sr = 54kHz (CCRMA: JOS)*/

    int bands = XTRACT_BARK_BANDS;
    
    while(bands--)
        band_limits[bands] = edges[bands] / sr * N;
        /*FIX shohuld use rounding, but couldn't get it to work */

    return XTRACT_SUCCESS;
}