jamie@235: /* jamie@235: * Copyright (C) 2012 Jamie Bullock jamie@235: * jamie@235: * Permission is hereby granted, free of charge, to any person obtaining a copy jamie@235: * of this software and associated documentation files (the "Software"), to jamie@235: * deal in the Software without restriction, including without limitation the jamie@235: * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or jamie@235: * sell copies of the Software, and to permit persons to whom the Software is jamie@235: * furnished to do so, subject to the following conditions: jamie@235: * jamie@235: * The above copyright notice and this permission notice shall be included in jamie@235: * all copies or substantial portions of the Software. jamie@235: * jamie@235: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR jamie@235: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, jamie@235: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE jamie@235: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER jamie@235: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING jamie@235: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS jamie@235: * IN THE SOFTWARE. jamie@235: * jamie@235: */ jamie@235: jamie@235: #include jamie@235: jamie@235: #include jamie@235: #include jamie@235: #include jamie@235: #include jamie@235: jamie@235: #include "xtract/libxtract.h" jamie@235: #include "xtract/xtract_stateful.h" jamie@235: #include "xtract/xtract_scalar.h" jamie@244: #include "xtract/xtract_helper.h" jamie@235: #include "WaveFile.h" jamie@235: jamie@235: #ifndef M_PI jamie@235: #define M_PI 3.14159265358979323846264338327 jamie@235: #endif jamie@235: jamie@235: using namespace std; jamie@235: jamie@235: typedef enum waveform_type_ jamie@235: { jamie@235: SINE, jamie@235: SAWTOOTH, jamie@235: SQUARE, jamie@235: NOISE jamie@235: } jamie@235: waveform_type; jamie@235: jamie@235: #define BLOCKSIZE 512 jamie@235: #define MAVG_COUNT 10 jamie@244: #define HALF_BLOCKSIZE (BLOCKSIZE >> 1) jamie@235: #define SAMPLERATE 44100 jamie@235: #define PERIOD 102 jamie@235: #define MFCC_FREQ_BANDS 13 jamie@235: #define MFCC_FREQ_MIN 20 jamie@235: #define MFCC_FREQ_MAX 20000 jamie@235: jamie@235: jamie@235: double wavetable[BLOCKSIZE]; jamie@235: jamie@235: void fill_wavetable(const float frequency, waveform_type type) jamie@235: { jamie@235: jamie@235: int samples_per_period = SAMPLERATE / frequency; jamie@235: jamie@235: for (int i = 0; i < BLOCKSIZE; ++i) jamie@235: { jamie@235: int phase = i % samples_per_period; jamie@235: jamie@235: switch (type) jamie@235: { jamie@235: case SINE: jamie@235: wavetable[i] = sin((phase / (double)PERIOD) * 2 * M_PI); jamie@235: break; jamie@235: case SQUARE: jamie@235: if (phase < (samples_per_period / 2.f)) jamie@235: { jamie@235: wavetable[i] = -1.0; jamie@235: } jamie@235: else jamie@235: { jamie@235: wavetable[i] = 1.0; jamie@235: } jamie@235: break; jamie@235: case SAWTOOTH: jamie@235: wavetable[i] = ((phase / (double)PERIOD) * 2) - 1.; jamie@235: break; jamie@235: case NOISE: jamie@235: wavetable[i] = ((random() % 1000) / 500.0) - 1; jamie@235: break; jamie@235: } jamie@235: } jamie@235: } jamie@235: jamie@235: void print_wavetable(void) jamie@235: { jamie@235: for (int i = 0; i < BLOCKSIZE; ++i) jamie@235: { jamie@235: printf("%f\n", wavetable[i]); jamie@235: } jamie@235: } jamie@235: jamie@235: int main(void) jamie@235: { jamie@235: double mean = 0.0; jamie@235: double f0 = 0.0; jamie@235: double midicents = 0.0; jamie@235: double flux = 0.0; jamie@235: double centroid = 0.0; jamie@235: double lowest = 0.0; jamie@235: double spectrum[BLOCKSIZE] = {0}; jamie@235: double windowed[BLOCKSIZE] = {0}; jamie@235: double peaks[BLOCKSIZE] = {0}; jamie@235: double harmonics[BLOCKSIZE] = {0}; jamie@235: double subframes_windowed[BLOCKSIZE] = {0}; jamie@235: double subframes_spectrum[BLOCKSIZE] = {0}; jamie@235: double difference[HALF_BLOCKSIZE] = {0}; jamie@235: double lastn[MAVG_COUNT] = {0}; jamie@235: double *window = NULL; jamie@235: double *window_subframe = NULL; jamie@235: double mfccs[MFCC_FREQ_BANDS] = {0}; jamie@235: double argd[4] = {0}; jamie@235: double samplerate = 44100.0; jamie@235: double prev_note = 0.0; jamie@235: int n; jamie@235: int rv = XTRACT_SUCCESS; jamie@235: double last_found_peak_time = 0.0; jamie@235: WaveFile wavFile("test.wav"); jamie@235: xtract_mel_filter mel_filters; jamie@235: xtract_last_n_state *last_n_state = xtract_last_n_state_new(MAVG_COUNT); jamie@235: jamie@235: if (!wavFile.IsLoaded()) jamie@235: { jamie@235: return EXIT_FAILURE; jamie@235: } jamie@235: jamie@235: float *wavData = (float *)wavFile.GetData(); // assume 32-bit float jamie@235: std::size_t wavBytes = wavFile.GetDataSize(); jamie@235: uint64_t wavSamples = wavBytes / sizeof(float); jamie@235: double data[wavSamples]; jamie@235: jamie@235: for (n = 0; n < wavSamples; ++n) jamie@235: { jamie@235: data[n] = (double)wavData[n]; jamie@235: } jamie@235: // Convert to double jamie@235: jamie@235: jamie@235: /* Allocate Mel filters */ jamie@235: mel_filters.n_filters = MFCC_FREQ_BANDS; jamie@235: mel_filters.filters = (double **)malloc(MFCC_FREQ_BANDS * sizeof(double *)); jamie@235: for(uint8_t k = 0; k < MFCC_FREQ_BANDS; ++k) jamie@235: { jamie@235: mel_filters.filters[k] = (double *)malloc(BLOCKSIZE * sizeof(double)); jamie@235: } jamie@235: jamie@235: xtract_init_mfcc(BLOCKSIZE >> 1, SAMPLERATE >> 1, XTRACT_EQUAL_GAIN, MFCC_FREQ_MIN, MFCC_FREQ_MAX, mel_filters.n_filters, mel_filters.filters); jamie@235: jamie@235: /* create the window functions */ jamie@235: window = xtract_init_window(BLOCKSIZE, XTRACT_HANN); jamie@235: window_subframe = xtract_init_window(HALF_BLOCKSIZE, XTRACT_HANN); jamie@235: xtract_init_wavelet_f0_state(); jamie@235: jamie@235: // fill_wavetable(344.53125f, NOISE); // 344.53125f = 128 samples @ 44100 Hz jamie@235: // fill_wavetable(344.53125f, SINE); // 344.53125f = 128 samples @ 44100 Hz jamie@235: jamie@235: /* jamie@235: print_wavetable(); jamie@235: */ jamie@235: std::cout << "File has " << wavSamples << " samples" << std::endl; jamie@235: int peak_found = XTRACT_NO_RESULT; jamie@235: jamie@235: for (uint64_t n = 0; (n + BLOCKSIZE) < wavSamples; n += HALF_BLOCKSIZE) // Overlap by HALF_BLOCKSIZE jamie@235: { jamie@235: /* get the F0 */ jamie@235: xtract[XTRACT_WAVELET_F0](&data[n], BLOCKSIZE, &samplerate, &f0); jamie@235: jamie@235: /* get the F0 as a MIDI note */ jamie@235: if (f0 != 0.0) jamie@235: { jamie@235: xtract[XTRACT_MIDICENT](NULL, 0, &f0, &midicents); jamie@235: int note = (int)round(midicents / 100); jamie@235: if (note != prev_note) jamie@235: { jamie@235: printf("Pitch: %d at %f\n", note, n / (float)SAMPLERATE); jamie@235: } jamie@235: prev_note = note; jamie@235: } jamie@235: jamie@235: xtract_windowed(&data[n], BLOCKSIZE, window, windowed); jamie@235: jamie@235: /* get the spectrum */ jamie@235: argd[0] = SAMPLERATE / (double)BLOCKSIZE; jamie@235: argd[1] = XTRACT_MAGNITUDE_SPECTRUM; jamie@235: argd[2] = 0.f; /* DC component - we expect this to zero for square wave */ jamie@235: argd[3] = 0.f; /* No Normalisation */ jamie@235: jamie@235: xtract_init_fft(BLOCKSIZE, XTRACT_SPECTRUM); jamie@235: xtract[XTRACT_SPECTRUM](windowed, BLOCKSIZE, &argd[0], spectrum); jamie@235: xtract_free_fft(); jamie@235: jamie@235: xtract[XTRACT_SPECTRAL_CENTROID](spectrum, BLOCKSIZE, NULL, ¢roid); jamie@235: jamie@235: argd[1] = 10.0; /* peak threshold as % of maximum peak */ jamie@235: xtract[XTRACT_PEAK_SPECTRUM](spectrum, BLOCKSIZE / 2, argd, peaks); jamie@235: jamie@235: argd[0] = f0; jamie@235: argd[1] = .3; /* harmonic threshold */ jamie@235: xtract[XTRACT_HARMONIC_SPECTRUM](peaks, BLOCKSIZE, argd, harmonics); jamie@235: jamie@235: /* compute the MFCCs */ jamie@235: xtract_mfcc(spectrum, BLOCKSIZE >> 1, &mel_filters, mfccs); jamie@235: jamie@235: double gated[BLOCKSIZE] = {0}; jamie@235: double block_max = 0.0; jamie@235: jamie@235: /* crude noise gate */ jamie@235: for (uint16_t k = 0; k < BLOCKSIZE; ++k) jamie@235: { jamie@235: if (fabs(data[n+k]) > block_max) jamie@235: { jamie@235: block_max = fabs(data[n+k]); jamie@235: } jamie@235: jamie@235: if (data[n+k] > .1) jamie@235: { jamie@235: gated[k] = data[n+k]; jamie@235: } jamie@235: } jamie@235: jamie@235: /* normalise */ jamie@235: double norm_factor = block_max > 0.0 ? 1.0 / block_max : 0.0; jamie@235: jamie@235: for (uint16_t k = 0; k < BLOCKSIZE; ++k) jamie@235: { jamie@235: gated[k] *= norm_factor; jamie@235: } jamie@235: jamie@235: /* compute Spectral Flux */ jamie@235: argd[0] = SAMPLERATE / HALF_BLOCKSIZE; jamie@235: argd[1] = XTRACT_LOG_POWER_SPECTRUM; jamie@235: argd[2] = 0.f; /* DC component */ jamie@235: argd[3] = 1.f; /* Yes Normalisation */ jamie@235: jamie@235: xtract_features_from_subframes(gated, BLOCKSIZE, XTRACT_WINDOWED, window_subframe, subframes_windowed); jamie@235: xtract_init_fft(HALF_BLOCKSIZE, XTRACT_SPECTRUM); jamie@235: xtract_features_from_subframes(subframes_windowed, BLOCKSIZE, XTRACT_SPECTRUM, argd, subframes_spectrum); jamie@235: xtract_free_fft(); jamie@235: jamie@244: argd[0] = 0.5; /* smoothing factor */ jamie@244: jamie@244: /* smooth the amplitude components of the first and second spectra */ jamie@244: xtract_smoothed(subframes_spectrum, HALF_BLOCKSIZE >> 1, argd, subframes_spectrum); jamie@244: xtract_smoothed(subframes_spectrum + HALF_BLOCKSIZE, HALF_BLOCKSIZE >> 1, argd, subframes_spectrum + HALF_BLOCKSIZE); jamie@244: jamie@244: /* difference between the two spectra */ jamie@235: xtract_difference_vector(subframes_spectrum, BLOCKSIZE, NULL, difference); jamie@235: jamie@235: argd[0] = .25; /* norm order */ jamie@235: argd[1] = XTRACT_POSITIVE_SLOPE; /* positive slope */ jamie@235: argd[2] = 1; /* normalise */ jamie@235: jamie@235: /* Right shift HALF_BLOCKSIZE because we only want amplitudes not frequencies */ jamie@235: xtract_flux(difference, HALF_BLOCKSIZE >> 1, argd, &flux); jamie@235: jamie@235: xtract_last_n(last_n_state, &flux, MAVG_COUNT, NULL, lastn); jamie@235: jamie@235: argd[0] = 10; /* flux threshold */ jamie@235: double flux_current = 0.0; jamie@235: jamie@235: peak_found = xtract_peak(lastn, MAVG_COUNT, argd, &flux_current); jamie@235: jamie@235: if (peak_found == XTRACT_SUCCESS) jamie@235: { jamie@235: double peak_time = n / (float)SAMPLERATE; jamie@235: if (peak_time - last_found_peak_time > .05 || peak_time < .05) jamie@235: { jamie@235: printf("Onset at %f seconds\n", n / (float)SAMPLERATE); jamie@235: last_found_peak_time = peak_time; jamie@235: } jamie@235: } jamie@235: } jamie@235: jamie@235: /* cleanup */ jamie@235: for(n = 0; n < MFCC_FREQ_BANDS; ++n) jamie@235: { jamie@235: free(mel_filters.filters[n]); jamie@235: } jamie@235: free(mel_filters.filters); jamie@235: jamie@235: xtract_free_window(window); jamie@235: xtract_free_window(window_subframe); jamie@235: jamie@235: return 0; jamie@235: jamie@235: }