Mercurial > hg > beaglert
diff core/PRU.cpp @ 318:f7b19ea31bbb prerelease
Added NEON vectorised float<->int converters. Curiously, their performance is worse than the C version. I guess clang is better at vectorising than we thought! The new code remains disabled for now.
author | andrewm |
---|---|
date | Mon, 30 May 2016 01:06:01 +0100 |
parents | 493a07f6ec09 |
children | 2c4ddf4277d1 |
line wrap: on
line diff
--- a/core/PRU.cpp Sat May 28 01:23:56 2016 +0100 +++ b/core/PRU.cpp Mon May 30 01:06:01 2016 +0100 @@ -37,6 +37,11 @@ using namespace std; +// Select whether to use NEON-based sample conversion +// (this will probably go away in a future commit once its performance +// is verified over extended use) +#undef USE_NEON_FORMAT_CONVERSION + // PRU memory: PRU0 and PRU1 RAM are 8kB (0x2000) long each // PRU-SHARED RAM is 12kB (0x3000) long @@ -110,6 +115,14 @@ extern int gShouldStop; extern int gRTAudioVerbose; +// These four functions are written in assembly in FormatConvert.S +extern "C" { + void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer); + void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer); + void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer); + void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer); +} + // Constructor: specify a PRU number (0 or 1) PRU::PRU(InternalBelaContext *input_context) : context(input_context), pru_number(0), running(false), analog_enabled(false), @@ -407,17 +420,56 @@ } } } + + // TESTING + // if(posix_memalign((void **)&testing_float, 16, 8 * context->audioFrames * sizeof(float))) { + // printf("Error allocating float buffers\n"); + // return false; + // } + // if(posix_memalign((void **)&testing_int16, 8, 8 * context->audioFrames * sizeof(int16_t))) { + // printf("Error allocating float buffers\n"); + // return false; + // } // Allocate audio buffers +#ifdef USE_NEON_FORMAT_CONVERSION + if(posix_memalign((void **)&context->audioIn, 16, 2 * context->audioFrames * sizeof(float))) { + printf("Error allocating audio input buffer\n"); + return 1; + } + if(posix_memalign((void **)&context->audioOut, 16, 2 * context->audioFrames * sizeof(float))) { + printf("Error allocating audio output buffer\n"); + return 1; + } +#else context->audioIn = (float *)malloc(2 * context->audioFrames * sizeof(float)); context->audioOut = (float 
*)malloc(2 * context->audioFrames * sizeof(float)); if(context->audioIn == 0 || context->audioOut == 0) { rt_printf("Error: couldn't allocate audio buffers\n"); return 1; } - +#endif + // Allocate analog buffers if(analog_enabled) { +#ifdef USE_NEON_FORMAT_CONVERSION + if(posix_memalign((void **)&context->analogIn, 16, + context->analogChannels * context->analogFrames * sizeof(float))) { + printf("Error allocating analog input buffer\n"); + return 1; + } + if(posix_memalign((void **)&context->analogOut, 16, + context->analogChannels * context->analogFrames * sizeof(float))) { + printf("Error allocating analog output buffer\n"); + return 1; + } + last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); + + if(last_analog_out_frame == 0) { + rt_printf("Error: couldn't allocate analog persistence buffer\n"); + return 1; + } +#else context->analogIn = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); context->analogOut = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); @@ -426,7 +478,8 @@ rt_printf("Error: couldn't allocate analog buffers\n"); return 1; } - +#endif + memset(last_analog_out_frame, 0, context->analogChannels * sizeof(float)); } @@ -580,10 +633,14 @@ } // Convert short (16-bit) samples to float - // TODO: NEON - for(unsigned int n = 0; n < 2 * context->audioFrames; n++) +#ifdef USE_NEON_FORMAT_CONVERSION + int16_to_float_audio(2 * context->audioFrames, &pru_buffer_audio_adc[pru_audio_offset], context->audioIn); +#else + for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { context->audioIn[n] = (float)pru_buffer_audio_adc[n + pru_audio_offset] / 32768.0f; - + } +#endif + if(analog_enabled) { if(mux_channels != 0) { // If multiplexer is enabled, find out which channels we have by pulling out @@ -593,9 +650,14 @@ // TODO } - // TODO: NEON - for(unsigned int n = 0; n < 
context->analogChannels * context->analogFrames; n++) +#ifdef USE_NEON_FORMAT_CONVERSION + int16_to_float_analog(context->analogChannels * context->analogFrames, + &pru_buffer_spi_adc[pru_spi_offset], context->analogIn); +#else + for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { context->analogIn[n] = (float)pru_buffer_spi_adc[n + pru_spi_offset] / 65536.0f; + } +#endif if(context->flags & BELA_FLAG_ANALOG_OUTPUTS_PERSIST) { // Initialize the output buffer with the values that were in the last frame of the previous output @@ -642,12 +704,17 @@ } // Convert float back to short for SPI output +#ifdef USE_NEON_FORMAT_CONVERSION + float_to_int16_analog(context->analogChannels * context->analogFrames, + context->analogOut, (uint16_t*)&pru_buffer_spi_dac[pru_spi_offset]); +#else for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { int out = context->analogOut[n] * 65536.0f; if(out < 0) out = 0; else if(out > 65535) out = 65535; pru_buffer_spi_dac[n + pru_spi_offset] = (uint16_t)out; } +#endif } if(digital_enabled) { // keep track of past digital values @@ -657,13 +724,16 @@ } // Convert float back to short for audio - // TODO: NEON +#ifdef USE_NEON_FORMAT_CONVERSION + float_to_int16_audio(2 * context->audioFrames, context->audioOut, &pru_buffer_audio_dac[pru_audio_offset]); +#else for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { int out = context->audioOut[n] * 32768.0f; if(out < -32768) out = -32768; else if(out > 32767) out = 32767; pru_buffer_audio_dac[n + pru_audio_offset] = (int16_t)out; } +#endif // Increment total number of samples that have elapsed context->audioFramesElapsed += context->audioFrames;