comparison core/PRU.cpp @ 318:f7b19ea31bbb prerelease

Added NEON vectorised float<->int converters. Curiously, their performance is worse than that of the C version. I guess clang is better at vectorising than we thought! The new code remains disabled for now.
author andrewm
date Mon, 30 May 2016 01:06:01 +0100
parents 493a07f6ec09
children 2c4ddf4277d1
comparison
equal deleted inserted replaced
317:52733b58bdf3 318:f7b19ea31bbb
34 #include <native/task.h> 34 #include <native/task.h>
35 #include <native/timer.h> 35 #include <native/timer.h>
36 #include <rtdk.h> 36 #include <rtdk.h>
37 37
38 using namespace std; 38 using namespace std;
39
40 // Select whether to use NEON-based sample conversion
41 // (this will probably go away in a future commit once its performance
42 // is verified over extended use)
43 #undef USE_NEON_FORMAT_CONVERSION
39 44
40 // PRU memory: PRU0 and PRU1 RAM are 8kB (0x2000) long each 45 // PRU memory: PRU0 and PRU1 RAM are 8kB (0x2000) long each
41 // PRU-SHARED RAM is 12kB (0x3000) long 46 // PRU-SHARED RAM is 12kB (0x3000) long
42 47
43 #define PRU_MEM_MCASP_OFFSET 0x2000 // Offset within PRU-SHARED RAM 48 #define PRU_MEM_MCASP_OFFSET 0x2000 // Offset within PRU-SHARED RAM
107 const unsigned int PRU::kPruGPIOTestPin2 = 31; // GPIO0(31); P9-13 112 const unsigned int PRU::kPruGPIOTestPin2 = 31; // GPIO0(31); P9-13
108 const unsigned int PRU::kPruGPIOTestPin3 = 26; // GPIO0(26); P8-14 113 const unsigned int PRU::kPruGPIOTestPin3 = 26; // GPIO0(26); P8-14
109 114
110 extern int gShouldStop; 115 extern int gShouldStop;
111 extern int gRTAudioVerbose; 116 extern int gRTAudioVerbose;
117
118 // These four functions are written in assembly in FormatConvert.S
119 extern "C" {
120 void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer);
121 void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer);
122 void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer);
123 void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer);
124 }
112 125
113 // Constructor: specify a PRU number (0 or 1) 126 // Constructor: specify a PRU number (0 or 1)
114 PRU::PRU(InternalBelaContext *input_context) 127 PRU::PRU(InternalBelaContext *input_context)
115 : context(input_context), pru_number(0), running(false), analog_enabled(false), 128 : context(input_context), pru_number(0), running(false), analog_enabled(false),
116 digital_enabled(false), gpio_enabled(false), led_enabled(false), 129 digital_enabled(false), gpio_enabled(false), led_enabled(false),
405 close(xenomai_gpio_fd); 418 close(xenomai_gpio_fd);
406 xenomai_gpio_fd = -1; 419 xenomai_gpio_fd = -1;
407 } 420 }
408 } 421 }
409 } 422 }
423
424 // TESTING
425 // if(posix_memalign((void **)&testing_float, 16, 8 * context->audioFrames * sizeof(float))) {
426 // printf("Error allocating float buffers\n");
427 // return false;
428 // }
429 // if(posix_memalign((void **)&testing_int16, 8, 8 * context->audioFrames * sizeof(int16_t))) {
430 // printf("Error allocating int16 buffers\n");
431 // return false;
432 // }
410 433
411 // Allocate audio buffers 434 // Allocate audio buffers
435 #ifdef USE_NEON_FORMAT_CONVERSION
436 if(posix_memalign((void **)&context->audioIn, 16, 2 * context->audioFrames * sizeof(float))) {
437 printf("Error allocating audio input buffer\n");
438 return 1;
439 }
440 if(posix_memalign((void **)&context->audioOut, 16, 2 * context->audioFrames * sizeof(float))) {
441 printf("Error allocating audio output buffer\n");
442 return 1;
443 }
444 #else
412 context->audioIn = (float *)malloc(2 * context->audioFrames * sizeof(float)); 445 context->audioIn = (float *)malloc(2 * context->audioFrames * sizeof(float));
413 context->audioOut = (float *)malloc(2 * context->audioFrames * sizeof(float)); 446 context->audioOut = (float *)malloc(2 * context->audioFrames * sizeof(float));
414 if(context->audioIn == 0 || context->audioOut == 0) { 447 if(context->audioIn == 0 || context->audioOut == 0) {
415 rt_printf("Error: couldn't allocate audio buffers\n"); 448 rt_printf("Error: couldn't allocate audio buffers\n");
416 return 1; 449 return 1;
417 } 450 }
418 451 #endif
452
419 // Allocate analog buffers 453 // Allocate analog buffers
420 if(analog_enabled) { 454 if(analog_enabled) {
455 #ifdef USE_NEON_FORMAT_CONVERSION
456 if(posix_memalign((void **)&context->analogIn, 16,
457 context->analogChannels * context->analogFrames * sizeof(float))) {
458 printf("Error allocating analog input buffer\n");
459 return 1;
460 }
461 if(posix_memalign((void **)&context->analogOut, 16,
462 context->analogChannels * context->analogFrames * sizeof(float))) {
463 printf("Error allocating analog output buffer\n");
464 return 1;
465 }
466 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float));
467
468 if(last_analog_out_frame == 0) {
469 rt_printf("Error: couldn't allocate analog persistence buffer\n");
470 return 1;
471 }
472 #else
421 context->analogIn = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); 473 context->analogIn = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float));
422 context->analogOut = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); 474 context->analogOut = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float));
423 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); 475 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float));
424 476
425 if(context->analogIn == 0 || context->analogOut == 0 || last_analog_out_frame == 0) { 477 if(context->analogIn == 0 || context->analogOut == 0 || last_analog_out_frame == 0) {
426 rt_printf("Error: couldn't allocate analog buffers\n"); 478 rt_printf("Error: couldn't allocate analog buffers\n");
427 return 1; 479 return 1;
428 } 480 }
429 481 #endif
482
430 memset(last_analog_out_frame, 0, context->analogChannels * sizeof(float)); 483 memset(last_analog_out_frame, 0, context->analogChannels * sizeof(float));
431 } 484 }
432 485
433 // Allocate digital buffers 486 // Allocate digital buffers
434 digital_buffer0 = pru_buffer_digital; 487 digital_buffer0 = pru_buffer_digital;
578 // Set the test pin high 631 // Set the test pin high
579 xenomai_gpio[GPIO_SETDATAOUT] = TEST_PIN_MASK; 632 xenomai_gpio[GPIO_SETDATAOUT] = TEST_PIN_MASK;
580 } 633 }
581 634
582 // Convert short (16-bit) samples to float 635 // Convert short (16-bit) samples to float
583 // TODO: NEON 636 #ifdef USE_NEON_FORMAT_CONVERSION
584 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) 637 int16_to_float_audio(2 * context->audioFrames, &pru_buffer_audio_adc[pru_audio_offset], context->audioIn);
638 #else
639 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) {
585 context->audioIn[n] = (float)pru_buffer_audio_adc[n + pru_audio_offset] / 32768.0f; 640 context->audioIn[n] = (float)pru_buffer_audio_adc[n + pru_audio_offset] / 32768.0f;
586 641 }
642 #endif
643
587 if(analog_enabled) { 644 if(analog_enabled) {
588 if(mux_channels != 0) { 645 if(mux_channels != 0) {
589 // If multiplexer is enabled, find out which channels we have by pulling out 646 // If multiplexer is enabled, find out which channels we have by pulling out
590 // the place that it ended. 647 // the place that it ended.
591 // int lastMuxChannel = pru_buffer_comm[PRU_MUX_END_CHANNEL]; 648 // int lastMuxChannel = pru_buffer_comm[PRU_MUX_END_CHANNEL];
592 649
593 // TODO 650 // TODO
594 } 651 }
595 652
596 // TODO: NEON 653 #ifdef USE_NEON_FORMAT_CONVERSION
597 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) 654 int16_to_float_analog(context->analogChannels * context->analogFrames,
655 &pru_buffer_spi_adc[pru_spi_offset], context->analogIn);
656 #else
657 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) {
598 context->analogIn[n] = (float)pru_buffer_spi_adc[n + pru_spi_offset] / 65536.0f; 658 context->analogIn[n] = (float)pru_buffer_spi_adc[n + pru_spi_offset] / 65536.0f;
659 }
660 #endif
599 661
600 if(context->flags & BELA_FLAG_ANALOG_OUTPUTS_PERSIST) { 662 if(context->flags & BELA_FLAG_ANALOG_OUTPUTS_PERSIST) {
601 // Initialize the output buffer with the values that were in the last frame of the previous output 663 // Initialize the output buffer with the values that were in the last frame of the previous output
602 for(unsigned int ch = 0; ch < context->analogChannels; ch++){ 664 for(unsigned int ch = 0; ch < context->analogChannels; ch++){
603 for(unsigned int n = 0; n < context->analogFrames; n++){ 665 for(unsigned int n = 0; n < context->analogFrames; n++){
640 last_analog_out_frame[ch] = context->analogOut[context->analogChannels * (context->analogFrames - 1) + ch]; 702 last_analog_out_frame[ch] = context->analogOut[context->analogChannels * (context->analogFrames - 1) + ch];
641 } 703 }
642 } 704 }
643 705
644 // Convert float back to short for SPI output 706 // Convert float back to short for SPI output
707 #ifdef USE_NEON_FORMAT_CONVERSION
708 float_to_int16_analog(context->analogChannels * context->analogFrames,
709 context->analogOut, (uint16_t*)&pru_buffer_spi_dac[pru_spi_offset]);
710 #else
645 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { 711 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) {
646 int out = context->analogOut[n] * 65536.0f; 712 int out = context->analogOut[n] * 65536.0f;
647 if(out < 0) out = 0; 713 if(out < 0) out = 0;
648 else if(out > 65535) out = 65535; 714 else if(out > 65535) out = 65535;
649 pru_buffer_spi_dac[n + pru_spi_offset] = (uint16_t)out; 715 pru_buffer_spi_dac[n + pru_spi_offset] = (uint16_t)out;
650 } 716 }
717 #endif
651 } 718 }
652 719
653 if(digital_enabled) { // keep track of past digital values 720 if(digital_enabled) { // keep track of past digital values
654 for(unsigned int n = 0; n < context->digitalFrames; n++){ 721 for(unsigned int n = 0; n < context->digitalFrames; n++){
655 last_digital_buffer[n] = context->digital[n]; 722 last_digital_buffer[n] = context->digital[n];
656 } 723 }
657 } 724 }
658 725
659 // Convert float back to short for audio 726 // Convert float back to short for audio
660 // TODO: NEON 727 #ifdef USE_NEON_FORMAT_CONVERSION
728 float_to_int16_audio(2 * context->audioFrames, context->audioOut, &pru_buffer_audio_dac[pru_audio_offset]);
729 #else
661 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { 730 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) {
662 int out = context->audioOut[n] * 32768.0f; 731 int out = context->audioOut[n] * 32768.0f;
663 if(out < -32768) out = -32768; 732 if(out < -32768) out = -32768;
664 else if(out > 32767) out = 32767; 733 else if(out > 32767) out = 32767;
665 pru_buffer_audio_dac[n + pru_audio_offset] = (int16_t)out; 734 pru_buffer_audio_dac[n + pru_audio_offset] = (int16_t)out;
666 } 735 }
736 #endif
667 737
668 // Increment total number of samples that have elapsed 738 // Increment total number of samples that have elapsed
669 context->audioFramesElapsed += context->audioFrames; 739 context->audioFramesElapsed += context->audioFrames;
670 740
671 if(xenomai_gpio != 0) { 741 if(xenomai_gpio != 0) {