Mercurial > hg > beaglert
comparison core/PRU.cpp @ 318:f7b19ea31bbb prerelease
Added NEON vectorised float<->int converters. Curiously, their performance is worse than the C version. I guess clang is better at vectorising than we thought! The new code remains disabled for now.
author | andrewm |
---|---|
date | Mon, 30 May 2016 01:06:01 +0100 |
parents | 493a07f6ec09 |
children | 2c4ddf4277d1 |
comparison
equal
deleted
inserted
replaced
317:52733b58bdf3 | 318:f7b19ea31bbb |
---|---|
34 #include <native/task.h> | 34 #include <native/task.h> |
35 #include <native/timer.h> | 35 #include <native/timer.h> |
36 #include <rtdk.h> | 36 #include <rtdk.h> |
37 | 37 |
38 using namespace std; | 38 using namespace std; |
39 | |
40 // Select whether to use NEON-based sample conversion | |
41 // (this will probably go away in a future commit once its performance | |
42 // is verified over extended use) | |
43 #undef USE_NEON_FORMAT_CONVERSION | |
39 | 44 |
40 // PRU memory: PRU0 and PRU1 RAM are 8kB (0x2000) long each | 45 // PRU memory: PRU0 and PRU1 RAM are 8kB (0x2000) long each |
41 // PRU-SHARED RAM is 12kB (0x3000) long | 46 // PRU-SHARED RAM is 12kB (0x3000) long |
42 | 47 |
43 #define PRU_MEM_MCASP_OFFSET 0x2000 // Offset within PRU-SHARED RAM | 48 #define PRU_MEM_MCASP_OFFSET 0x2000 // Offset within PRU-SHARED RAM |
107 const unsigned int PRU::kPruGPIOTestPin2 = 31; // GPIO0(31); P9-13 | 112 const unsigned int PRU::kPruGPIOTestPin2 = 31; // GPIO0(31); P9-13 |
108 const unsigned int PRU::kPruGPIOTestPin3 = 26; // GPIO0(26); P8-14 | 113 const unsigned int PRU::kPruGPIOTestPin3 = 26; // GPIO0(26); P8-14 |
109 | 114 |
110 extern int gShouldStop; | 115 extern int gShouldStop; |
111 extern int gRTAudioVerbose; | 116 extern int gRTAudioVerbose; |
117 | |
118 // These four functions are written in assembly in FormatConvert.S | |
119 extern "C" { | |
120 void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer); | |
121 void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer); | |
122 void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer); | |
123 void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer); | |
124 } | |
112 | 125 |
113 // Constructor: specify a PRU number (0 or 1) | 126 // Constructor: specify a PRU number (0 or 1) |
114 PRU::PRU(InternalBelaContext *input_context) | 127 PRU::PRU(InternalBelaContext *input_context) |
115 : context(input_context), pru_number(0), running(false), analog_enabled(false), | 128 : context(input_context), pru_number(0), running(false), analog_enabled(false), |
116 digital_enabled(false), gpio_enabled(false), led_enabled(false), | 129 digital_enabled(false), gpio_enabled(false), led_enabled(false), |
405 close(xenomai_gpio_fd); | 418 close(xenomai_gpio_fd); |
406 xenomai_gpio_fd = -1; | 419 xenomai_gpio_fd = -1; |
407 } | 420 } |
408 } | 421 } |
409 } | 422 } |
423 | |
424 // TESTING | |
425 // if(posix_memalign((void **)&testing_float, 16, 8 * context->audioFrames * sizeof(float))) { | |
426 // printf("Error allocating float buffers\n"); | |
427 // return false; | |
428 // } | |
429 // if(posix_memalign((void **)&testing_int16, 8, 8 * context->audioFrames * sizeof(int16_t))) { | |
430 // printf("Error allocating float buffers\n"); | |
431 // return false; | |
432 // } | |
410 | 433 |
411 // Allocate audio buffers | 434 // Allocate audio buffers |
435 #ifdef USE_NEON_FORMAT_CONVERSION | |
436 if(posix_memalign((void **)&context->audioIn, 16, 2 * context->audioFrames * sizeof(float))) { | |
437 printf("Error allocating audio input buffer\n"); | |
438 return 1; | |
439 } | |
440 if(posix_memalign((void **)&context->audioOut, 16, 2 * context->audioFrames * sizeof(float))) { | |
441 printf("Error allocating audio output buffer\n"); | |
442 return 1; | |
443 } | |
444 #else | |
412 context->audioIn = (float *)malloc(2 * context->audioFrames * sizeof(float)); | 445 context->audioIn = (float *)malloc(2 * context->audioFrames * sizeof(float)); |
413 context->audioOut = (float *)malloc(2 * context->audioFrames * sizeof(float)); | 446 context->audioOut = (float *)malloc(2 * context->audioFrames * sizeof(float)); |
414 if(context->audioIn == 0 || context->audioOut == 0) { | 447 if(context->audioIn == 0 || context->audioOut == 0) { |
415 rt_printf("Error: couldn't allocate audio buffers\n"); | 448 rt_printf("Error: couldn't allocate audio buffers\n"); |
416 return 1; | 449 return 1; |
417 } | 450 } |
418 | 451 #endif |
452 | |
419 // Allocate analog buffers | 453 // Allocate analog buffers |
420 if(analog_enabled) { | 454 if(analog_enabled) { |
455 #ifdef USE_NEON_FORMAT_CONVERSION | |
456 if(posix_memalign((void **)&context->analogIn, 16, | |
457 context->analogChannels * context->analogFrames * sizeof(float))) { | |
458 printf("Error allocating analog input buffer\n"); | |
459 return 1; | |
460 } | |
461 if(posix_memalign((void **)&context->analogOut, 16, | |
462 context->analogChannels * context->analogFrames * sizeof(float))) { | |
463 printf("Error allocating analog output buffer\n"); | |
464 return 1; | |
465 } | |
466 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); | |
467 | |
468 if(last_analog_out_frame == 0) { | |
469 rt_printf("Error: couldn't allocate analog persistence buffer\n"); | |
470 return 1; | |
471 } | |
472 #else | |
421 context->analogIn = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); | 473 context->analogIn = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); |
422 context->analogOut = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); | 474 context->analogOut = (float *)malloc(context->analogChannels * context->analogFrames * sizeof(float)); |
423 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); | 475 last_analog_out_frame = (float *)malloc(context->analogChannels * sizeof(float)); |
424 | 476 |
425 if(context->analogIn == 0 || context->analogOut == 0 || last_analog_out_frame == 0) { | 477 if(context->analogIn == 0 || context->analogOut == 0 || last_analog_out_frame == 0) { |
426 rt_printf("Error: couldn't allocate analog buffers\n"); | 478 rt_printf("Error: couldn't allocate analog buffers\n"); |
427 return 1; | 479 return 1; |
428 } | 480 } |
429 | 481 #endif |
482 | |
430 memset(last_analog_out_frame, 0, context->analogChannels * sizeof(float)); | 483 memset(last_analog_out_frame, 0, context->analogChannels * sizeof(float)); |
431 } | 484 } |
432 | 485 |
433 // Allocate digital buffers | 486 // Allocate digital buffers |
434 digital_buffer0 = pru_buffer_digital; | 487 digital_buffer0 = pru_buffer_digital; |
578 // Set the test pin high | 631 // Set the test pin high |
579 xenomai_gpio[GPIO_SETDATAOUT] = TEST_PIN_MASK; | 632 xenomai_gpio[GPIO_SETDATAOUT] = TEST_PIN_MASK; |
580 } | 633 } |
581 | 634 |
582 // Convert short (16-bit) samples to float | 635 // Convert short (16-bit) samples to float |
583 // TODO: NEON | 636 #ifdef USE_NEON_FORMAT_CONVERSION |
584 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) | 637 int16_to_float_audio(2 * context->audioFrames, &pru_buffer_audio_adc[pru_audio_offset], context->audioIn); |
638 #else | |
639 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { | |
585 context->audioIn[n] = (float)pru_buffer_audio_adc[n + pru_audio_offset] / 32768.0f; | 640 context->audioIn[n] = (float)pru_buffer_audio_adc[n + pru_audio_offset] / 32768.0f; |
586 | 641 } |
642 #endif | |
643 | |
587 if(analog_enabled) { | 644 if(analog_enabled) { |
588 if(mux_channels != 0) { | 645 if(mux_channels != 0) { |
589 // If multiplexer is enabled, find out which channels we have by pulling out | 646 // If multiplexer is enabled, find out which channels we have by pulling out |
590 // the place that it ended. | 647 // the place that it ended. |
591 // int lastMuxChannel = pru_buffer_comm[PRU_MUX_END_CHANNEL]; | 648 // int lastMuxChannel = pru_buffer_comm[PRU_MUX_END_CHANNEL]; |
592 | 649 |
593 // TODO | 650 // TODO |
594 } | 651 } |
595 | 652 |
596 // TODO: NEON | 653 #ifdef USE_NEON_FORMAT_CONVERSION |
597 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) | 654 int16_to_float_analog(context->analogChannels * context->analogFrames, |
655 &pru_buffer_spi_adc[pru_spi_offset], context->analogIn); | |
656 #else | |
657 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { | |
598 context->analogIn[n] = (float)pru_buffer_spi_adc[n + pru_spi_offset] / 65536.0f; | 658 context->analogIn[n] = (float)pru_buffer_spi_adc[n + pru_spi_offset] / 65536.0f; |
659 } | |
660 #endif | |
599 | 661 |
600 if(context->flags & BELA_FLAG_ANALOG_OUTPUTS_PERSIST) { | 662 if(context->flags & BELA_FLAG_ANALOG_OUTPUTS_PERSIST) { |
601 // Initialize the output buffer with the values that were in the last frame of the previous output | 663 // Initialize the output buffer with the values that were in the last frame of the previous output |
602 for(unsigned int ch = 0; ch < context->analogChannels; ch++){ | 664 for(unsigned int ch = 0; ch < context->analogChannels; ch++){ |
603 for(unsigned int n = 0; n < context->analogFrames; n++){ | 665 for(unsigned int n = 0; n < context->analogFrames; n++){ |
640 last_analog_out_frame[ch] = context->analogOut[context->analogChannels * (context->analogFrames - 1) + ch]; | 702 last_analog_out_frame[ch] = context->analogOut[context->analogChannels * (context->analogFrames - 1) + ch]; |
641 } | 703 } |
642 } | 704 } |
643 | 705 |
644 // Convert float back to short for SPI output | 706 // Convert float back to short for SPI output |
707 #ifdef USE_NEON_FORMAT_CONVERSION | |
708 float_to_int16_analog(context->analogChannels * context->analogFrames, | |
709 context->analogOut, (uint16_t*)&pru_buffer_spi_dac[pru_spi_offset]); | |
710 #else | |
645 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { | 711 for(unsigned int n = 0; n < context->analogChannels * context->analogFrames; n++) { |
646 int out = context->analogOut[n] * 65536.0f; | 712 int out = context->analogOut[n] * 65536.0f; |
647 if(out < 0) out = 0; | 713 if(out < 0) out = 0; |
648 else if(out > 65535) out = 65535; | 714 else if(out > 65535) out = 65535; |
649 pru_buffer_spi_dac[n + pru_spi_offset] = (uint16_t)out; | 715 pru_buffer_spi_dac[n + pru_spi_offset] = (uint16_t)out; |
650 } | 716 } |
717 #endif | |
651 } | 718 } |
652 | 719 |
653 if(digital_enabled) { // keep track of past digital values | 720 if(digital_enabled) { // keep track of past digital values |
654 for(unsigned int n = 0; n < context->digitalFrames; n++){ | 721 for(unsigned int n = 0; n < context->digitalFrames; n++){ |
655 last_digital_buffer[n] = context->digital[n]; | 722 last_digital_buffer[n] = context->digital[n]; |
656 } | 723 } |
657 } | 724 } |
658 | 725 |
659 // Convert float back to short for audio | 726 // Convert float back to short for audio |
660 // TODO: NEON | 727 #ifdef USE_NEON_FORMAT_CONVERSION |
728 float_to_int16_audio(2 * context->audioFrames, context->audioOut, &pru_buffer_audio_dac[pru_audio_offset]); | |
729 #else | |
661 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { | 730 for(unsigned int n = 0; n < 2 * context->audioFrames; n++) { |
662 int out = context->audioOut[n] * 32768.0f; | 731 int out = context->audioOut[n] * 32768.0f; |
663 if(out < -32768) out = -32768; | 732 if(out < -32768) out = -32768; |
664 else if(out > 32767) out = 32767; | 733 else if(out > 32767) out = 32767; |
665 pru_buffer_audio_dac[n + pru_audio_offset] = (int16_t)out; | 734 pru_buffer_audio_dac[n + pru_audio_offset] = (int16_t)out; |
666 } | 735 } |
736 #endif | |
667 | 737 |
668 // Increment total number of samples that have elapsed | 738 // Increment total number of samples that have elapsed |
669 context->audioFramesElapsed += context->audioFrames; | 739 context->audioFramesElapsed += context->audioFrames; |
670 | 740 |
671 if(xenomai_gpio != 0) { | 741 if(xenomai_gpio != 0) { |