diff pru_rtaudio.p @ 108:3068421c0737 ultra-staging

Merged default into ultra-staging
author Giulio Moro <giuliomoro@yahoo.it>
date Tue, 18 Aug 2015 00:35:15 +0100
parents 31ca45939a0c
children c706be7daad7
line wrap: on
line diff
--- a/pru_rtaudio.p	Mon Jun 08 01:07:48 2015 +0100
+++ b/pru_rtaudio.p	Tue Aug 18 00:35:15 2015 +0100
@@ -31,7 +31,8 @@
 #define GPIO_CLEARDATAOUT 0x190
 #define GPIO_SETDATAOUT 0x194
 
-#define PRU0_ARM_INTERRUPT 19
+#define PRU0_ARM_INTERRUPT 19	// Interrupt signalling we're done
+#define PRU1_ARM_INTERRUPT 20	// Interrupt signalling a block is ready
 
 #define C_ADC_DAC_MEM C24     // PRU0 mem
 #ifdef DBOX_CAPE
@@ -144,6 +145,7 @@
 #define MCASP_RFIFOCTL			0x1008
 #define MCASP_RFIFOSTS			0x100C
 
+#define MCASP_XSTAT_XUNDRN_BIT          0        // Bit to test if there was an underrun
 #define MCASP_XSTAT_XDATA_BIT           5        // Bit to test for transmit ready
 #define MCASP_RSTAT_RDATA_BIT           5        // Bit to test for receive ready 
 	
@@ -382,23 +384,23 @@
 
 .macro HANG //useful for debugging
 DALOOP: 
-set r30.t14
-clr r30.t14
+    set r30.t14    // set bit 14 of r30
+    clr r30.t14    // clear it again; the QBA below repeats this forever, so HANG never returns
 QBA DALOOP
 .endm	
 
 // Bring CS line low to write to DAC
 .macro DAC_CS_ASSERT
-      MOV r27, DAC_CS_PIN
-      MOV r28, DAC_GPIO + GPIO_CLEARDATAOUT
-      SBBO r27, r28, 0, 4
+     MOV r27, DAC_CS_PIN                      // r27 = DAC_CS_PIN value (clobbers r27)
+     MOV r28, DAC_GPIO + GPIO_CLEARDATAOUT    // r28 = address of CLEARDATAOUT in the DAC GPIO bank (clobbers r28)
+     SBBO r27, r28, 0, 4                      // store the 4 bytes of r27 at [r28 + 0]
 .endm
 
 // Bring CS line high at end of DAC transaction
 .macro DAC_CS_UNASSERT
-      MOV r27, DAC_CS_PIN
-      MOV r28, DAC_GPIO + GPIO_SETDATAOUT
-      SBBO r27, r28, 0, 4
+     MOV r27, DAC_CS_PIN                    // r27 = DAC_CS_PIN value (clobbers r27)
+     MOV r28, DAC_GPIO + GPIO_SETDATAOUT    // r28 = address of SETDATAOUT in the DAC GPIO bank (clobbers r28)
+     SBBO r27, r28, 0, 4                    // store the 4 bytes of r27 at [r28 + 0]
 .endm
 
 // Write to DAC TX register
@@ -410,314 +412,313 @@
 // Wait for SPI to finish (uses RXS indicator)
 .macro DAC_WAIT_FOR_FINISH
  LOOP:
-      LBBO r27, reg_spi_addr, SPI_CH0STAT, 4
-      QBBC LOOP, r27, 0
+     LBBO r27, reg_spi_addr, SPI_CH0STAT, 4    // r27 = 4 bytes read from [reg_spi_addr + SPI_CH0STAT] (clobbers r27)
+     QBBC LOOP, r27, 0                         // keep polling while bit 0 is clear; there is no timeout
 .endm
 
 // Read the RX word to clear
 .macro DAC_DISCARD_RX
-      LBBO r27, reg_spi_addr, SPI_CH0RX, 4
+     LBBO r27, reg_spi_addr, SPI_CH0RX, 4    // read 4 bytes from [reg_spi_addr + SPI_CH0RX] into r27; the value is not used (clobbers r27)
 .endm
 
 // Complete DAC write with chip select
 .macro DAC_WRITE
 .mparam reg
-      DAC_CS_ASSERT
-      DAC_TX reg
-      DAC_WAIT_FOR_FINISH
-      DAC_CS_UNASSERT
-      DAC_DISCARD_RX
+     DAC_CS_ASSERT          // chip select low; overwrites r27 and r28
+     DAC_TX reg             // send the word held in reg; reg must not be r27 or r28, which were just overwritten
+     DAC_WAIT_FOR_FINISH    // poll SPI_CH0STAT until bit 0 is set
+     DAC_CS_UNASSERT        // chip select high again
+     DAC_DISCARD_RX         // read SPI_CH0RX into r27 and ignore the value
 .endm
 
 // Bring CS line low to write to ADC
 .macro ADC_CS_ASSERT
-      MOV r27, ADC_CS_PIN
-      MOV r28, ADC_GPIO + GPIO_CLEARDATAOUT
-      SBBO r27, r28, 0, 4
+     MOV r27, ADC_CS_PIN                      // r27 = ADC_CS_PIN value (clobbers r27)
+     MOV r28, ADC_GPIO + GPIO_CLEARDATAOUT    // r28 = address of CLEARDATAOUT in the ADC GPIO bank (clobbers r28)
+     SBBO r27, r28, 0, 4                      // store the 4 bytes of r27 at [r28 + 0]
 .endm
 
 // Bring CS line high at end of ADC transaction
 .macro ADC_CS_UNASSERT
-      MOV r27, ADC_CS_PIN
-      MOV r28, ADC_GPIO + GPIO_SETDATAOUT
-      SBBO r27, r28, 0, 4
+     MOV r27, ADC_CS_PIN                    // r27 = ADC_CS_PIN value (clobbers r27)
+     MOV r28, ADC_GPIO + GPIO_SETDATAOUT    // r28 = address of SETDATAOUT in the ADC GPIO bank (clobbers r28)
+     SBBO r27, r28, 0, 4                    // store the 4 bytes of r27 at [r28 + 0]
 .endm
 
 // Write to ADC TX register
 .macro ADC_TX
 .mparam data
-      SBBO data, reg_spi_addr, SPI_CH1TX, 4
+     SBBO data, reg_spi_addr, SPI_CH1TX, 4    // store the 4 bytes of data at [reg_spi_addr + SPI_CH1TX]; no registers are modified
 .endm
 
 // Wait for SPI to finish (uses RXS indicator)
 .macro ADC_WAIT_FOR_FINISH
  LOOP:
-      LBBO r27, reg_spi_addr, SPI_CH1STAT, 4
-      QBBC LOOP, r27, 0
+     LBBO r27, reg_spi_addr, SPI_CH1STAT, 4    // r27 = 4 bytes read from [reg_spi_addr + SPI_CH1STAT] (clobbers r27)
+     QBBC LOOP, r27, 0                         // keep polling while bit 0 is clear; there is no timeout
 .endm
 
 // Read the RX word to clear; store output
 .macro ADC_RX
 .mparam data
-      LBBO data, reg_spi_addr, SPI_CH1RX, 4
+     LBBO data, reg_spi_addr, SPI_CH1RX, 4    // data = 4 bytes read from [reg_spi_addr + SPI_CH1RX]
 .endm
 
 // Complete ADC write+read with chip select
 .macro ADC_WRITE
 .mparam in, out
-      ADC_CS_ASSERT
-      ADC_TX in
-      ADC_WAIT_FOR_FINISH
-      ADC_RX out
-      ADC_CS_UNASSERT
+     ADC_CS_ASSERT          // chip select low; overwrites r27 and r28
+     ADC_TX in              // write in to SPI_CH1TX; in must not be r27 or r28, which were just overwritten
+     ADC_WAIT_FOR_FINISH    // poll SPI_CH1STAT until bit 0 is set
+     ADC_RX out             // out = word read from SPI_CH1RX (in and out may be the same register)
+     ADC_CS_UNASSERT        // chip select high; overwrites r27 and r28 again, so out must not be either of them
 .endm
 
 // Complete ADC write+read with chip select and also performs IO for digital
 .macro ADC_WRITE_GPIO
 .mparam in, out, do_gpio
-      ADC_CS_ASSERT
-      ADC_TX in
-      QBBC GPIO_DONE, reg_flags, FLAG_BIT_USE_DIGITAL //skip if DIGITAL is disabled
-      AND r27, do_gpio, 0x3 // only do a DIGITAL every 2 SPI I/O
-      QBNE GPIO_DONE, r27, 0 
+     ADC_CS_ASSERT // chip select low; overwrites r27 and r28
+     ADC_TX in // write in to SPI_CH1TX; the DIGITAL work below happens before the wait at GPIO_DONE (see timing note)
+     QBBC GPIO_DONE, reg_flags, FLAG_BIT_USE_DIGITAL //skip if DIGITAL is disabled
+     AND r27, do_gpio, 0x3 // r27 = low two bits of do_gpio; DIGITAL runs only when both are 0, i.e. do_gpio is a multiple of 4
+     QBNE GPIO_DONE, r27, 0 // any other value: go straight to the SPI wait
 //from here to GPIO_DONE takes 1.8us, while usually ADC_WAIT_FOR_FINISH only waits for 1.14us.
 //TODO: it would be better to split the DIGITAL stuff in two parts:
 //- one taking place during DAC_WRITE which sets the GPIO_OE
 //- and the other during ADC_WRITE which actually reads DATAIN and writes CLEAR/SET DATAOUT
                             //r27 is actually r27, so do not use r27 from here to ...
-      LBBO r27, reg_digital_current, 0, 4 
-      JAL r28.w0, DIGITAL // note that this is not called as a macro, but with JAL. r28 will contain the return address
-      SBBO r27, reg_digital_current, 0,   4 
+     LBBO r27, reg_digital_current, 0, 4 // r27 = 32-bit word at [reg_digital_current]; presumably consumed and updated by DIGITAL (routine not shown here)
+     JAL r28.w0, DIGITAL // note that this is not called as a macro, but with JAL. r28 will contain the return address
+     SBBO r27, reg_digital_current, 0,   4 // after DIGITAL returns, write r27 back to the same location
                             //..here you can start using r27 again
-      ADD reg_digital_current, reg_digital_current, 4 //increment pointer
+     ADD reg_digital_current, reg_digital_current, 4 //increment pointer
 GPIO_DONE:
-      ADC_WAIT_FOR_FINISH
-      ADC_RX out
-      ADC_CS_UNASSERT
+     ADC_WAIT_FOR_FINISH // poll SPI_CH1STAT until bit 0 is set (overwrites r27)
+     ADC_RX out // out = word read from SPI_CH1RX
+     ADC_CS_UNASSERT // chip select high; overwrites r27 and r28, so out must not be either of them
 .endm
 
 // Write a McASP register
 .macro MCASP_REG_WRITE
 .mparam reg, value
-      MOV r27, value
-      SBBO r27, reg_mcasp_addr, reg, 4
+     MOV r27, value                      // r27 = value (clobbers r27)
+     SBBO r27, reg_mcasp_addr, reg, 4    // store 4 bytes at [reg_mcasp_addr + reg]; for offsets beyond 0xFF use MCASP_REG_WRITE_EXT
 .endm
 
 // Write a McASP register beyond the 0xFF boundary
 .macro MCASP_REG_WRITE_EXT
 .mparam reg, value
-      MOV r27, value
-      MOV r28, reg
-      ADD r28, reg_mcasp_addr, r28
-      SBBO r27, r28, 0, 4
+     MOV r27, value                  // r27 = value (clobbers r27); value may be a register or a constant
+     MOV r28, reg                    // r28 = register offset (clobbers r28)
+     ADD r28, reg_mcasp_addr, r28    // r28 = reg_mcasp_addr + offset, computed in a register instead of as an immediate
+     SBBO r27, r28, 0, 4             // store the 4 bytes of r27 at [r28 + 0]
 .endm
 
 // Read a McASP register
 .macro MCASP_REG_READ
 .mparam reg, value
-      LBBO value, reg_mcasp_addr, reg, 4
+     LBBO value, reg_mcasp_addr, reg, 4    // value = 4 bytes read from [reg_mcasp_addr + reg]; for offsets beyond 0xFF use MCASP_REG_READ_EXT
 .endm
 	
 // Read a McASP register beyond the 0xFF boundary
 .macro MCASP_REG_READ_EXT
 .mparam reg, value
-      MOV r28, reg
-      ADD r28, reg_mcasp_addr, r28
-      LBBO value, r28, 0, 4
+     MOV r28, reg                    // r28 = register offset (clobbers r28)
+     ADD r28, reg_mcasp_addr, r28    // r28 = reg_mcasp_addr + offset
+     LBBO value, r28, 0, 4           // value = 4 bytes read from [r28 + 0]
 .endm
 	
 // Set a bit and wait for it to come up
 .macro MCASP_REG_SET_BIT_AND_POLL
 .mparam reg, mask
-      MOV r27, mask
-      LBBO r28, reg_mcasp_addr, reg, 4
-      OR r28, r28, r27
-      SBBO r28, reg_mcasp_addr, reg, 4
+     MOV r27, mask                       // r27 = mask; kept for the poll loop below (clobbers r27)
+     LBBO r28, reg_mcasp_addr, reg, 4    // r28 = current register value (clobbers r28)
+     OR r28, r28, r27                    // set the mask bits, leaving the other bits as read
+     SBBO r28, reg_mcasp_addr, reg, 4    // write the modified value back
 POLL:
-      LBBO r28, reg_mcasp_addr, reg, 4
-      AND r28, r28, r27
-      QBEQ POLL, r28, 0
+     LBBO r28, reg_mcasp_addr, reg, 4    // re-read the register
+     AND r28, r28, r27                   // isolate the mask bits
+     QBEQ POLL, r28, 0                   // loop while none of the mask bits reads back as set; there is no timeout
 .endm
    
 START:
-      // Set up c24 and c25 offsets with CTBIR register
-      // Thus C24 points to start of PRU0 RAM
-      MOV r3, 0x22020       // CTBIR0
-      MOV r2, 0
-      SBBO r2, r3, 0, 4
+     // Set up c24 and c25 offsets with CTBIR register
+     // Thus C24 points to start of PRU0 RAM
+     MOV r3, 0x22020       // CTBIR0
+     MOV r2, 0
+     SBBO r2, r3, 0, 4
 
-      // Set up c28 pointer offset for shared PRU RAM
-      MOV r3, 0x22028       // CTPPR0
-      MOV r2, 0x00000120    // To get address 0x00012000
-      SBBO r2, r3, 0, 4
+     // Set up c28 pointer offset for shared PRU RAM
+     MOV r3, 0x22028       // CTPPR0
+     MOV r2, 0x00000120    // To get address 0x00012000
+     SBBO r2, r3, 0, 4
 	
-      // Load useful registers for addressing SPI
-      MOV reg_comm_addr, SHARED_COMM_MEM_BASE
-      MOV reg_spi_addr, SPI_BASE
-      MOV reg_mcasp_addr, MCASP_BASE
+     // Load useful registers for addressing SPI
+     MOV reg_comm_addr, SHARED_COMM_MEM_BASE
+     MOV reg_spi_addr, SPI_BASE
+     MOV reg_mcasp_addr, MCASP_BASE
 	
-      // Set ARM such that PRU can write to registers
-      LBCO r0, C4, 4, 4
-      CLR r0, r0, 4
-      SBCO r0, C4, 4, 4
+     // Set ARM such that PRU can write to registers
+     LBCO r0, C4, 4, 4
+     CLR r0, r0, 4
+     SBCO r0, C4, 4, 4
 
-      // Clear flags
-      MOV reg_flags, 0
+     // Clear flags
+     MOV reg_flags, 0
+     // Default number of channels in case SPI disabled
+     LDI reg_num_channels, 8
 
-      // Default number of channels in case SPI disabled
-      LDI reg_num_channels, 8
-	
-      // Find out whether we should use DIGITAL
-      LBBO r2, reg_comm_addr, COMM_USE_DIGITAL, 4
-      QBEQ DIGITAL_INIT_DONE, r2, 0 // if we use digital
-      SET reg_flags, reg_flags, FLAG_BIT_USE_DIGITAL 
+     // Find out whether we should use DIGITAL
+     LBBO r2, reg_comm_addr, COMM_USE_DIGITAL, 4
+     QBEQ DIGITAL_INIT_DONE, r2, 0 // if we use digital
+     SET reg_flags, reg_flags, FLAG_BIT_USE_DIGITAL 
 /* This block of code is not really needed, as the memory is initialized by ARM before the PRU is started.
 Will leave it here for future reference
 DIGITAL_INIT: //set the digital buffer to 0x0000ffff (all inputs), to prevent unwanted high outputs
               //the loop is unrolled by a factor of four just to take advantage of the speed of SBBO on larger byte bursts, but there is no real need for it
-      MOV r2, 0x0000ffff //value to store. 0x0000ffff means all inputs
-      MOV r3, MEM_DIGITAL_BASE //start of the digital buffer
-      MOV r4, MEM_DIGITAL_BASE+2*MEM_DIGITAL_BUFFER1_OFFSET //end of the digital buffer
+     MOV r2, 0x0000ffff //value to store. 0x0000ffff means all inputs
+     MOV r3, MEM_DIGITAL_BASE //start of the digital buffer
+     MOV r4, MEM_DIGITAL_BASE+2*MEM_DIGITAL_BUFFER1_OFFSET //end of the digital buffer
 DIGITAL_INIT_BUFFER_LOOP:
-      SBBO r2, r3, 0, 4 
-      ADD r3, r3, 4 //increment pointer
-      QBGT DIGITAL_INIT_BUFFER_LOOP, r3, r4 //loop until we reach the end of the buffer
+     SBBO r2, r3, 0, 4 
+     ADD r3, r3, 4 //increment pointer
+     QBGT DIGITAL_INIT_BUFFER_LOOP, r3, r4 //loop until we reach the end of the buffer
 */
 DIGITAL_INIT_DONE:
-      // Find out whether we should use SPI ADC and DAC
-      LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
-      QBEQ SPI_FLAG_CHECK_DONE, r2, 0
-      SET reg_flags, reg_flags, FLAG_BIT_USE_SPI
+     // Find out whether we should use SPI ADC and DAC
+     LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
+     QBEQ SPI_FLAG_CHECK_DONE, r2, 0
+     SET reg_flags, reg_flags, FLAG_BIT_USE_SPI
 SPI_FLAG_CHECK_DONE:
-      // If we don't use SPI, then skip all this init
-      QBBC SPI_INIT_DONE, reg_flags, FLAG_BIT_USE_SPI
+     // If we don't use SPI, then skip all this init
+     QBBC SPI_INIT_DONE, reg_flags, FLAG_BIT_USE_SPI
 
-      // Load the number of channels: valid values are 8, 4 or 2
-      LBBO reg_num_channels, reg_comm_addr, COMM_NUM_CHANNELS, 4
-      QBGT SPI_NUM_CHANNELS_LT8, reg_num_channels, 8 // 8 > num_channels ?
-      LDI reg_num_channels, 8		// If N >= 8, N = 8
-      QBA SPI_NUM_CHANNELS_DONE
+     // Load the number of channels: valid values are 8, 4 or 2
+     LBBO reg_num_channels, reg_comm_addr, COMM_NUM_CHANNELS, 4
+     QBGT SPI_NUM_CHANNELS_LT8, reg_num_channels, 8 // 8 > num_channels ?
+     LDI reg_num_channels, 8		// If N >= 8, N = 8
+     QBA SPI_NUM_CHANNELS_DONE
 SPI_NUM_CHANNELS_LT8:	
-      QBGT SPI_NUM_CHANNELS_LT4, reg_num_channels, 4 // 4 > num_channels ?
-      LDI reg_num_channels, 4		// If N >= 4, N = 4
-      QBA SPI_NUM_CHANNELS_DONE
+     QBGT SPI_NUM_CHANNELS_LT4, reg_num_channels, 4 // 4 > num_channels ?
+     LDI reg_num_channels, 4		// If N >= 4, N = 4
+     QBA SPI_NUM_CHANNELS_DONE
 SPI_NUM_CHANNELS_LT4:
-      LDI reg_num_channels, 2		// else N = 2
+     LDI reg_num_channels, 2		// else N = 2
 SPI_NUM_CHANNELS_DONE:	
 	
-      // Init SPI clock
-      MOV r2, 0x02
-      MOV r3, CLOCK_BASE + CLOCK_SPI0
-      SBBO r2, r3, 0, 4
+     // Init SPI clock
+     MOV r2, 0x02
+     MOV r3, CLOCK_BASE + CLOCK_SPI0
+     SBBO r2, r3, 0, 4
 
-      // Reset SPI and wait for finish
-      MOV r2, 0x02
-      SBBO r2, reg_spi_addr, SPI_SYSCONFIG, 4
+     // Reset SPI and wait for finish
+     MOV r2, 0x02
+     SBBO r2, reg_spi_addr, SPI_SYSCONFIG, 4
 
 SPI_WAIT_RESET:
-      LBBO r2, reg_spi_addr, SPI_SYSSTATUS, 4
-      QBBC SPI_WAIT_RESET, r2, 0
+     LBBO r2, reg_spi_addr, SPI_SYSSTATUS, 4
+     QBBC SPI_WAIT_RESET, r2, 0
 	
-      // Turn off SPI channels
-      MOV r2, 0
-      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
-      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
+     // Turn off SPI channels
+     MOV r2, 0
+     SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
+     SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
   
-      // Set to master; chip select lines enabled (CS0 used for DAC)
-      MOV r2, 0x00
-      SBBO r2, reg_spi_addr, SPI_MODULCTRL, 4
+     // Set to master; chip select lines enabled (CS0 used for DAC)
+     MOV r2, 0x00
+     SBBO r2, reg_spi_addr, SPI_MODULCTRL, 4
   
-      // Configure CH0 for DAC
-      MOV r2, (3 << 27) | (DAC_DPE << 16) | (DAC_TRM << 12) | ((DAC_WL - 1) << 7) | (DAC_CLK_DIV << 2) | DAC_CLK_MODE | (1 << 6)
-      SBBO r2, reg_spi_addr, SPI_CH0CONF, 4
+     // Configure CH0 for DAC
+     MOV r2, (3 << 27) | (DAC_DPE << 16) | (DAC_TRM << 12) | ((DAC_WL - 1) << 7) | (DAC_CLK_DIV << 2) | DAC_CLK_MODE | (1 << 6)
+     SBBO r2, reg_spi_addr, SPI_CH0CONF, 4
 
-      // Configure CH1 for ADC
-      MOV r2, (3 << 27) | (ADC_DPE << 16) | (ADC_TRM << 12) | ((ADC_WL - 1) << 7) | (ADC_CLK_DIV << 2) | ADC_CLK_MODE
-      SBBO r2, reg_spi_addr, SPI_CH1CONF, 4
+     // Configure CH1 for ADC
+     MOV r2, (3 << 27) | (ADC_DPE << 16) | (ADC_TRM << 12) | ((ADC_WL - 1) << 7) | (ADC_CLK_DIV << 2) | ADC_CLK_MODE
+     SBBO r2, reg_spi_addr, SPI_CH1CONF, 4
    
-      // Turn on SPI channels
-      MOV r2, 0x01
-      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
-      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4   
+     // Turn on SPI channels
+     MOV r2, 0x01
+     SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
+     SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4   
 
-      // DAC power-on reset sequence
-      MOV r2, (0x07 << AD5668_COMMAND_OFFSET)
-      DAC_WRITE r2
+     // DAC power-on reset sequence
+     MOV r2, (0x07 << AD5668_COMMAND_OFFSET)
+     DAC_WRITE r2
 
-      // Initialise ADC
-      MOV r2, AD7699_CFG_MASK | (0 << AD7699_CHANNEL_OFFSET) | (0 << AD7699_SEQ_OFFSET)
-      ADC_WRITE r2, r2
+     // Initialise ADC
+     MOV r2, AD7699_CFG_MASK | (0 << AD7699_CHANNEL_OFFSET) | (0 << AD7699_SEQ_OFFSET)
+     ADC_WRITE r2, r2
 
-      // Enable DAC internal reference
-      MOV r2, (0x08 << AD5668_COMMAND_OFFSET) | (0x01 << AD5668_REF_OFFSET)
-      DAC_WRITE r2
+     // Enable DAC internal reference
+     MOV r2, (0x08 << AD5668_COMMAND_OFFSET) | (0x01 << AD5668_REF_OFFSET)
+     DAC_WRITE r2
 	
-      // Read ADC ch0 and ch1: result is always 2 samples behind so start here
-      MOV r2, AD7699_CFG_MASK | (0x00 << AD7699_CHANNEL_OFFSET)
-      ADC_WRITE r2, r2
+     // Read ADC ch0 and ch1: result is always 2 samples behind so start here
+     MOV r2, AD7699_CFG_MASK | (0x00 << AD7699_CHANNEL_OFFSET)
+     ADC_WRITE r2, r2
 
-      MOV r2, AD7699_CFG_MASK | (0x01 << AD7699_CHANNEL_OFFSET)
-      ADC_WRITE r2, r2
+     MOV r2, AD7699_CFG_MASK | (0x01 << AD7699_CHANNEL_OFFSET)
+     ADC_WRITE r2, r2
 SPI_INIT_DONE:	
 	
-// Prepare McASP0 for audio
-MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
-MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
-MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
-MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
-MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
-MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
-MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0
+    // Prepare McASP0 for audio
+    MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
+    MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
+    MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
+    MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
+    MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
+    MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
+    MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0
 
-MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
-MCASP_REG_WRITE MCASP_PFUNC, 0x00		// All pins are McASP
-MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
-MCASP_REG_WRITE MCASP_DLBCTL, 0x00
-MCASP_REG_WRITE MCASP_DITCTL, 0x00
-MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
-MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
-MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
-MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
-MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
-MCASP_REG_WRITE MCASP_RTDM, 0x03		// Enable TDM slots 0 and 1
-MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
-MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
-MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
-MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
-MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
-MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
-MCASP_REG_WRITE MCASP_XTDM, 0x03		// Enable TDM slots 0 and 1
-MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
+    MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
+    MCASP_REG_WRITE MCASP_PFUNC, 0x00		// All pins are McASP
+    MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
+    MCASP_REG_WRITE MCASP_DLBCTL, 0x00
+    MCASP_REG_WRITE MCASP_DITCTL, 0x00
+    MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
+    MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
+    MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
+    MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
+    MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
+    MCASP_REG_WRITE MCASP_RTDM, 0x03		// Enable TDM slots 0 and 1
+    MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
+    MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
+    MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
+    MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
+    MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
+    MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
+    MCASP_REG_WRITE MCASP_XTDM, 0x03		// Enable TDM slots 0 and 1
+    MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
 	
-MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
-MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
-MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00	// Disable FIFOs
-MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00
+    MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
+    MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
+    MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00	// Disable FIFOs
+    MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00
 
-MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
-MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors
+    MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
+    MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors
 
-MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
-MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST
+    MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
+    MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST
 
 // The above write sequence will have temporarily changed the AHCLKX frequency
 // The PLL needs time to settle or the sample rate will be unstable and possibly
 // cause an underrun. Give it ~1ms before going on.
 // 10ns per loop iteration = 10^-8s --> 10^5 iterations needed
 
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_SETDATAOUT
-      SBBO r2, r3, 0, 4
+     MOV r2, 1 << 28                    // r2 = mask with only bit 28 set
+     MOV r3, GPIO1 + GPIO_SETDATAOUT    // r3 = address of GPIO1's SETDATAOUT register
+     SBBO r2, r3, 0, 4                  // raise GPIO1 bit 28 before the delay; presumably a timing marker for debugging - confirm
 
 MOV r2, 100000
 MCASP_INIT_WAIT:	
-      SUB r2, r2, 1
-      QBNE MCASP_INIT_WAIT, r2, 0
+     SUB r2, r2, 1                      // count r2 down from 100000
+     QBNE MCASP_INIT_WAIT, r2, 0        // two-instruction busy-wait until r2 reaches 0
 
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
-      SBBO r2, r3, 0, 4
-	
+     MOV r2, 1 << 28                    // same bit-28 mask again (r2 was used as the delay counter)
+     MOV r3, GPIO1 + GPIO_CLEARDATAOUT  // r3 = address of GPIO1's CLEARDATAOUT register
+     SBBO r2, r3, 0, 4                  // lower GPIO1 bit 28 now that the delay is over
+
 MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
 MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
 MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
@@ -777,263 +778,263 @@
 // Here we are out of sync by one TDM slot since the 0 word transmitted above will have occupied
 // the first output slot. Send one more word before jumping into the loop.
 MCASP_DAC_WAIT_BEFORE_LOOP:	
-      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
-      QBBC MCASP_DAC_WAIT_BEFORE_LOOP, r2, MCASP_XSTAT_XDATA_BIT
+     LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
+     QBBC MCASP_DAC_WAIT_BEFORE_LOOP, r2, MCASP_XSTAT_XDATA_BIT
 
-      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00
+     MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00
 
 // Likewise, read and discard the first sample we get back from the ADC. This keeps the DAC and ADC
 // in sync in terms of which TDM slot we are reading (empirically found that we should throw this away
 // rather than keep it and invert the phase)
 MCASP_ADC_WAIT_BEFORE_LOOP:
-      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
-      QBBC MCASP_ADC_WAIT_BEFORE_LOOP, r2, MCASP_RSTAT_RDATA_BIT
+     LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+     QBBC MCASP_ADC_WAIT_BEFORE_LOOP, r2, MCASP_RSTAT_RDATA_BIT
 
-      MCASP_REG_READ_EXT MCASP_RBUF, r2
+     MCASP_REG_READ_EXT MCASP_RBUF, r2
 	
 WRITE_ONE_BUFFER:
 
-      // Write a single buffer of DAC samples and read a buffer of ADC samples
-      // Load starting positions
-      MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
-      LMBD r2, reg_num_channels, 1		// 1, 2 or 3 for 2, 4 or 8 channels
-      LSL reg_adc_current, reg_frame_total, r2
-      LSL reg_adc_current, reg_adc_current, 2   // N * 2 * 2 * bufsize
-      ADD reg_adc_current, reg_adc_current, reg_dac_current // ADC: starts N * 2 * 2 * bufsize beyond DAC
-      MOV reg_mcasp_dac_current, reg_mcasp_buf0 // McASP: set current DAC pointer
-      LSL reg_mcasp_adc_current, reg_frame_total, r2 // McASP ADC: starts (N/2)*2*2*bufsize beyond DAC
-      LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1
-      ADC reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
-      MOV reg_frame_current, 0
-      QBBS DIGITAL_BASE_CHECK_SET, reg_flags, FLAG_BIT_BUFFER1  //check which buffer we are using for DIGITAL
+     // Write a single buffer of DAC samples and read a buffer of ADC samples
+     // Load starting positions
+     MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
+     LMBD r2, reg_num_channels, 1		// 1, 2 or 3 for 2, 4 or 8 channels
+     LSL reg_adc_current, reg_frame_total, r2
+     LSL reg_adc_current, reg_adc_current, 2   // N * 2 * 2 * bufsize
+     ADD reg_adc_current, reg_adc_current, reg_dac_current // ADC: starts N * 2 * 2 * bufsize beyond DAC
+    MOV reg_mcasp_dac_current, reg_mcasp_buf0 // McASP: set current DAC pointer
+     LSL reg_mcasp_adc_current, reg_frame_total, r2 // McASP ADC: starts (N/2)*2*2*bufsize beyond DAC
+     LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1 // r2 is log2(N), so this is N*2*bufsize = (N/2)*2*2*bufsize
+     ADD reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current // plain ADD (was ADC, add-with-carry): the start offset must not depend on the carry flag
+     MOV reg_frame_current, 0 // restart the frame counter for this buffer
+     QBBS DIGITAL_BASE_CHECK_SET, reg_flags, FLAG_BIT_BUFFER1  //check which buffer we are using for DIGITAL
                   // if we are here, we are using buffer0 
-      MOV reg_digital_current, MEM_DIGITAL_BASE
-      QBA DIGITAL_BASE_CHECK_DONE
+     MOV reg_digital_current, MEM_DIGITAL_BASE // digital pointer starts at the beginning of buffer0
+     QBA DIGITAL_BASE_CHECK_DONE
 DIGITAL_BASE_CHECK_SET: //if we are here, we are using buffer1 
-      MOV reg_digital_current, MEM_DIGITAL_BASE+MEM_DIGITAL_BUFFER1_OFFSET //so adjust offset appropriately
+     MOV reg_digital_current, MEM_DIGITAL_BASE+MEM_DIGITAL_BUFFER1_OFFSET //so adjust offset appropriately
 DIGITAL_BASE_CHECK_DONE:
 
 WRITE_LOOP:
-      // Write N channels to DAC from successive values in memory
-      // At the same time, read N channels from ADC
-      // Unrolled by a factor of 2 to get high and low words
-      MOV r1, 0
+     // Write N channels to DAC from successive values in memory
+     // At the same time, read N channels from ADC
+     // Unrolled by a factor of 2 to get high and low words
+     MOV r1, 0
 ADC_DAC_LOOP:
-      QBBC SPI_DAC_LOAD_DONE, reg_flags, FLAG_BIT_USE_SPI
-      // Load next 2 SPI DAC samples and store zero in their place
-      LBCO reg_dac_data, C_ADC_DAC_MEM, reg_dac_current, 4
-      MOV r2, 0
-      SBCO r2, C_ADC_DAC_MEM, reg_dac_current, 4
-      ADD reg_dac_current, reg_dac_current, 4
+     QBBC SPI_DAC_LOAD_DONE, reg_flags, FLAG_BIT_USE_SPI
+     // Load next 2 SPI DAC samples and store zero in their place
+     LBCO reg_dac_data, C_ADC_DAC_MEM, reg_dac_current, 4
+     MOV r2, 0
+     SBCO r2, C_ADC_DAC_MEM, reg_dac_current, 4
+     ADD reg_dac_current, reg_dac_current, 4
 SPI_DAC_LOAD_DONE:
 
-      // On even iterations, load two more samples and choose the first one
-      // On odd iterations, transmit the second of the samples already loaded
-      // QBBS MCASP_DAC_HIGH_WORD, r1, 1
-      QBBS MCASP_DAC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
+     // On even iterations, load two more samples and choose the first one
+     // On odd iterations, transmit the second of the samples already loaded
+     // QBBS MCASP_DAC_HIGH_WORD, r1, 1
+     QBBS MCASP_DAC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
 MCASP_DAC_LOW_WORD:	
-      // Load next 2 Audio DAC samples and store zero in their place
-      LBCO reg_mcasp_dac_data, C_MCASP_MEM, reg_mcasp_dac_current, 4
-      MOV r2, 0
-      SBCO r2, C_MCASP_MEM, reg_mcasp_dac_current, 4
-      ADD reg_mcasp_dac_current, reg_mcasp_dac_current, 4
+     // Load next 2 Audio DAC samples and store zero in their place
+     LBCO reg_mcasp_dac_data, C_MCASP_MEM, reg_mcasp_dac_current, 4
+     MOV r2, 0
+     SBCO r2, C_MCASP_MEM, reg_mcasp_dac_current, 4
+     ADD reg_mcasp_dac_current, reg_mcasp_dac_current, 4
 
-      // Mask out the low word (first in little endian)
-      MOV r2, 0xFFFF
-      AND r7, reg_mcasp_dac_data, r2
+     // Mask out the low word (first in little endian)
+     MOV r2, 0xFFFF
+     AND r7, reg_mcasp_dac_data, r2
 	
-      QBA MCASP_WAIT_XSTAT
+     QBA MCASP_WAIT_XSTAT
 MCASP_DAC_HIGH_WORD:
-      // Take the high word of the previously loaded data
-      LSR r7, reg_mcasp_dac_data, 16
+     // Take the high word of the previously loaded data
+     LSR r7, reg_mcasp_dac_data, 16
 	
-      // Every 2 channels we send one audio sample; this loop already
-      // sends exactly two SPI channels.
-      // Wait for McASP XSTAT[XDATA] to set indicating we can write more data
+     // Every 2 channels we send one audio sample; this loop already
+     // sends exactly two SPI channels.
+     // Wait for McASP XSTAT[XDATA] to set indicating we can write more data
 MCASP_WAIT_XSTAT:
-      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
-      QBBC MCASP_WAIT_XSTAT, r2, MCASP_XSTAT_XDATA_BIT
+     LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
+     QBBS START, r2, MCASP_XSTAT_XUNDRN_BIT // if underrun occurred, reset the PRU
+     QBBC MCASP_WAIT_XSTAT, r2, MCASP_XSTAT_XDATA_BIT
 
-      MCASP_REG_WRITE_EXT MCASP_XBUF, r7
+     MCASP_REG_WRITE_EXT MCASP_XBUF, r7
 	
-      // Same idea with ADC: even iterations, load the sample into the low word, odd
-      // iterations, load the sample into the high word and store
-      // QBBS MCASP_ADC_HIGH_WORD, r1, 1
-      QBBS MCASP_ADC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
+     // Same idea with ADC: even iterations, load the sample into the low word, odd
+     // iterations, load the sample into the high word and store
+     // QBBS MCASP_ADC_HIGH_WORD, r1, 1
+     QBBS MCASP_ADC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
 MCASP_ADC_LOW_WORD:	
-      // Start ADC data at 0
-      LDI reg_mcasp_adc_data, 0
+     // Start ADC data at 0
+     LDI reg_mcasp_adc_data, 0
 	
-      // Now wait for a received word to become available from the audio ADC
+     // Now wait for a received word to become available from the audio ADC
 MCASP_WAIT_RSTAT_LOW:
-      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
-      QBBC MCASP_WAIT_RSTAT_LOW, r2, MCASP_RSTAT_RDATA_BIT
+     LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+     QBBC MCASP_WAIT_RSTAT_LOW, r2, MCASP_RSTAT_RDATA_BIT
 
-      // Mask low word and store in ADC data register
-      MCASP_REG_READ_EXT MCASP_RBUF, r3
-      MOV r2, 0xFFFF
-      AND reg_mcasp_adc_data, r3, r2
-      QBA MCASP_ADC_DONE
+     // Mask low word and store in ADC data register
+     MCASP_REG_READ_EXT MCASP_RBUF, r3
+     MOV r2, 0xFFFF
+     AND reg_mcasp_adc_data, r3, r2
+     QBA MCASP_ADC_DONE
 
 MCASP_ADC_HIGH_WORD:	
-      // Wait for a received word to become available from the audio ADC
+     // Wait for a received word to become available from the audio ADC
 MCASP_WAIT_RSTAT_HIGH:
-      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
-      QBBC MCASP_WAIT_RSTAT_HIGH, r2, MCASP_RSTAT_RDATA_BIT
+     LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+     QBBC MCASP_WAIT_RSTAT_HIGH, r2, MCASP_RSTAT_RDATA_BIT
 
-      // Read data and shift 16 bits to the left (into the high word)
-      MCASP_REG_READ_EXT MCASP_RBUF, r3
-      LSL r3, r3, 16
-      OR reg_mcasp_adc_data, reg_mcasp_adc_data, r3
+     // Read data and shift 16 bits to the left (into the high word)
+     MCASP_REG_READ_EXT MCASP_RBUF, r3
+     LSL r3, r3, 16
+     OR reg_mcasp_adc_data, reg_mcasp_adc_data, r3
 
-      // Now store the result and increment the pointer
-      SBCO reg_mcasp_adc_data, C_MCASP_MEM, reg_mcasp_adc_current, 4
-      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
+     // Now store the result and increment the pointer
+     SBCO reg_mcasp_adc_data, C_MCASP_MEM, reg_mcasp_adc_current, 4
+     ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
 MCASP_ADC_DONE:	
-      QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
+     QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
 
-      // DAC: transmit low word (first in little endian)
-      MOV r2, 0xFFFF
-      AND r7, reg_dac_data, r2
-      LSL r7, r7, AD5668_DATA_OFFSET
-      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
-      OR r7, r7, r8
-      LSL r8, r1, AD5668_ADDRESS_OFFSET
-      OR r7, r7, r8
-      DAC_WRITE r7
+     // DAC: transmit low word (first in little endian)
+     MOV r2, 0xFFFF
+     AND r7, reg_dac_data, r2
+     LSL r7, r7, AD5668_DATA_OFFSET
+     MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
+     OR r7, r7, r8
+     LSL r8, r1, AD5668_ADDRESS_OFFSET
+     OR r7, r7, r8
+     DAC_WRITE r7
 
-      // Read ADC channels: result is always 2 commands behind
-      // Start by reading channel 2 (result is channel 0) and go
-      // to N+2, but masking the channel number to be between 0 and N-1
-      LDI reg_adc_data, 0
-      ADD r8, r1, 2
-      SUB r7, reg_num_channels, 1
-      AND r8, r8, r7
-      LSL r8, r8, AD7699_CHANNEL_OFFSET
-      MOV r7, AD7699_CFG_MASK
-      OR r7, r7, r8
+     // Read ADC channels: result is always 2 commands behind
+     // Start by reading channel 2 (result is channel 0) and go
+     // to N+2, but masking the channel number to be between 0 and N-1
+     LDI reg_adc_data, 0
+     ADD r8, r1, 2
+     SUB r7, reg_num_channels, 1
+     AND r8, r8, r7
+     LSL r8, r8, AD7699_CHANNEL_OFFSET
+     MOV r7, AD7699_CFG_MASK
+     OR r7, r7, r8
 
-//ssssssssssssssssssssssssssss
-      ADC_WRITE_GPIO r7, r7, r1
+     ADC_WRITE_GPIO r7, r7, r1
 
-      // Mask out only the relevant 16 bits and store in reg_adc_data
-      MOV r2, 0xFFFF
-      AND reg_adc_data, r7, r2
+     // Mask out only the relevant 16 bits and store in reg_adc_data
+     MOV r2, 0xFFFF
+     AND reg_adc_data, r7, r2
+     // Increment channel index
+     ADD r1, r1, 1
 
-      // Increment channel index
-      ADD r1, r1, 1
+     // DAC: transmit high word (second in little endian)
+     LSR r7, reg_dac_data, 16
+     LSL r7, r7, AD5668_DATA_OFFSET
+     MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
+     OR r7, r7, r8
+     LSL r8, r1, AD5668_ADDRESS_OFFSET
+     OR r7, r7, r8
+     DAC_WRITE r7
 
-      // DAC: transmit high word (second in little endian)
-      LSR r7, reg_dac_data, 16
-      LSL r7, r7, AD5668_DATA_OFFSET
-      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
-      OR r7, r7, r8
-      LSL r8, r1, AD5668_ADDRESS_OFFSET
-      OR r7, r7, r8
-      DAC_WRITE r7
+     // Read ADC channels: result is always 2 commands behind
+     // Start by reading channel 2 (result is channel 0) and go
+     // to N+2, but masking the channel number to be between 0 and N-1
+     ADD r8, r1, 2
+     SUB r7, reg_num_channels, 1
+     AND r8, r8, r7
+     LSL r8, r8, AD7699_CHANNEL_OFFSET
+     MOV r7, AD7699_CFG_MASK
+     OR r7, r7, r8
+     ADC_WRITE r7, r7
 
-      // Read ADC channels: result is always 2 commands behind
-      // Start by reading channel 2 (result is channel 0) and go
-      // to N+2, but masking the channel number to be between 0 and N-1
-      ADD r8, r1, 2
-      SUB r7, reg_num_channels, 1
-      AND r8, r8, r7
-      LSL r8, r8, AD7699_CHANNEL_OFFSET
-      MOV r7, AD7699_CFG_MASK
-      OR r7, r7, r8
-      ADC_WRITE r7, r7
+     // Move this result up to the 16 high bits
+     LSL r7, r7, 16
+     OR reg_adc_data, reg_adc_data, r7
 
-      // Move this result up to the 16 high bits
-      LSL r7, r7, 16
-      OR reg_adc_data, reg_adc_data, r7
+     // Store 2 ADC words in memory
+     SBCO reg_adc_data, C_ADC_DAC_MEM, reg_adc_current, 4
+     ADD reg_adc_current, reg_adc_current, 4
 
-      // Store 2 ADC words in memory
-      SBCO reg_adc_data, C_ADC_DAC_MEM, reg_adc_current, 4
-      ADD reg_adc_current, reg_adc_current, 4
+     // Toggle the high/low word for McASP control (since we send one word out of
+     // 32 bits for each pair of SPI channels)
+     XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
+	
+     // Repeat 4 times for 8 channels (2 samples per loop, r1 += 1 already happened)
+     // For 4 or 2 channels, repeat 2 or 1 times, according to flags
+     ADD r1, r1, 1
+     QBNE ADC_DAC_LOOP, r1, reg_num_channels
+     QBA ADC_DAC_LOOP_DONE
+SPI_SKIP_WRITE:
+     // We get here only if the SPI ADC and DAC are disabled
+     // Just keep the loop going for McASP
 
-      // Toggle the high/low word for McASP control (since we send one word out of
-      // 32 bits for each pair of SPI channels)
-      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
-	
-      // Repeat 4 times for 8 channels (2 samples per loop, r1 += 1 already happened)
-      // For 4 or 2 channels, repeat 2 or 1 times, according to flags
-      ADD r1, r1, 1
-      QBNE ADC_DAC_LOOP, r1, reg_num_channels
-      QBA ADC_DAC_LOOP_DONE
-SPI_SKIP_WRITE:
-      // We get here only if the SPI ADC and DAC are disabled
-      // Just keep the loop going for McASP
+     // Toggle the high/low word for McASP control (since we send one word out of
+     // 32 bits for each pair of SPI channels)
+     XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
 
-      // Toggle the high/low word for McASP control (since we send one word out of
-      // 32 bits for each pair of SPI channels)
-      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
-
-      ADD r1, r1, 2
-      QBNE ADC_DAC_LOOP, r1, reg_num_channels
+     ADD r1, r1, 2
+     QBNE ADC_DAC_LOOP, r1, reg_num_channels
 	
 ADC_DAC_LOOP_DONE:	
-      // Increment number of frames, see if we have more to write
-      ADD reg_frame_current, reg_frame_current, 1
-      QBNE WRITE_LOOP, reg_frame_current, reg_frame_total
+     // Frame complete: count it, then return to WRITE_LOOP until reg_frame_total frames are done
+     ADD reg_frame_current, reg_frame_current, 1
+     QBNE WRITE_LOOP, reg_frame_current, reg_frame_total    // falls through to WRITE_LOOP_DONE when equal
 
 WRITE_LOOP_DONE:
-      // Now done, swap the buffers and do the next one
-      // Use r2 as a temp register
-      MOV r2, reg_dac_buf0
-      MOV reg_dac_buf0, reg_dac_buf1
-      MOV reg_dac_buf1, r2
-      MOV r2, reg_mcasp_buf0
-      MOV reg_mcasp_buf0, reg_mcasp_buf1
-      MOV reg_mcasp_buf1, r2
-      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1) //flip the buffer flag
+     // Block finished: exchange buffer 0 and buffer 1 for both the DAC and the McASP
+     // Use r2 as a temp register
+     MOV r2, reg_dac_buf0
+     MOV reg_dac_buf0, reg_dac_buf1
+     MOV reg_dac_buf1, r2
+     MOV r2, reg_mcasp_buf0
+     MOV reg_mcasp_buf0, reg_mcasp_buf1
+     MOV reg_mcasp_buf1, r2
+     XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1) // Flip the buffer flag to match the swap
 
-      // Notify ARM of buffer swap
-      AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out every but low bit
-      SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4
+     // Notify ARM of buffer swap: publish the buffer flag, then raise the block-ready interrupt
+     AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out everything but the FLAG_BIT_BUFFER1 bit
+     SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4
+     MOV R31.b0, PRU1_ARM_INTERRUPT + 16           // Raise the block-ready interrupt to the host loop
+	
+     // Add reg_frame_total to the frame count kept in the comm buffer (for status monitoring)
+     LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
+     ADD r2, r2, reg_frame_total
+     SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
 
-      // Increment the frame count in the comm buffer (for status monitoring)
-      LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
-      ADD r2, r2, reg_frame_total
-      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
-
-      // If LED blink enabled, toggle every 4096 frames
-      LBBO r3, reg_comm_addr, COMM_LED_ADDRESS, 4
-      QBEQ LED_BLINK_DONE, r3, 0	
-      MOV r1, 0x1000
-      AND r2, r2, r1          // Test (frame count & 4096)
-      QBEQ LED_BLINK_OFF, r2, 0
-      LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4	
-      MOV r1, GPIO_SETDATAOUT
-      ADD r3, r3, r1          // Address for GPIO set register
-      SBBO r2, r3, 0, 4       // Set GPIO pin
-      QBA LED_BLINK_DONE
+     // LED blink is skipped when COMM_LED_ADDRESS reads 0; otherwise the LED follows bit 12 (0x1000) of the frame count, toggling every 4096 frames
+     LBBO r3, reg_comm_addr, COMM_LED_ADDRESS, 4
+     QBEQ LED_BLINK_DONE, r3, 0	
+     MOV r1, 0x1000
+     AND r2, r2, r1          // Test (frame count & 4096); r2 still holds the updated frame count
+     QBEQ LED_BLINK_OFF, r2, 0
+     LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4	
+     MOV r1, GPIO_SETDATAOUT
+     ADD r3, r3, r1          // Address for GPIO set register
+     SBBO r2, r3, 0, 4       // Set GPIO pin
+     QBA LED_BLINK_DONE
 LED_BLINK_OFF:
-      LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4
-      MOV r1, GPIO_CLEARDATAOUT
-      ADD r3, r3, r1          // Address for GPIO clear register
-      SBBO r2, r3, 0, 4       // Clear GPIO pin	
+     LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4    // r2 = pin mask to write
+     MOV r1, GPIO_CLEARDATAOUT
+     ADD r3, r3, r1          // Address for GPIO clear register (r3 was loaded from COMM_LED_ADDRESS)
+     SBBO r2, r3, 0, 4       // Clear GPIO pin
 LED_BLINK_DONE:	
-      // Check if we should finish: flag is zero as long as it should run
-      LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
-      QBEQ WRITE_ONE_BUFFER, r2, 0
+     // Check if we should finish: COMM_SHOULD_STOP stays zero while we should keep running
+     LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
+     QBEQ WRITE_ONE_BUFFER, r2, 0    // zero: jump back to WRITE_ONE_BUFFER; nonzero: fall through to CLEANUP
 
 CLEANUP:
-      MCASP_REG_WRITE MCASP_GBLCTL, 0x00	// Turn off McASP
+     MCASP_REG_WRITE MCASP_GBLCTL, 0x00	// Turn off McASP (writes 0 to MCASP_GBLCTL)
 
-      // Turn off SPI if enabled
-      QBBC SPI_CLEANUP_DONE, reg_flags, FLAG_BIT_USE_SPI
+     // Turn off SPI if enabled; skip straight to SPI_CLEANUP_DONE when FLAG_BIT_USE_SPI is clear
+     QBBC SPI_CLEANUP_DONE, reg_flags, FLAG_BIT_USE_SPI
 	
-      MOV r3, SPI_BASE + SPI_CH0CONF
-      LBBO r2, r3, 0, 4
-      CLR r2, r2, 13
-      CLR r2, r2, 27
-      SBBO r2, r3, 0, 4
+     MOV r3, SPI_BASE + SPI_CH0CONF    // Read-modify-write of the channel 0 configuration register
+     LBBO r2, r3, 0, 4
+     CLR r2, r2, 13    // NOTE(review): the meaning of CH0CONF bits 13 and 27 is not visible here; check the McSPI register map
+     CLR r2, r2, 27
+     SBBO r2, r3, 0, 4
 
-      MOV r3, SPI_BASE + SPI_CH0CTRL
-      LBBO r2, r3, 0, 4
-      CLR r2, r2, 1
-      SBBO r2, r3, 0, 4      
+     MOV r3, SPI_BASE + SPI_CH0CTRL    // Read-modify-write of the channel 0 control register
+     LBBO r2, r3, 0, 4
+     CLR r2, r2, 1    // NOTE(review): clears bit 1; the AM335x McSPI CHxCTRL enable bit appears to be bit 0 - confirm this is intended
+     SBBO r2, r3, 0, 4      
 SPI_CLEANUP_DONE:
-      // Signal the ARM that we have finished 
-      MOV R31.b0, PRU0_ARM_INTERRUPT + 16
-      HALT
+     // Signal the ARM that we have finished (PRU0_ARM_INTERRUPT is the "we're done" interrupt), then stop
+     MOV R31.b0, PRU0_ARM_INTERRUPT + 16
+     HALT