diff pru_rtaudio.p @ 0:8a575ba3ab52

Initial commit.
author andrewm
date Fri, 31 Oct 2014 19:10:17 +0100
parents
children a6beeba3a648
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pru_rtaudio.p	Fri Oct 31 19:10:17 2014 +0100
@@ -0,0 +1,753 @@
+// PRU program: code is placed at origin 0 and execution begins at START.
+.origin 0
+.entrypoint START
+
+#define DBOX_CAPE	// Define this to use new cape hardware
+	
+// Clock-control registers: base address plus register offsets.
+// CLOCK_BASE + CLOCK_SPI0 is written during SPI init ("Init SPI clock").
+#define CLOCK_BASE  0x44E00000
+#define CLOCK_SPI0  0x4C
+#define CLOCK_SPI1  0x50
+#define CLOCK_L4LS  0x60
+
+// SPI controller base addresses; SPI_BASE selects the one this program uses
+#define SPI0_BASE   0x48030100
+#define SPI1_BASE   0x481A0100
+#define SPI_BASE    SPI0_BASE
+	
+// SPI register offsets relative to SPI_BASE.
+// Channel 0 registers are used by the DAC macros, channel 1 by the ADC macros.
+#define SPI_SYSCONFIG 0x10
+#define SPI_SYSSTATUS 0x14
+#define SPI_MODULCTRL 0x28
+#define SPI_CH0CONF   0x2C
+#define SPI_CH0STAT   0x30
+#define SPI_CH0CTRL   0x34
+#define SPI_CH0TX     0x38
+#define SPI_CH0RX     0x3C
+#define SPI_CH1CONF   0x40
+#define SPI_CH1STAT   0x44
+#define SPI_CH1CTRL   0x48
+#define SPI_CH1TX     0x4C
+#define SPI_CH1RX     0x50
+
+// GPIO bank base addresses, and offsets of the registers that clear / set
+// output pins (a bit mask of the pins to change is written to them)
+#define GPIO0 0x44E07000
+#define GPIO1 0x4804C000
+#define GPIO_CLEARDATAOUT 0x190
+#define GPIO_SETDATAOUT 0x194
+
+// Event used to signal the ARM on exit (written to R31.b0 as event + 16)
+#define PRU0_ARM_INTERRUPT 19
+
+// Constant-table entry used to address the SPI ADC/DAC sample buffers
+#define C_ADC_DAC_MEM C24     // PRU0 mem
+
+// GPIO bank and pin mask of the DAC chip-select line (depends on the cape)
+#ifdef DBOX_CAPE
+#define DAC_GPIO      GPIO0
+#define DAC_CS_PIN    (1<<5) // GPIO0:5 = P9 pin 17
+#else
+#define DAC_GPIO      GPIO1
+#define DAC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
+#endif
+
+// Field values combined into SPI_CH0CONF during SPI init
+#define DAC_TRM       0       // SPI transmit and receive
+#define DAC_WL        32      // Word length
+#define DAC_CLK_MODE  1       // SPI mode
+#define DAC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
+#define DAC_DPE       1       // d0 = receive, d1 = transmit
+
+// Bit positions of the fields inside the 32-bit word sent to the AD5668 DAC
+#define AD5668_COMMAND_OFFSET 24
+#define AD5668_ADDRESS_OFFSET 20
+#define AD5668_DATA_OFFSET    4
+#define AD5668_REF_OFFSET     0
+
+// GPIO bank and pin mask of the ADC chip-select line (depends on the cape;
+// both variants use GPIO1, only the pin differs)
+#ifdef DBOX_CAPE
+#define ADC_GPIO      GPIO1
+#define ADC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
+#else
+#define ADC_GPIO      GPIO1
+#define ADC_CS_PIN    (1<<17) // GPIO1:17 = P9 pin 23
+#endif
+
+// Field values combined into SPI_CH1CONF during SPI init
+#define ADC_TRM       0       // SPI transmit and receive
+#define ADC_WL        16      // Word length
+#define ADC_CLK_MODE  0       // SPI mode
+#define ADC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
+#define ADC_DPE       1       // d0 = receive, d1 = transmit
+
+// AD7699 configuration word: base value plus the shifts used to OR in the
+// channel number and sequencer setting
+#define AD7699_CFG_MASK       0xF120 // Mask for config update, unipolar, full BW
+#define AD7699_CHANNEL_OFFSET 9      // 7 bits offset of a 14-bit left-justified word
+#define AD7699_SEQ_OFFSET     3      // sequencer (0 = disable, 3 = scan all)
+
+// Communication block shared with the ARM. The COMM_* values are byte offsets
+// from SHARED_COMM_MEM_BASE; each field is accessed as a 4-byte word.
+#define SHARED_COMM_MEM_BASE  0x00010000  // Location where comm flags are written
+#define COMM_SHOULD_STOP      0		  // Set to be nonzero when loop should stop
+#define COMM_CURRENT_BUFFER   4           // Which buffer we are on
+#define COMM_BUFFER_FRAMES    8           // How many frames per buffer
+#define COMM_SHOULD_SYNC      12          // Whether to synchronise to an external clock
+#define COMM_SYNC_ADDRESS     16          // Which memory address to find the GPIO on
+#define COMM_SYNC_PIN_MASK    20          // Which pin to read for the sync
+#define COMM_LED_ADDRESS      24          // Which memory address to find the status LED on (0 = LED blink disabled)
+#define COMM_LED_PIN_MASK     28          // Which pin to write to change LED
+#define COMM_FRAME_COUNT      32	  // How many frames have elapsed since beginning
+#define COMM_USE_SPI          36          // Whether or not to use SPI ADC and DAC
+	
+// McASP peripheral base addresses
+#define MCASP0_BASE 0x48038000
+#define MCASP1_BASE 0x4803C000
+
+// McASP register offsets, relative to the McASP base address.
+// Offsets up to 0xFF are accessed with MCASP_REG_WRITE / MCASP_REG_READ;
+// larger offsets need the *_EXT macro variants.
+#define MCASP_PWRIDLESYSCONFIG 		0x04
+#define MCASP_PFUNC			0x10
+#define MCASP_PDIR			0x14
+#define MCASP_PDOUT			0x18
+#define MCASP_PDSET			0x1C
+#define MCASP_PDIN			0x1C	// NOTE(review): same offset as MCASP_PDSET - confirm against the McASP register map
+#define MCASP_PDCLR			0x20
+#define MCASP_GBLCTL			0x44
+#define MCASP_AMUTE			0x48
+#define MCASP_DLBCTL			0x4C
+#define MCASP_DITCTL			0x50
+#define MCASP_RGBLCTL			0x60
+#define MCASP_RMASK			0x64
+#define MCASP_RFMT			0x68
+#define MCASP_AFSRCTL			0x6C
+#define MCASP_ACLKRCTL			0x70
+#define MCASP_AHCLKRCTL			0x74
+#define MCASP_RTDM			0x78
+#define MCASP_RINTCTL			0x7C
+#define MCASP_RSTAT			0x80
+#define MCASP_RSLOT			0x84
+#define MCASP_RCLKCHK			0x88
+#define MCASP_REVTCTL			0x8C
+#define MCASP_XGBLCTL			0xA0
+#define MCASP_XMASK			0xA4
+#define MCASP_XFMT			0xA8
+#define MCASP_AFSXCTL			0xAC
+#define MCASP_ACLKXCTL			0xB0
+#define MCASP_AHCLKXCTL			0xB4
+#define MCASP_XTDM			0xB8
+#define MCASP_XINTCTL			0xBC
+#define MCASP_XSTAT			0xC0
+#define MCASP_XSLOT			0xC4
+#define MCASP_XCLKCHK			0xC8
+#define MCASP_XEVTCTL			0xCC
+#define MCASP_SRCTL0			0x180
+#define MCASP_SRCTL1			0x184
+#define MCASP_SRCTL2			0x188
+#define MCASP_SRCTL3			0x18C
+#define MCASP_SRCTL4			0x190
+#define MCASP_SRCTL5			0x194
+#define MCASP_XBUF0			0x200
+#define MCASP_XBUF1			0x204
+#define MCASP_XBUF2			0x208
+#define MCASP_XBUF3			0x20C
+#define MCASP_XBUF4			0x210
+#define MCASP_XBUF5			0x214
+#define MCASP_RBUF0			0x280
+#define MCASP_RBUF1			0x284
+#define MCASP_RBUF2			0x288
+#define MCASP_RBUF3			0x28C
+#define MCASP_RBUF4			0x290
+#define MCASP_RBUF5			0x294
+#define MCASP_WFIFOCTL			0x1000
+#define MCASP_WFIFOSTS			0x1004
+#define MCASP_RFIFOCTL			0x1008
+#define MCASP_RFIFOSTS			0x100C
+
+// Status bits polled (with QBBC) before each McASP buffer write / read
+#define MCASP_XSTAT_XDATA_BIT           5        // Bit to test for transmit ready
+#define MCASP_RSTAT_RDATA_BIT           5        // Bit to test for receive ready
+	
+// Constants used for this particular audio setup
+#define MCASP_BASE 	MCASP0_BASE
+
+// Serialiser assignment: which McASP serialiser transmits and which receives,
+// and the matching transmit/receive buffer registers (depends on the cape)
+#ifdef DBOX_CAPE
+#define MCASP_SRCTL_X	MCASP_SRCTL2	// Ser. 2 is transmitter
+#define MCASP_SRCTL_R	MCASP_SRCTL0	// Ser. 0 is receiver
+#define MCASP_XBUF	MCASP_XBUF2
+#define MCASP_RBUF	MCASP_RBUF0
+#else
+#define MCASP_SRCTL_X	MCASP_SRCTL3	// Ser. 3 is transmitter
+#define MCASP_SRCTL_R	MCASP_SRCTL2	// Ser. 2 is receiver
+#define MCASP_XBUF	MCASP_XBUF3
+#define MCASP_RBUF	MCASP_RBUF2
+#endif
+	
+// Pin bit masks for the McASP pin direction register
+#define MCASP_PIN_AFSX		(1 << 28)
+#define MCASP_PIN_AHCLKX	(1 << 27)
+#define MCASP_PIN_ACLKX		(1 << 26)
+#define MCASP_PIN_AMUTE		(1 << 25)	// Also, bits 0 to 3 are AXR0 to AXR3
+
+// Value written to MCASP_PDIR. The whole expression is parenthesised so that
+// the macro stays a single operand wherever it is textually substituted.
+#ifdef DBOX_CAPE
+#define MCASP_OUTPUT_PINS   	(MCASP_PIN_AHCLKX | (1 << 2)) // AHCLKX and AXR2 outputs
+#else
+#define MCASP_OUTPUT_PINS   	(1 << 3)	// Which pins are outputs
+#endif
+
+// Values written to both the receive and transmit mask / format registers
+#define MCASP_DATA_MASK 	0xFFFF		// 16 bit data
+#define MCASP_DATA_FORMAT	0x807C		// MSB first, 0 bit delay, 16 bits, CFG bus, ROR 16bits
+
+// Constant-table entry used to address the McASP (audio) sample buffers
+#define C_MCASP_MEM             C28     // Shared PRU mem
+
+// Flags for the flags register (bit numbers within reg_flags)
+#define FLAG_BIT_BUFFER1	0		// Toggled on every buffer swap; reported to the ARM as the current buffer
+#define FLAG_BIT_USE_SPI	1		// Set at startup when COMM_USE_SPI is nonzero
+	
+// Registers used throughout
+
+// r1, r2, r3 are used for temporary storage
+// (the sample loop also uses r7 and r8 as scratch, and r1 as the channel index)
+#define reg_frame_current	r10		// Current frame count in SPI ADC/DAC transfer
+#define reg_frame_total		r11		// Total frame count for SPI ADC/DAC
+#define reg_dac_data		r12		// Current dword for SPI DAC
+#define reg_adc_data		r13		// Current dword for SPI ADC
+#define reg_mcasp_dac_data	r14		// Current dword for McASP DAC
+#define reg_mcasp_adc_data	r15		// Current dword for McASP ADC
+#define reg_dac_buf0		r16		// Start pointer to SPI DAC buffer 0
+#define reg_dac_buf1		r17		// Start pointer to SPI DAC buffer 1
+#define reg_dac_current		r18		// Pointer to current storage location of SPI DAC
+#define reg_adc_current		r19		// Pointer to current storage location of SPI ADC
+#define reg_mcasp_buf0		r20		// Start pointer to McASP DAC buffer 0
+#define reg_mcasp_buf1		r21		// Start pointer to McASP DAC buffer 1
+#define reg_mcasp_dac_current	r22		// Pointer to current storage location of McASP DAC
+#define reg_mcasp_adc_current	r23		// Pointer to current storage location of McASP ADC
+#define reg_flags		r24		// Buffer ID (0 and 1) and other flags
+#define reg_comm_addr		r25		// Memory address for communicating with ARM
+#define reg_spi_addr		r26		// Base address for SPI
+// r27, r28 used in macros (clobbered by most of them; never pass them as macro arguments)
+#define reg_mcasp_addr		r29		// Base address for McASP
+
+	
+// Assert the DAC chip select: drive the CS pin low before a DAC transfer.
+// Clobbers r27 and r28.
+.macro DAC_CS_ASSERT
+      MOV r28, DAC_GPIO + GPIO_CLEARDATAOUT   // GPIO register that clears output pins
+      MOV r27, DAC_CS_PIN                     // Mask selecting the CS pin
+      SBBO r27, r28, 0, 4
+.endm
+
+// Release the DAC chip select: drive the CS pin high once the transfer is over.
+// Clobbers r27 and r28.
+.macro DAC_CS_UNASSERT
+      MOV r28, DAC_GPIO + GPIO_SETDATAOUT     // GPIO register that sets output pins
+      MOV r27, DAC_CS_PIN                     // Mask selecting the CS pin
+      SBBO r27, r28, 0, 4
+.endm
+
+// Queue one word for transmission to the DAC.
+//   tx_word: register holding the 32-bit word to store in the SPI channel 0 TX register
+.macro DAC_TX
+.mparam tx_word
+      SBBO tx_word, reg_spi_addr, SPI_CH0TX, 4
+.endm
+
+// Busy-wait until the DAC transfer has completed, i.e. until bit 0 (the RXS
+// indicator) of the SPI channel 0 status register is set.
+// Clobbers r27.
+.macro DAC_WAIT_FOR_FINISH
+ DAC_RXS_POLL:
+      LBBO r27, reg_spi_addr, SPI_CH0STAT, 4
+      QBBC DAC_RXS_POLL, r27, 0               // Bit still clear: keep polling
+.endm
+
+// Read the RX word to clear
+// The value read from the SPI channel 0 RX register lands in r27 and is
+// not used afterwards. Clobbers r27.
+.macro DAC_DISCARD_RX
+      LBBO r27, reg_spi_addr, SPI_CH0RX, 4
+.endm
+
+// Full DAC transaction: assert CS, send one word, wait for completion,
+// release CS and throw away the word that was clocked in.
+//   tx_word: register holding the word to send (must not be r27 or r28,
+//            which the CS macro overwrites before the word is stored)
+// Clobbers r27 and r28.
+.macro DAC_WRITE
+.mparam tx_word
+      DAC_CS_ASSERT
+      DAC_TX tx_word
+      DAC_WAIT_FOR_FINISH
+      DAC_CS_UNASSERT
+      DAC_DISCARD_RX
+.endm
+
+// Assert the ADC chip select: drive the CS pin low before an ADC transfer.
+// Clobbers r27 and r28.
+.macro ADC_CS_ASSERT
+      MOV r28, ADC_GPIO + GPIO_CLEARDATAOUT   // GPIO register that clears output pins
+      MOV r27, ADC_CS_PIN                     // Mask selecting the CS pin
+      SBBO r27, r28, 0, 4
+.endm
+
+// Release the ADC chip select: drive the CS pin high once the transfer is over.
+// Clobbers r27 and r28.
+.macro ADC_CS_UNASSERT
+      MOV r28, ADC_GPIO + GPIO_SETDATAOUT     // GPIO register that sets output pins
+      MOV r27, ADC_CS_PIN                     // Mask selecting the CS pin
+      SBBO r27, r28, 0, 4
+.endm
+
+// Queue one word for transmission to the ADC.
+//   tx_word: register holding the word to store in the SPI channel 1 TX register
+.macro ADC_TX
+.mparam tx_word
+      SBBO tx_word, reg_spi_addr, SPI_CH1TX, 4
+.endm
+
+// Busy-wait until the ADC transfer has completed, i.e. until bit 0 (the RXS
+// indicator) of the SPI channel 1 status register is set.
+// Clobbers r27.
+.macro ADC_WAIT_FOR_FINISH
+ ADC_RXS_POLL:
+      LBBO r27, reg_spi_addr, SPI_CH1STAT, 4
+      QBBC ADC_RXS_POLL, r27, 0               // Bit still clear: keep polling
+.endm
+
+// Fetch the received word from the ADC (reading it also clears it).
+//   rx_dest: register that receives the 4 bytes read from the SPI channel 1 RX register
+.macro ADC_RX
+.mparam rx_dest
+      LBBO rx_dest, reg_spi_addr, SPI_CH1RX, 4
+.endm
+
+// Full ADC transaction: assert CS, send one word, wait for completion,
+// collect the word that was clocked in, then release CS.
+//   tx_word: register holding the word to send (must not be r27 or r28)
+//   rx_dest: register that receives the word read back (may be the same
+//            register as tx_word)
+// Clobbers r27 and r28.
+.macro ADC_WRITE
+.mparam tx_word, rx_dest
+      ADC_CS_ASSERT
+      ADC_TX tx_word
+      ADC_WAIT_FOR_FINISH
+      ADC_RX rx_dest
+      ADC_CS_UNASSERT
+.endm
+
+// Store a 32-bit value into a McASP register whose offset fits the
+// immediate offset field (up to 0xFF).
+//   regoff: register offset from the McASP base
+//   val:    immediate or register holding the value to write
+// Clobbers r27.
+.macro MCASP_REG_WRITE
+.mparam regoff, val
+      MOV r27, val
+      SBBO r27, reg_mcasp_addr, regoff, 4
+.endm
+
+// Store a 32-bit value into a McASP register whose offset lies beyond the
+// 0xFF boundary: the full address is computed in r28 first.
+//   regoff: register offset from the McASP base
+//   val:    immediate or register holding the value to write
+// Clobbers r27 and r28.
+.macro MCASP_REG_WRITE_EXT
+.mparam regoff, val
+      MOV r27, val
+      MOV r28, regoff
+      ADD r28, r28, reg_mcasp_addr            // r28 = McASP base + offset
+      SBBO r27, r28, 0, 4
+.endm
+
+// Load a 32-bit McASP register whose offset fits the immediate offset
+// field (up to 0xFF).
+//   regoff: register offset from the McASP base
+//   dest:   register that receives the value
+.macro MCASP_REG_READ
+.mparam regoff, dest
+      LBBO dest, reg_mcasp_addr, regoff, 4
+.endm
+	
+// Load a 32-bit McASP register whose offset lies beyond the 0xFF boundary:
+// the full address is computed in r28 first.
+//   regoff: register offset from the McASP base
+//   dest:   register that receives the value
+// Clobbers r28.
+.macro MCASP_REG_READ_EXT
+.mparam regoff, dest
+      MOV r28, regoff
+      ADD r28, r28, reg_mcasp_addr            // r28 = McASP base + offset
+      LBBO dest, r28, 0, 4
+.endm
+	
+// Read-modify-write a McASP register to turn on the given bit(s), then spin
+// until at least one of those bits reads back as set.
+//   regoff:  register offset from the McASP base (must fit in 0xFF)
+//   bitmask: mask of the bit(s) to set
+// Clobbers r27 (mask) and r28 (register value).
+.macro MCASP_REG_SET_BIT_AND_POLL
+.mparam regoff, bitmask
+      MOV r27, bitmask
+      LBBO r28, reg_mcasp_addr, regoff, 4
+      OR r28, r27, r28
+      SBBO r28, reg_mcasp_addr, regoff, 4
+BIT_READBACK:
+      LBBO r28, reg_mcasp_addr, regoff, 4
+      AND r28, r27, r28                       // Keep only the requested bit(s)
+      QBEQ BIT_READBACK, r28, 0               // Not visible yet: read again
+.endm
+   
+START:
+      // Constant-table setup: write 0 to CTBIR0 (the register holding the
+      // c24 and c25 offsets) so that C24 points to the start of PRU0 RAM
+      LDI r2, 0
+      MOV r3, 0x22020       // CTBIR0
+      SBBO r2, r3, 0, 4
+
+      // Program CTPPR0 so that C28 points into shared PRU RAM
+      MOV r2, 0x00000120    // Yields address 0x00012000
+      MOV r3, 0x22028       // CTPPR0
+      SBBO r2, r3, 0, 4
+	
+      // Cache the base addresses that the rest of the program relies on
+      MOV reg_mcasp_addr, MCASP_BASE
+      MOV reg_spi_addr, SPI_BASE
+      MOV reg_comm_addr, SHARED_COMM_MEM_BASE
+	
+      // Clear bit 4 of the word at C4 + 4: allows the PRU to write to
+      // registers on the ARM side
+      LBCO r0, C4, 4, 4
+      CLR r0, r0, 4
+      SBCO r0, C4, 4, 4
+
+      // Begin with every flag cleared
+      LDI reg_flags, 0
+
+      // Latch the ARM's request to use the SPI ADC and DAC: the flag bit is
+      // set only when the COMM_USE_SPI word is nonzero
+      LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
+      QBEQ SPI_FLAG_CHECK_DONE, r2, 0
+      SET reg_flags, reg_flags, FLAG_BIT_USE_SPI
+
+SPI_FLAG_CHECK_DONE:
+      // SPI bring-up: clock, controller reset, channel configuration, then the
+      // first commands to the DAC and ADC. r2 and r3 are scratch throughout.
+      // If we don't use SPI, then skip all this init
+      QBBC SPI_INIT_DONE, reg_flags, FLAG_BIT_USE_SPI
+	
+      // Init SPI clock
+      // NOTE(review): always writes the CLOCK_SPI0 register, whatever SPI_BASE selects
+      MOV r2, 0x02
+      MOV r3, CLOCK_BASE + CLOCK_SPI0
+      SBBO r2, r3, 0, 4
+
+      // Reset SPI and wait for finish
+      MOV r2, 0x02
+      SBBO r2, reg_spi_addr, SPI_SYSCONFIG, 4
+
+      // Spin until bit 0 of SYSSTATUS is set
+SPI_WAIT_RESET:
+      LBBO r2, reg_spi_addr, SPI_SYSSTATUS, 4
+      QBBC SPI_WAIT_RESET, r2, 0
+	
+      // Turn off SPI channels
+      MOV r2, 0
+      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
+      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
+  
+      // Set to master; chip select lines enabled (CS0 used for DAC)
+      MOV r2, 0x00
+      SBBO r2, reg_spi_addr, SPI_MODULCTRL, 4
+  
+      // Configure CH0 for DAC
+      // Fields: DPE at bit 16, TRM at bit 12, (word length - 1) at bit 7,
+      // clock divider at bit 2, clock mode in the low bits; additionally sets
+      // bits 27-28 and bit 6 (bit 6 is set for the DAC channel only)
+      MOV r2, (3 << 27) | (DAC_DPE << 16) | (DAC_TRM << 12) | ((DAC_WL - 1) << 7) | (DAC_CLK_DIV << 2) | DAC_CLK_MODE | (1 << 6)
+      SBBO r2, reg_spi_addr, SPI_CH0CONF, 4
+
+      // Configure CH1 for ADC (same field layout as CH0, without bit 6)
+      MOV r2, (3 << 27) | (ADC_DPE << 16) | (ADC_TRM << 12) | ((ADC_WL - 1) << 7) | (ADC_CLK_DIV << 2) | ADC_CLK_MODE
+      SBBO r2, reg_spi_addr, SPI_CH1CONF, 4
+   
+      // Turn on SPI channels
+      MOV r2, 0x01
+      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
+      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4   
+
+      // DAC power-on reset sequence
+      MOV r2, (0x07 << AD5668_COMMAND_OFFSET)
+      DAC_WRITE r2
+
+      // Initialise ADC
+      // (the word read back overwrites r2 and is not used)
+      MOV r2, AD7699_CFG_MASK | (0 << AD7699_CHANNEL_OFFSET) | (0 << AD7699_SEQ_OFFSET)
+      ADC_WRITE r2, r2
+
+      // Enable DAC internal reference
+      MOV r2, (0x08 << AD5668_COMMAND_OFFSET) | (0x01 << AD5668_REF_OFFSET)
+      DAC_WRITE r2
+	
+      // Read ADC ch0 and ch1: result is always 2 samples behind so start here
+      // (both results are discarded; the sample loop continues with channel 2)
+      MOV r2, AD7699_CFG_MASK | (0x00 << AD7699_CHANNEL_OFFSET)
+      ADC_WRITE r2, r2
+
+      MOV r2, AD7699_CFG_MASK | (0x01 << AD7699_CHANNEL_OFFSET)
+      ADC_WRITE r2, r2
+SPI_INIT_DONE:	
+	
+// Prepare McASP0 for audio
+// Sequence: disable the peripheral and all serialisers, program the receive
+// and transmit sections, then release the individual reset bits one at a time,
+// polling each until it reads back as set. All macros here clobber r27/r28.
+MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
+MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
+MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0
+
+MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
+MCASP_REG_WRITE MCASP_PFUNC, 0x00		// All pins are McASP
+MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
+MCASP_REG_WRITE MCASP_DLBCTL, 0x00
+MCASP_REG_WRITE MCASP_DITCTL, 0x00
+MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
+MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
+MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
+MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
+MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
+MCASP_REG_WRITE MCASP_RTDM, 0x03		// Enable TDM slots 0 and 1
+MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
+MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
+MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
+MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
+MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
+// NOTE(review): the value below is identical to the one written to AHCLKRCTL,
+// which is described there as "internal clock, /2" - confirm which comment is right
+MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
+MCASP_REG_WRITE MCASP_XTDM, 0x03		// Enable TDM slots 0 and 1
+MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
+	
+MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
+MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
+MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00	// Disable FIFOs
+MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00
+
+MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
+MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors
+
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST
+
+// The above write sequence will have temporarily changed the AHCLKX frequency
+// The PLL needs time to settle or the sample rate will be unstable and possibly
+// cause an underrun. Give it ~1ms before going on.
+// 10ns per loop iteration = 10^-8s --> 10^5 iterations needed
+
+      // Drive GPIO1 pin 28 high for the duration of the wait
+      // (presumably a test/timing signal - the same pin is toggled on every buffer swap)
+      MOV r2, 1 << 28
+      MOV r3, GPIO1 + GPIO_SETDATAOUT
+      SBBO r2, r3, 0, 4
+
+// Count r2 down from 100000 to 0, two instructions per iteration
+MOV r2, 100000
+MCASP_INIT_WAIT:	
+      SUB r2, r2, 1
+      QBNE MCASP_INIT_WAIT, r2, 0
+
+      // Wait finished: drive GPIO1 pin 28 low again
+      MOV r2, 1 << 28
+      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
+      SBBO r2, r3, 0, 4
+	
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 10)	// Set XSRCLR
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 3)	// Set RSMRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 11)	// Set XSMRST
+
+MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00		// Write to the transmit buffer to prevent underflow
+
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 4)	// Set RFRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 12)	// Set XFRST
+// Buffer bookkeeping before the first pass
+// Buffer 0 of both the SPI DAC and the McASP DAC starts at offset 0
+LDI reg_dac_buf0, 0
+LDI reg_mcasp_buf0, 0
+// Frames per buffer as requested by the ARM (SPI frames; McASP handles twice as many)
+LBBO reg_frame_total, reg_comm_addr, COMM_BUFFER_FRAMES, 4
+// Buffer 1 follows buffer 0:
+//   McASP DAC: 2[ch] * 2[bytes] * 2[samples per SPI frame] * bufsize = bufsize << 3
+//   SPI DAC:   8[ch] * 2[bytes] * bufsize                            = bufsize << 4
+LSL reg_mcasp_buf1, reg_frame_total, 3
+LSL reg_dac_buf1, reg_frame_total, 4
+// The frame counter reported to the ARM starts at 0
+LDI r2, 0
+SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
+// Bit 0 of the flags tracks the active buffer; begin on buffer 0
+CLR reg_flags, reg_flags, FLAG_BIT_BUFFER1
+	
+// Here we are out of sync by one TDM slot since the 0 word transmitted above will have occupied
+// the first output slot. Send one more word before jumping into the loop.
+// (Spins until the transmit-ready bit of XSTAT is set, then writes a zero word. Clobbers r2, r27, r28.)
+MCASP_DAC_WAIT_BEFORE_LOOP:	
+      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
+      QBBC MCASP_DAC_WAIT_BEFORE_LOOP, r2, MCASP_XSTAT_XDATA_BIT
+
+      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00
+
+// Likewise, read and discard the first sample we get back from the ADC. This keeps the DAC and ADC
+// in sync in terms of which TDM slot we are reading (empirically found that we should throw this away
+// rather than keep it and invert the phase)
+// (Spins until the receive-ready bit of RSTAT is set; the word read into r2 is never used.)
+MCASP_ADC_WAIT_BEFORE_LOOP:
+      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+      QBBC MCASP_ADC_WAIT_BEFORE_LOOP, r2, MCASP_RSTAT_RDATA_BIT
+
+      MCASP_REG_READ_EXT MCASP_RBUF, r2
+	
+WRITE_ONE_BUFFER:
+      // Write a single buffer of DAC samples and read a buffer of ADC samples.
+      // Entered once per buffer: resets the four running pointers and the
+      // frame counter to the start of the currently active buffer.
+      // Load starting positions
+      MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
+      LSL reg_adc_current, reg_frame_total, 5   // 16 * 2 * bufsize
+      ADD reg_adc_current, reg_adc_current, reg_dac_current // ADC: starts 16 * 2 * bufsize beyond DAC
+      MOV reg_mcasp_dac_current, reg_mcasp_buf0 // McASP: set current DAC pointer
+      LSL reg_mcasp_adc_current, reg_frame_total, 4 // McASP ADC: starts 4*2*2*bufsize beyond DAC
+      // Plain ADD, not ADC: ADC is add-with-carry and would shift the pointer
+      // by one whenever the carry flag happened to be set
+      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
+      MOV reg_frame_current, 0
+	
+WRITE_LOOP:
+      // Write 8 channels to DAC from successive values in memory
+      // At the same time, read 8 channels from ADC
+      // Unrolled by a factor of 2 to get high and low words
+      // r1 is the SPI channel index. It advances by 2 on every pass of
+      // ADC_DAC_LOOP (0, 2, 4, 6), so bit 1 of r1 alternates between passes.
+      MOV r1, 0
+ADC_DAC_LOOP:
+      QBBC SPI_DAC_LOAD_DONE, reg_flags, FLAG_BIT_USE_SPI
+      // Load next 2 SPI DAC samples and store zero in their place
+      LBCO reg_dac_data, C_ADC_DAC_MEM, reg_dac_current, 4
+      MOV r2, 0
+      SBCO r2, C_ADC_DAC_MEM, reg_dac_current, 4
+      ADD reg_dac_current, reg_dac_current, 4
+SPI_DAC_LOAD_DONE:
+
+      // On even iterations, load two more samples and choose the first one
+      // On odd iterations, transmit the second of the samples already loaded
+      // (even/odd pass is decided by bit 1 of r1)
+      QBBS MCASP_DAC_HIGH_WORD, r1, 1
+MCASP_DAC_LOW_WORD:	
+      // Load next 2 Audio DAC samples and store zero in their place
+      LBCO reg_mcasp_dac_data, C_MCASP_MEM, reg_mcasp_dac_current, 4
+      MOV r2, 0
+      SBCO r2, C_MCASP_MEM, reg_mcasp_dac_current, 4
+      ADD reg_mcasp_dac_current, reg_mcasp_dac_current, 4
+
+      // Mask out the low word (first in little endian)
+      // r7 = sample to transmit on this pass
+      MOV r2, 0xFFFF
+      AND r7, reg_mcasp_dac_data, r2
+	
+      QBA MCASP_WAIT_XSTAT
+MCASP_DAC_HIGH_WORD:
+      // Take the high word of the previously loaded data
+      LSR r7, reg_mcasp_dac_data, 16
+	
+      // Two audio frames per SPI frame = 4 audio samples per SPI frame
+      // Therefore every 2 channels we send one audio sample; this loop already
+      // sends exactly two SPI channels.
+      // Wait for McASP XSTAT[XDATA] to set indicating we can write more data
+MCASP_WAIT_XSTAT:
+      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
+      QBBC MCASP_WAIT_XSTAT, r2, MCASP_XSTAT_XDATA_BIT
+
+      // Hand the sample in r7 to the transmit buffer (clobbers r27, r28)
+      MCASP_REG_WRITE_EXT MCASP_XBUF, r7
+	
+      // Same idea with ADC: even iterations, load the sample into the low word, odd
+      // iterations, load the sample into the high word and store
+      // (even/odd pass is decided by bit 1 of r1, as on the transmit side)
+      QBBS MCASP_ADC_HIGH_WORD, r1, 1
+MCASP_ADC_LOW_WORD:	
+      // Start ADC data at 0
+      LDI reg_mcasp_adc_data, 0
+	
+      // Now wait for a received word to become available from the audio ADC
+MCASP_WAIT_RSTAT_LOW:
+      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+      QBBC MCASP_WAIT_RSTAT_LOW, r2, MCASP_RSTAT_RDATA_BIT
+
+      // Mask low word and store in ADC data register
+      // (nothing is written to memory yet; that happens on the following odd pass)
+      MCASP_REG_READ_EXT MCASP_RBUF, r3
+      MOV r2, 0xFFFF
+      AND reg_mcasp_adc_data, r3, r2
+      QBA MCASP_ADC_DONE
+
+MCASP_ADC_HIGH_WORD:	
+      // Wait for a received word to become available from the audio ADC
+MCASP_WAIT_RSTAT_HIGH:
+      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
+      QBBC MCASP_WAIT_RSTAT_HIGH, r2, MCASP_RSTAT_RDATA_BIT
+
+      // Read data and shift 16 bits to the left (into the high word)
+      MCASP_REG_READ_EXT MCASP_RBUF, r3
+      LSL r3, r3, 16
+      OR reg_mcasp_adc_data, reg_mcasp_adc_data, r3
+
+      // Now store the result and increment the pointer
+      // (one 4-byte word holding the two samples gathered over this pass and the previous one)
+      SBCO reg_mcasp_adc_data, C_MCASP_MEM, reg_mcasp_adc_current, 4
+      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
+MCASP_ADC_DONE:	
+      // SPI part of the pass: two DAC writes and two ADC reads, for channels
+      // r1 and r1 + 1. Skipped entirely when the SPI flag is clear.
+      // r2, r7 and r8 are scratch; the macros clobber r27 and r28.
+      QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
+	
+      // DAC: transmit low word (first in little endian)
+      // Word layout: command 0x03 | channel (r1) | 16-bit sample
+      MOV r2, 0xFFFF
+      AND r7, reg_dac_data, r2
+      LSL r7, r7, AD5668_DATA_OFFSET
+      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
+      OR r7, r7, r8
+      LSL r8, r1, AD5668_ADDRESS_OFFSET
+      OR r7, r7, r8
+      DAC_WRITE r7
+
+      // Read ADC channels: result is always 2 commands behind
+      // Start by reading channel 2 (result is channel 0) and go
+      // to 10, but masking the channel number to be between 0 and 7
+      LDI reg_adc_data, 0
+      MOV r7, AD7699_CFG_MASK
+      ADD r8, r1, 2
+      AND r8, r8, 7
+      LSL r8, r8, AD7699_CHANNEL_OFFSET
+      OR r7, r7, r8
+      ADC_WRITE r7, r7
+
+      // Mask out only the relevant 16 bits and store in reg_adc_data
+      MOV r2, 0xFFFF
+      AND reg_adc_data, r7, r2
+
+      // Increment channel index
+      ADD r1, r1, 1
+
+      // DAC: transmit high word (second in little endian)
+      LSR r7, reg_dac_data, 16
+      LSL r7, r7, AD5668_DATA_OFFSET
+      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
+      OR r7, r7, r8
+      LSL r8, r1, AD5668_ADDRESS_OFFSET
+      OR r7, r7, r8
+      DAC_WRITE r7
+
+      // Read ADC channels: result is always 2 commands behind
+      // Start by reading channel 2 (result is channel 0) and go
+      // to 10, but masking the channel number to be between 0 and 7
+      MOV r7, AD7699_CFG_MASK
+      ADD r8, r1, 2
+      AND r8, r8, 7
+      LSL r8, r8, AD7699_CHANNEL_OFFSET
+      OR r7, r7, r8
+      ADC_WRITE r7, r7
+
+      // Move this result up to the 16 high bits
+      LSL r7, r7, 16
+      OR reg_adc_data, reg_adc_data, r7
+
+      // Store 2 ADC words in memory
+      SBCO reg_adc_data, C_ADC_DAC_MEM, reg_adc_current, 4
+      ADD reg_adc_current, reg_adc_current, 4
+
+      // Repeat 4 times (2 samples per loop, r1 += 1 already happened)
+      ADD r1, r1, 1
+      QBNE ADC_DAC_LOOP, r1, 8
+      QBA ADC_DAC_LOOP_DONE
+	
+SPI_SKIP_WRITE:
+      // We get here only if the SPI ADC and DAC are disabled
+      // Just keep the loop going for McASP
+      // (advance the channel index by 2, exactly as the SPI path does, so the
+      // McASP code still runs 4 passes per frame)
+      ADD r1, r1, 2
+      QBNE ADC_DAC_LOOP, r1, 8
+	
+ADC_DAC_LOOP_DONE:	
+      // Increment number of frames, see if we have more to write
+      // (falls through once reg_frame_current equals reg_frame_total)
+      ADD reg_frame_current, reg_frame_current, 1
+      QBNE WRITE_LOOP, reg_frame_current, reg_frame_total
+
+WRITE_LOOP_DONE:
+      // One buffer has been processed: exchange the roles of buffer 0 and
+      // buffer 1 for both the SPI DAC and the McASP DAC.
+      // r2 and r3 hold the old buffer-0 pointers during the exchange.
+      MOV r2, reg_dac_buf0
+      MOV r3, reg_mcasp_buf0
+      MOV reg_dac_buf0, reg_dac_buf1
+      MOV reg_mcasp_buf0, reg_mcasp_buf1
+      MOV reg_dac_buf1, r2
+      MOV reg_mcasp_buf1, r3
+
+      // Flip the buffer bit and tell the ARM which buffer is now current
+      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1)
+      AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Keep only the buffer bit
+      SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4
+
+      // Add this buffer's frames to the running frame count (for status monitoring)
+      LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
+      ADD r2, r2, reg_frame_total
+      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
+
+      // Status LED: disabled when the LED address is 0; otherwise the LED
+      // follows bit 12 of the frame count, i.e. it toggles every 4096 frames
+      LBBO r3, reg_comm_addr, COMM_LED_ADDRESS, 4
+      QBEQ LED_BLINK_DONE, r3, 0
+      MOV r1, 0x1000
+      AND r1, r2, r1          // r1 = (frame count & 4096)
+      LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4
+      QBEQ LED_BLINK_OFF, r1, 0
+      MOV r1, GPIO_SETDATAOUT     // Bit set: use the GPIO set register
+      QBA LED_BLINK_WRITE
+LED_BLINK_OFF:
+      MOV r1, GPIO_CLEARDATAOUT   // Bit clear: use the GPIO clear register
+LED_BLINK_WRITE:
+      ADD r3, r3, r1          // Address of the chosen GPIO register
+      SBBO r2, r3, 0, 4       // Set or clear the LED pin
+LED_BLINK_DONE:	
+	
+      // Mirror the current buffer bit on GPIO1 pin 28: high for buffer 1,
+      // low for buffer 0
+      MOV r2, 1 << 28
+      QBBC TESTLOW, reg_flags, FLAG_BIT_BUFFER1
+      MOV r3, GPIO1 + GPIO_SETDATAOUT
+      QBA TEST_PIN_WRITE
+TESTLOW:
+      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
+TEST_PIN_WRITE:
+      SBBO r2, r3, 0, 4
+TESTDONE:
+	 
+      // Keep going for as long as the ARM leaves the stop flag at zero;
+      // a nonzero flag falls through to the cleanup code
+      LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
+      QBEQ WRITE_ONE_BUFFER, r2, 0
+
+CLEANUP:
+      // Shutdown path, reached when the ARM sets the stop flag: stop the
+      // McASP, disable the SPI channels if they were in use, raise the
+      // completion event for the ARM and halt. r2 and r3 are scratch.
+      MCASP_REG_WRITE MCASP_GBLCTL, 0x00	// Turn off McASP
+
+      // Turn off SPI if enabled
+      QBBC SPI_CLEANUP_DONE, reg_flags, FLAG_BIT_USE_SPI
+	
+      // NOTE(review): bit 13 lies in the field DAC_TRM is shifted into and
+      // bit 27 is one of the two bits set by (3 << 27) at init - confirm the
+      // intended effect against the SPI channel configuration register layout
+      MOV r3, SPI_BASE + SPI_CH0CONF
+      LBBO r2, r3, 0, 4
+      CLR r2, r2, 13
+      CLR r2, r2, 27
+      SBBO r2, r3, 0, 4
+
+      // Disable channel 0. The channels are enabled at init by writing 0x01
+      // to CHxCTRL, so the enable flag is bit 0 (not bit 1).
+      MOV r3, SPI_BASE + SPI_CH0CTRL
+      LBBO r2, r3, 0, 4
+      CLR r2, r2, 0
+      SBBO r2, r3, 0, 4
+
+      // Disable channel 1 as well; it is enabled alongside channel 0 at init
+      MOV r3, SPI_BASE + SPI_CH1CTRL
+      LBBO r2, r3, 0, 4
+      CLR r2, r2, 0
+      SBBO r2, r3, 0, 4
+SPI_CLEANUP_DONE:
+	
+      // Signal the ARM that we have finished 
+      MOV R31.b0, PRU0_ARM_INTERRUPT + 16
+      HALT
\ No newline at end of file