view pru_rtaudio.p @ 21:0d80ff9e2227

Add float<->int macros to PRU code (still to integrate); formatting cleanups
author andrewm
date Sun, 08 Feb 2015 00:20:01 +0000
parents 6adb088196a7
children 472e892c6e41
line wrap: on
line source
.origin 0
.entrypoint START

// Build-time hardware selection. When DBOX_CAPE is defined, the #ifdef
// blocks further down pick the chip-select pins, the McASP serialisers and
// the McASP output pins for the new cape; otherwise the older assignments
// are used.
#define DBOX_CAPE	// Define this to use new cape hardware
	
// Clock-control block: base address plus per-module register offsets.
// Of these, only CLOCK_SPI0 is referenced by the code shown in this file
// (it is written during SPI initialisation).
#define CLOCK_BASE  0x44E00000
#define CLOCK_SPI0  0x4C
#define CLOCK_SPI1  0x50
#define CLOCK_L4LS  0x60

// SPI module base addresses. SPI_BASE selects the module actually used;
// it is loaded into reg_spi_addr at START.
#define SPI0_BASE   0x48030100
#define SPI1_BASE   0x481A0100
#define SPI_BASE    SPI0_BASE
	
// SPI register offsets relative to SPI_BASE. All of them are below 0x100,
// so they can be used directly as LBBO/SBBO immediate offsets.
// Channel 0 is used for the DAC and channel 1 for the ADC (see the
// DAC_*/ADC_* macros).
#define SPI_SYSCONFIG 0x10
#define SPI_SYSSTATUS 0x14
#define SPI_MODULCTRL 0x28
#define SPI_CH0CONF   0x2C
#define SPI_CH0STAT   0x30
#define SPI_CH0CTRL   0x34
#define SPI_CH0TX     0x38
#define SPI_CH0RX     0x3C
#define SPI_CH1CONF   0x40
#define SPI_CH1STAT   0x44
#define SPI_CH1CTRL   0x48
#define SPI_CH1TX     0x4C
#define SPI_CH1RX     0x50

// GPIO bank base addresses and the offsets of the write-1-to-clear and
// write-1-to-set data-out registers. The offsets exceed 0xFF, so code adds
// them to the base in a register rather than using an immediate offset.
#define GPIO0 0x44E07000
#define GPIO1 0x4804C000
#define GPIO_CLEARDATAOUT 0x190
#define GPIO_SETDATAOUT 0x194

// Event number used to signal the ARM at the end of the program
// (written to R31.b0 as PRU0_ARM_INTERRUPT + 16).
#define PRU0_ARM_INTERRUPT 19

// Constant-table entry used with LBCO/SBCO to reach the SPI DAC/ADC sample
// buffers.
#define C_ADC_DAC_MEM C24     // PRU0 mem

// ---- SPI DAC (AD5668) ----
// Chip select is driven by hand through GPIO (see DAC_CS_ASSERT /
// DAC_CS_UNASSERT); the pin depends on the cape variant.
#ifdef DBOX_CAPE
#define DAC_GPIO      GPIO0
#define DAC_CS_PIN    (1<<5) // GPIO0:5 = P9 pin 17
#else
#define DAC_GPIO      GPIO1
#define DAC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
#endif
// The following fields are packed into SPI_CH0CONF during SPI initialisation.
#define DAC_TRM       0       // SPI transmit and receive
#define DAC_WL        32      // Word length
#define DAC_CLK_MODE  1       // SPI mode
#define DAC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
#define DAC_DPE       1       // d0 = receive, d1 = transmit

// Bit positions of the fields inside the 32-bit word sent to the AD5668.
// The write loop builds each word as
//   (command << COMMAND_OFFSET) | (channel << ADDRESS_OFFSET) | (sample << DATA_OFFSET)
#define AD5668_COMMAND_OFFSET 24
#define AD5668_ADDRESS_OFFSET 20
#define AD5668_DATA_OFFSET    4
#define AD5668_REF_OFFSET     0

// ---- SPI ADC (AD7699) ----
// Chip select is driven by hand through GPIO (see ADC_CS_ASSERT /
// ADC_CS_UNASSERT); the pin depends on the cape variant.
#ifdef DBOX_CAPE
#define ADC_GPIO      GPIO1
#define ADC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
#else
#define ADC_GPIO      GPIO1
#define ADC_CS_PIN    (1<<17) // GPIO1:17 = P9 pin 23
#endif
// The following fields are packed into SPI_CH1CONF during SPI initialisation.
#define ADC_TRM       0       // SPI transmit and receive
#define ADC_WL        16      // Word length
#define ADC_CLK_MODE  0       // SPI mode
#define ADC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
#define ADC_DPE       1       // d0 = receive, d1 = transmit

// AD7699 configuration word pieces. Each ADC command is
//   AD7699_CFG_MASK | (channel << AD7699_CHANNEL_OFFSET)
#define AD7699_CFG_MASK       0xF120 // Mask for config update, unipolar, full BW
#define AD7699_CHANNEL_OFFSET 9      // 7 bits offset of a 14-bit left-justified word
#define AD7699_SEQ_OFFSET     3      // sequencer (0 = disable, 3 = scan all)

// Communication area shared with the ARM. SHARED_COMM_MEM_BASE is loaded
// into reg_comm_addr; the COMM_* values are byte offsets of 32-bit words
// inside that area, accessed with LBBO/SBBO.
#define SHARED_COMM_MEM_BASE  0x00010000  // Location where comm flags are written
#define COMM_SHOULD_STOP      0		  // Set to be nonzero when loop should stop
#define COMM_CURRENT_BUFFER   4           // Which buffer we are on
#define COMM_BUFFER_FRAMES    8           // How many frames per buffer
#define COMM_SHOULD_SYNC      12          // Whether to synchronise to an external clock
#define COMM_SYNC_ADDRESS     16          // Which memory address to find the GPIO on
#define COMM_SYNC_PIN_MASK    20          // Which pin to read for the sync
#define COMM_LED_ADDRESS      24          // Which memory address to find the status LED on
#define COMM_LED_PIN_MASK     28          // Which pin to write to change LED
#define COMM_FRAME_COUNT      32	  // How many frames have elapsed since beginning
#define COMM_USE_SPI          36          // Whether or not to use SPI ADC and DAC
#define COMM_NUM_CHANNELS     40	  // Number of SPI ADC/DAC channels: 8, 4 or 2 (other values are clamped at START)
	
// McASP module base addresses (MCASP_BASE below selects the one in use).
#define MCASP0_BASE 0x48038000
#define MCASP1_BASE 0x4803C000

// McASP register offsets relative to the module base.
// Offsets up to 0xFF can be used as LBBO/SBBO immediate offsets
// (MCASP_REG_WRITE / MCASP_REG_READ); larger ones (SRCTLn, XBUFn, RBUFn,
// FIFO registers) need the *_EXT macros, which compute the address in a
// register first.
#define MCASP_PWRIDLESYSCONFIG 		0x04
#define MCASP_PFUNC			0x10
#define MCASP_PDIR			0x14
#define MCASP_PDOUT			0x18
// NOTE: PDSET and PDIN are defined with the same offset (0x1C).
#define MCASP_PDSET			0x1C
#define MCASP_PDIN			0x1C
#define MCASP_PDCLR			0x20
#define MCASP_GBLCTL			0x44
#define MCASP_AMUTE			0x48
#define MCASP_DLBCTL			0x4C
#define MCASP_DITCTL			0x50
// Receive section
#define MCASP_RGBLCTL			0x60
#define MCASP_RMASK			0x64
#define MCASP_RFMT			0x68
#define MCASP_AFSRCTL			0x6C
#define MCASP_ACLKRCTL			0x70
#define MCASP_AHCLKRCTL			0x74
#define MCASP_RTDM			0x78
#define MCASP_RINTCTL			0x7C
#define MCASP_RSTAT			0x80
#define MCASP_RSLOT			0x84
#define MCASP_RCLKCHK			0x88
#define MCASP_REVTCTL			0x8C
// Transmit section
#define MCASP_XGBLCTL			0xA0
#define MCASP_XMASK			0xA4
#define MCASP_XFMT			0xA8
#define MCASP_AFSXCTL			0xAC
#define MCASP_ACLKXCTL			0xB0
#define MCASP_AHCLKXCTL			0xB4
#define MCASP_XTDM			0xB8
#define MCASP_XINTCTL			0xBC
#define MCASP_XSTAT			0xC0
#define MCASP_XSLOT			0xC4
#define MCASP_XCLKCHK			0xC8
#define MCASP_XEVTCTL			0xCC
// Serialiser control registers (one per serialiser)
#define MCASP_SRCTL0			0x180
#define MCASP_SRCTL1			0x184
#define MCASP_SRCTL2			0x188
#define MCASP_SRCTL3			0x18C
#define MCASP_SRCTL4			0x190
#define MCASP_SRCTL5			0x194
// Transmit buffer registers (one per serialiser)
#define MCASP_XBUF0			0x200
#define MCASP_XBUF1			0x204
#define MCASP_XBUF2			0x208
#define MCASP_XBUF3			0x20C
#define MCASP_XBUF4			0x210
#define MCASP_XBUF5			0x214
// Receive buffer registers (one per serialiser)
#define MCASP_RBUF0			0x280
#define MCASP_RBUF1			0x284
#define MCASP_RBUF2			0x288
#define MCASP_RBUF3			0x28C
#define MCASP_RBUF4			0x290
#define MCASP_RBUF5			0x294
// FIFO control/status registers
#define MCASP_WFIFOCTL			0x1000
#define MCASP_WFIFOSTS			0x1004
#define MCASP_RFIFOCTL			0x1008
#define MCASP_RFIFOSTS			0x100C

// Status bits polled by the audio loop before each XBUF write / RBUF read.
#define MCASP_XSTAT_XDATA_BIT           5        // Bit to test for transmit ready
#define MCASP_RSTAT_RDATA_BIT           5        // Bit to test for receive ready 
	
// Constants used for this particular audio setup
// MCASP_BASE is loaded into reg_mcasp_addr at START. The serialiser used for
// transmit/receive (and therefore the XBUF/RBUF register) depends on the cape.
#define MCASP_BASE 	MCASP0_BASE
#ifdef DBOX_CAPE
#define MCASP_SRCTL_X	MCASP_SRCTL2	// Ser. 2 is transmitter
#define MCASP_SRCTL_R	MCASP_SRCTL0	// Ser. 0 is receiver
#define MCASP_XBUF	MCASP_XBUF2
#define MCASP_RBUF	MCASP_RBUF0
#else
#define MCASP_SRCTL_X	MCASP_SRCTL3	// Ser. 3 is transmitter
#define MCASP_SRCTL_R	MCASP_SRCTL2	// Ser. 2 is receiver
#define MCASP_XBUF	MCASP_XBUF3
#define MCASP_RBUF	MCASP_RBUF2
#endif
	
// Bit masks for the McASP pin registers (PFUNC/PDIR/...)
#define MCASP_PIN_AFSX		(1 << 28)
#define MCASP_PIN_AHCLKX	(1 << 27)
#define MCASP_PIN_ACLKX		(1 << 26)
#define MCASP_PIN_AMUTE		(1 << 25)	// Also, bits 0 to 3 are AXR0 to AXR3

// Value written to MCASP_PDIR: a 1 bit makes the pin an output.
// The expression is fully parenthesised so the macro stays a single value
// wherever it is expanded (e.g. next to another operator).
#ifdef DBOX_CAPE
#define MCASP_OUTPUT_PINS   	(MCASP_PIN_AHCLKX | (1 << 2)) // AHCLKX and AXR2 outputs
#else
#define MCASP_OUTPUT_PINS   	(1 << 3)	// Which pins are outputs
#endif

// Written to RMASK/XMASK and RFMT/XFMT respectively during McASP init
#define MCASP_DATA_MASK 	0xFFFF		// 16 bit data
#define MCASP_DATA_FORMAT	0x807C		// MSB first, 0 bit delay, 16 bits, CFG bus, ROR 16bits

// Constant-table entry used with LBCO/SBCO to reach the McASP audio buffers
#define C_MCASP_MEM             C28     	// Shared PRU mem

// Flags for the flags register
// (bit numbers inside reg_flags, tested with QBBS/QBBC)
#define FLAG_BIT_BUFFER1	0
#define FLAG_BIT_USE_SPI	1
#define FLAG_BIT_MCASP_HWORD	2		// Whether we are on the high word for McASP transmission
	
// Registers used throughout
// These aliases reserve r9-r26 and r29 for long-lived state. r27 and r28
// are scratch registers clobbered by the macros below, so they must never
// hold a value across a macro invocation. The main loop additionally uses
// r7 and r8 as temporaries.

// r1, r2, r3 are used for temporary storage
#define reg_num_channels	r9		// Number of SPI ADC/DAC channels to use
#define reg_frame_current	r10		// Current frame count in SPI ADC/DAC transfer
#define reg_frame_total		r11		// Total frame count for SPI ADC/DAC
#define reg_dac_data		r12		// Current dword for SPI DAC
#define reg_adc_data		r13		// Current dword for SPI ADC
#define reg_mcasp_dac_data	r14		// Current dword for McASP DAC
#define reg_mcasp_adc_data	r15		// Current dword for McASP ADC
#define reg_dac_buf0		r16		// Start pointer to SPI DAC buffer 0
#define reg_dac_buf1		r17		// Start pointer to SPI DAC buffer 1
#define reg_dac_current		r18		// Pointer to current storage location of SPI DAC
#define reg_adc_current		r19		// Pointer to current storage location of SPI ADC
#define reg_mcasp_buf0		r20		// Start pointer to McASP DAC buffer 0
#define reg_mcasp_buf1		r21		// Start pointer to McASP DAC buffer 1
#define reg_mcasp_dac_current	r22		// Pointer to current storage location of McASP DAC
#define reg_mcasp_adc_current	r23		// Pointer to current storage location of McASP ADC
#define reg_flags		r24		// Buffer ID (0 and 1) and other flags
#define reg_comm_addr		r25		// Memory address for communicating with ARM
#define reg_spi_addr		r26		// Base address for SPI
// r27, r28 used in macros
#define reg_mcasp_addr		r29		// Base address for McASP

// FLOAT_TO_INT16 input, output
// Scale an IEEE-754 single by 32768 and truncate it to a 16-bit two's
// complement value held in the low half of `output` (upper half zero).
//   |x| <  2^-15  -> 0
//   x   >= 1.0    -> 32767 (0x7FFF)
//   x   <= -1.0   -> 32768 (0x8000, i.e. -32768 as signed 16-bit)
// `input` is only read before `output` is written, so both may name the
// same register. Clobbers r27 and r28.
.macro FLOAT_TO_INT16
.mparam input, output
      // r27 = biased exponent field, bits 30..23 of the float
      LSR r27, input, 23
      AND r27, r27, 0xFF

      // Dispatch the two out-of-range cases first
      QBGT F2I16_UNDERFLOW, r27, 112    // 112 > exponent: magnitude below 2^-15
      QBLE F2I16_SATURATE, r27, 127     // 127 <= exponent: magnitude of 1.0 or more

      // In range: r28 = 24-bit significand with the implicit leading 1
      MOV r28, 0x007FFFFF
      AND r28, input, r28
      SET r28, 23

      // significand * 2^(exponent-127-23) * 2^15 == significand >> (135 - exponent)
      RSB r27, r27, 135
      LSR r28, r28, r27

      QBBS F2I16_NEGATE, input, 31      // Sign bit set?
      // Non-negative: clip at 0x7FFF
      LDI r27, 0x7FFF
      MIN output, r28, r27
      QBA F2I16_END

F2I16_NEGATE:
      // 16-bit two's complement negation: flip the low 16 bits, add one and
      // drop the carry into bit 16 (only produced when the magnitude is 0)
      LDI r27, 0xFFFF
      XOR r28, r28, r27
      ADD r28, r28, 1
      CLR output, r28, 16
      QBA F2I16_END

F2I16_UNDERFLOW:
      LDI output, 0
      QBA F2I16_END

F2I16_SATURATE:
      // Test the sign before touching output (input may be the same register)
      QBBS F2I16_SATURATE_NEG, input, 31
      LDI output, 32767                 // Largest positive value
      QBA F2I16_END
F2I16_SATURATE_NEG:
      LDI output, 32768                 // 0x8000 = -32768 as signed 16-bit
F2I16_END:
.endm


// FLOAT_TO_UINT16 input, output
// Scale an IEEE-754 single by 65536 and truncate it to an unsigned 16-bit
// value in `output`.
//   sign bit set      -> 0
//   x <  2^-16        -> 0
//   x >= 1.0          -> 65535
// `input` is only read before `output` is written, so both may name the
// same register. Clobbers r27 and r28 (neither is touched when the sign bit
// is set).
.macro FLOAT_TO_UINT16
.mparam input, output
      QBBS F2U16_ZERO, input, 31        // Every input with the sign bit set maps to 0

      // r27 = biased exponent field, bits 30..23 of the float
      LSR r27, input, 23
      AND r27, r27, 0xFF

      QBGT F2U16_ZERO, r27, 111         // 111 > exponent: value below 2^-16
      QBLE F2U16_SATURATE, r27, 127     // 127 <= exponent: value of 1.0 or more

      // r28 = 24-bit significand with the implicit leading 1
      MOV r28, 0x007FFFFF
      AND r28, input, r28
      SET r28, 23

      // significand * 2^(exponent-127-23) * 2^16 == significand >> (134 - exponent)
      RSB r27, r27, 134
      LSR r28, r28, r27

      // Clip at 65535
      LDI r27, 0xFFFF
      MIN output, r28, r27
      QBA F2U16_END

F2U16_SATURATE:
      LDI output, 65535                 // Largest representable value
      QBA F2U16_END

F2U16_ZERO:
      LDI output, 0
F2U16_END:
.endm

	
// INT16_TO_FLOAT input, output
// Convert a 16-bit two's complement value (low half of `input`, upper half
// expected to be zero) into an IEEE-754 single equal to value / 32768.
// Results are unspecified if the upper 16 bits of `input` are not zero.
// `input` is read again after `output` has been modified, so the two must
// be different registers. Clobbers r28 always and r27 on the general path.
.macro INT16_TO_FLOAT
.mparam input, output
      // Special cases: 0 and 0x8000 (-32768)
      QBEQ I2F_ZERO, input, 0
      LDI r28, 32768
      QBEQ I2F_MIN, input, r28

      // output = |input| as a 15-bit magnitude
      MOV output, input
      QBBC I2F_MAGNITUDE_READY, input, 15   // Bit 15 clear: already positive
      LDI r28, 0xFFFF
      XOR output, output, r28               // Flip the low 16 bits...
      ADD output, output, 1                 // ...and add one
      CLR output, output, 16                // Drop any carry out of bit 15
I2F_MAGNITUDE_READY:

      // r28 = index of the most significant set bit of the magnitude
      LMBD r28, output, 1

      // Exponent field: 127 + 8 + (r28 - 23) = 112 + r28, moved to bits 30..23
      ADD r27, r28, 112
      LSL r27, r27, 23

      // Left-align the magnitude so its leading 1 sits in bit 23
      RSB r28, r28, 23
      LSL output, output, r28

      // The leading 1 is implicit in the format: replace it with the exponent
      CLR output, output, 23
      OR  output, output, r27

      // Restore the sign
      QBBC I2F_END, input, 15
      SET output, output, 31
      QBA I2F_END

I2F_ZERO:
      LDI output, 0
      QBA I2F_END

I2F_MIN:
      MOV output, 0xBF800000                // -1.0
I2F_END:
.endm

// UINT16_TO_FLOAT input, output
// Convert the low 16 bits of `input` (unsigned; upper bits are ignored)
// into an IEEE-754 single equal to value / 65536.
// `input` is read only once, at the very start.
// Clobbers r27 always and r28 when the value is non-zero.
.macro UINT16_TO_FLOAT
.mparam input, output
      // output = input & 0xFFFF
      MOV output, input
      LDI r27, 0xFFFF
      AND output, output, r27

      // Zero maps to 0.0, which is all bits clear: nothing more to do
      QBEQ U2F_END, output, 0

      // r28 = index of the most significant set bit
      LMBD r28, output, 1

      // Exponent field: 127 + 7 + (r28 - 23) = 111 + r28, moved to bits 30..23
      ADD r27, r28, 111
      LSL r27, r27, 23

      // Left-align the value so its leading 1 sits in bit 23
      RSB r28, r28, 23
      LSL output, output, r28

      // The leading 1 is implicit in the format: replace it with the exponent
      CLR output, output, 23
      OR  output, output, r27
U2F_END:
.endm	
	
// DAC_CS_ASSERT
// Start a DAC transaction by driving its chip-select pin low: the pin mask
// is written to the clear-data-out register of the DAC's GPIO bank.
// Clobbers r27 (pin mask) and r28 (register address).
.macro DAC_CS_ASSERT
      MOV r28, DAC_GPIO + GPIO_CLEARDATAOUT
      MOV r27, DAC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// DAC_CS_UNASSERT
// End a DAC transaction by driving its chip-select pin high: the pin mask
// is written to the set-data-out register of the DAC's GPIO bank.
// Clobbers r27 (pin mask) and r28 (register address).
.macro DAC_CS_UNASSERT
      MOV r28, DAC_GPIO + GPIO_SETDATAOUT
      MOV r27, DAC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Write to DAC TX register
// Stores the 32-bit register `data` to the SPI channel 0 transmit register
// (reg_spi_addr + SPI_CH0TX). Does not wait for the transfer to complete
// and does not touch the chip select; see DAC_WRITE for the full sequence.
.macro DAC_TX
.mparam data
      SBBO data, reg_spi_addr, SPI_CH0TX, 4
.endm

// DAC_WAIT_FOR_FINISH
// Busy-wait until bit 0 (the RXS indicator) of the SPI channel 0 status
// register reads 1, i.e. a received word is waiting, which marks the end
// of the transfer. There is no timeout. Clobbers r27.
.macro DAC_WAIT_FOR_FINISH
DAC_RXS_POLL:
      LBBO r27, reg_spi_addr, SPI_CH0STAT, 4
      QBBC DAC_RXS_POLL, r27, 0
.endm

// Read the RX word to clear
// Loads the SPI channel 0 receive register into r27 and ignores the value;
// the read is what matters. Clobbers r27.
.macro DAC_DISCARD_RX
      LBBO r27, reg_spi_addr, SPI_CH0RX, 4
.endm

// Complete DAC write with chip select
// Sends the 32-bit word in `reg` to the DAC as one framed transaction:
// chip select low, transmit, wait for completion, chip select high, then
// read and drop the received word.
// Clobbers r27 and r28, so `reg` must not be either of them (the CS macro
// overwrites both before the word is transmitted).
.macro DAC_WRITE
.mparam reg
      DAC_CS_ASSERT
      DAC_TX reg
      DAC_WAIT_FOR_FINISH
      DAC_CS_UNASSERT
      DAC_DISCARD_RX
.endm

// ADC_CS_ASSERT
// Start an ADC transaction by driving its chip-select pin low: the pin mask
// is written to the clear-data-out register of the ADC's GPIO bank.
// Clobbers r27 (pin mask) and r28 (register address).
.macro ADC_CS_ASSERT
      MOV r28, ADC_GPIO + GPIO_CLEARDATAOUT
      MOV r27, ADC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// ADC_CS_UNASSERT
// End an ADC transaction by driving its chip-select pin high: the pin mask
// is written to the set-data-out register of the ADC's GPIO bank.
// Clobbers r27 (pin mask) and r28 (register address).
.macro ADC_CS_UNASSERT
      MOV r28, ADC_GPIO + GPIO_SETDATAOUT
      MOV r27, ADC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Write to ADC TX register
// Stores the register `data` to the SPI channel 1 transmit register
// (reg_spi_addr + SPI_CH1TX). Does not wait for the transfer to complete
// and does not touch the chip select; see ADC_WRITE for the full sequence.
.macro ADC_TX
.mparam data
      SBBO data, reg_spi_addr, SPI_CH1TX, 4
.endm

// ADC_WAIT_FOR_FINISH
// Busy-wait until bit 0 (the RXS indicator) of the SPI channel 1 status
// register reads 1, i.e. a received word is waiting, which marks the end
// of the transfer. There is no timeout. Clobbers r27.
.macro ADC_WAIT_FOR_FINISH
ADC_RXS_POLL:
      LBBO r27, reg_spi_addr, SPI_CH1STAT, 4
      QBBC ADC_RXS_POLL, r27, 0
.endm

// Read the RX word to clear; store output
// Loads the SPI channel 1 receive register into the register `data`.
.macro ADC_RX
.mparam data
      LBBO data, reg_spi_addr, SPI_CH1RX, 4
.endm

// Complete ADC write+read with chip select
// Sends the word in `in` to the ADC and returns the word received during the
// same transaction in `out`: chip select low, transmit, wait for completion,
// read the received word, chip select high.
// `in` and `out` may be the same register (the callers in this file do that).
// Clobbers r27 and r28, so neither parameter may be one of them: `in` would
// be overwritten before it is sent, and `out` would be overwritten by the
// final chip-select macro.
.macro ADC_WRITE
.mparam in, out
      ADC_CS_ASSERT
      ADC_TX in
      ADC_WAIT_FOR_FINISH
      ADC_RX out
      ADC_CS_UNASSERT
.endm

// Write a McASP register
// Stores `value` (immediate or register) to reg_mcasp_addr + `reg`.
// `reg` is used as the SBBO immediate offset, so this form is for register
// offsets up to 0xFF; use MCASP_REG_WRITE_EXT beyond that.
// Clobbers r27.
.macro MCASP_REG_WRITE
.mparam reg, value
      MOV r27, value
      SBBO r27, reg_mcasp_addr, reg, 4
.endm

// MCASP_REG_WRITE_EXT reg, value
// Store `value` (immediate or register) to the McASP register at offset
// `reg`, for offsets too large to be an SBBO immediate (beyond 0xFF).
// The absolute address is built in r28 first.
// Clobbers r27 (value) and r28 (address).
.macro MCASP_REG_WRITE_EXT
.mparam reg, value
      MOV r27, value
      MOV r28, reg
      ADD r28, r28, reg_mcasp_addr    // r28 = McASP base + register offset
      SBBO r27, r28, 0, 4
.endm

// Read a McASP register
// Loads the 32-bit register at reg_mcasp_addr + `reg` into the register
// `value`. `reg` is used as the LBBO immediate offset, so this form is for
// register offsets up to 0xFF; use MCASP_REG_READ_EXT beyond that.
.macro MCASP_REG_READ
.mparam reg, value
      LBBO value, reg_mcasp_addr, reg, 4
.endm
	
// MCASP_REG_READ_EXT reg, value
// Load the McASP register at offset `reg` into the register `value`, for
// offsets too large to be an LBBO immediate (beyond 0xFF).
// The absolute address is built in r28 first. Clobbers r28.
.macro MCASP_REG_READ_EXT
.mparam reg, value
      MOV r28, reg
      ADD r28, r28, reg_mcasp_addr    // r28 = McASP base + register offset
      LBBO value, r28, 0, 4
.endm
	
// MCASP_REG_SET_BIT_AND_POLL reg, mask
// Read-modify-write the McASP register at offset `reg` (up to 0xFF) to set
// the bits in `mask`, then spin until at least one of those bits reads back
// as 1. There is no timeout.
// Clobbers r27 (mask) and r28 (register value).
.macro MCASP_REG_SET_BIT_AND_POLL
.mparam reg, mask
      MOV r27, mask
      LBBO r28, reg_mcasp_addr, reg, 4
      OR r28, r27, r28
      SBBO r28, reg_mcasp_addr, reg, 4
WAIT_BIT_SET:
      LBBO r28, reg_mcasp_addr, reg, 4
      AND r28, r27, r28
      QBEQ WAIT_BIT_SET, r28, 0
.endm
   
START:
      // Program entry point (.entrypoint START). This first section sets up
      // the constant-table pointers, loads the peripheral base addresses into
      // their dedicated registers, reads the "use SPI" request from the comm
      // area and works out how many SPI channels to service.

      // Set up c24 and c25 offsets with CTBIR register
      // Thus C24 points to start of PRU0 RAM
      MOV r3, 0x22020       // CTBIR0
      MOV r2, 0
      SBBO r2, r3, 0, 4

      // Set up c28 pointer offset for shared PRU RAM
      MOV r3, 0x22028       // CTPPR0
      MOV r2, 0x00000120    // To get address 0x00012000
      SBBO r2, r3, 0, 4
	
      // Load useful registers for addressing SPI
      // (also the comm area and the McASP; these three stay fixed from here on)
      MOV reg_comm_addr, SHARED_COMM_MEM_BASE
      MOV reg_spi_addr, SPI_BASE
      MOV reg_mcasp_addr, MCASP_BASE
	
      // Set ARM such that PRU can write to registers
      // (read-modify-write of the word at C4 + 4, clearing bit 4)
      LBCO r0, C4, 4, 4
      CLR r0, r0, 4
      SBCO r0, C4, 4, 4

      // Clear flags
      MOV reg_flags, 0

      // Default number of channels in case SPI disabled
      // (the inner loop still counts r1 up to this value in steps of 2, so
      // 8 gives four McASP words per frame when SPI is off)
      LDI reg_num_channels, 8
	
      // Find out whether we should use SPI ADC and DAC
      LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
      QBEQ SPI_FLAG_CHECK_DONE, r2, 0
      SET reg_flags, reg_flags, FLAG_BIT_USE_SPI

SPI_FLAG_CHECK_DONE:
      // If we don't use SPI, then skip all this init
      QBBC SPI_INIT_DONE, reg_flags, FLAG_BIT_USE_SPI

      // Load the number of channels: valid values are 8, 4 or 2
      // Anything else is clamped: >= 8 becomes 8, 4..7 becomes 4, below 4 becomes 2.
      // Note the operand order of the quick branches: QBGT label, reg, imm
      // branches when imm > reg.
      LBBO reg_num_channels, reg_comm_addr, COMM_NUM_CHANNELS, 4
      QBGT SPI_NUM_CHANNELS_LT8, reg_num_channels, 8 // 8 > num_channels ?
      LDI reg_num_channels, 8		// If N >= 8, N = 8
      QBA SPI_NUM_CHANNELS_DONE
SPI_NUM_CHANNELS_LT8:	
      QBGT SPI_NUM_CHANNELS_LT4, reg_num_channels, 4 // 4 > num_channels ?
      LDI reg_num_channels, 4		// If N >= 4, N = 4
      QBA SPI_NUM_CHANNELS_DONE
SPI_NUM_CHANNELS_LT4:
      LDI reg_num_channels, 2		// else N = 2
SPI_NUM_CHANNELS_DONE:	
	
      // ---- SPI initialisation (skipped entirely when FLAG_BIT_USE_SPI is clear) ----
      // Sequence: enable module clock, soft-reset the SPI module, configure
      // channel 0 for the DAC and channel 1 for the ADC, enable both
      // channels, then send the start-up commands to the DAC and ADC.
      // r2 and r3 are temporaries; DAC_WRITE/ADC_WRITE clobber r27 and r28.

      // Init SPI clock
      MOV r2, 0x02
      MOV r3, CLOCK_BASE + CLOCK_SPI0
      SBBO r2, r3, 0, 4

      // Reset SPI and wait for finish
      MOV r2, 0x02
      SBBO r2, reg_spi_addr, SPI_SYSCONFIG, 4

      // Spin until bit 0 of SYSSTATUS reads 1 (no timeout)
SPI_WAIT_RESET:
      LBBO r2, reg_spi_addr, SPI_SYSSTATUS, 4
      QBBC SPI_WAIT_RESET, r2, 0
	
      // Turn off SPI channels
      MOV r2, 0
      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
  
      // Set to master; chip select lines enabled (CS0 used for DAC)
      MOV r2, 0x00
      SBBO r2, reg_spi_addr, SPI_MODULCTRL, 4
  
      // Configure CH0 for DAC
      // Fields: bits 28:27 set, DPE at bit 16, TRM at bit 12, word length - 1
      // at bit 7, clock divider at bit 2, clock mode in the low bits.
      // Unlike CH1, bit 6 is also set for the DAC channel.
      MOV r2, (3 << 27) | (DAC_DPE << 16) | (DAC_TRM << 12) | ((DAC_WL - 1) << 7) | (DAC_CLK_DIV << 2) | DAC_CLK_MODE | (1 << 6)
      SBBO r2, reg_spi_addr, SPI_CH0CONF, 4

      // Configure CH1 for ADC
      MOV r2, (3 << 27) | (ADC_DPE << 16) | (ADC_TRM << 12) | ((ADC_WL - 1) << 7) | (ADC_CLK_DIV << 2) | ADC_CLK_MODE
      SBBO r2, reg_spi_addr, SPI_CH1CONF, 4
   
      // Turn on SPI channels
      // (bit 0 of CHxCTRL is the enable written here)
      MOV r2, 0x01
      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4   

      // DAC power-on reset sequence
      MOV r2, (0x07 << AD5668_COMMAND_OFFSET)
      DAC_WRITE r2

      // Initialise ADC
      MOV r2, AD7699_CFG_MASK | (0 << AD7699_CHANNEL_OFFSET) | (0 << AD7699_SEQ_OFFSET)
      ADC_WRITE r2, r2

      // Enable DAC internal reference
      MOV r2, (0x08 << AD5668_COMMAND_OFFSET) | (0x01 << AD5668_REF_OFFSET)
      DAC_WRITE r2
	
      // Read ADC ch0 and ch1: result is always 2 samples behind so start here
      // (the values received by these two transactions are discarded; the
      // main loop then starts by requesting channel 2 and receiving channel 0)
      MOV r2, AD7699_CFG_MASK | (0x00 << AD7699_CHANNEL_OFFSET)
      ADC_WRITE r2, r2

      MOV r2, AD7699_CFG_MASK | (0x01 << AD7699_CHANNEL_OFFSET)
      ADC_WRITE r2, r2
SPI_INIT_DONE:	
	
      // ---- McASP initialisation (always executed) ----
      // Sequence: disable the module and all serialisers, program pin
      // directions, formats and clocks, select the transmit and receive
      // serialisers, then bring the clock, serialiser, state-machine and
      // frame-sync sections up one GBLCTL bit at a time, polling each bit
      // until it reads back set.
      // Every macro used here clobbers r27 (and r28 for *_EXT / *_POLL).

      // Prepare McASP0 for audio
      MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
      MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
      MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
      MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
      MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
      MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
      MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0

      MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
      MCASP_REG_WRITE MCASP_PFUNC, 0x00			// All pins are McASP
      MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
      MCASP_REG_WRITE MCASP_DLBCTL, 0x00
      MCASP_REG_WRITE MCASP_DITCTL, 0x00
      MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
      MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
      MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
      MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
      MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
      MCASP_REG_WRITE MCASP_RTDM, 0x03			// Enable TDM slots 0 and 1
      MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
      MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
      MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
      MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
      MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
      // NOTE(review): AHCLKXCTL gets the same value (0x8001) that the
      // AHCLKRCTL line above describes as "internal clock, /2", so the
      // "external clock" wording below looks stale - confirm against the
      // McASP register description.
      MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
      MCASP_REG_WRITE MCASP_XTDM, 0x03			// Enable TDM slots 0 and 1
      MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
	
      MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
      MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
      MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00		// Disable FIFOs
      MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00

      MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
      MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors

      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST

// The above write sequence will have temporarily changed the AHCLKX frequency
// The PLL needs time to settle or the sample rate will be unstable and possibly
// cause an underrun. Give it ~1ms before going on.
// 10ns per loop iteration = 10^-8s --> 10^5 iterations needed

      // GPIO1 bit 28 is driven high for the duration of the delay and low
      // again afterwards (presumably a debug/timing marker - confirm; the
      // same pin is toggled per buffer further down).
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_SETDATAOUT
      SBBO r2, r3, 0, 4

      // Two-instruction countdown loop, 100000 iterations
      MOV r2, 100000
MCASP_INIT_WAIT:	
      SUB r2, r2, 1
      QBNE MCASP_INIT_WAIT, r2, 0

      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
      SBBO r2, r3, 0, 4
	
      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 10)	// Set XSRCLR
      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 3)	// Set RSMRST
      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 11)	// Set XSMRST

      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00		// Write to the transmit buffer to prevent underflow

      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 4)	// Set RFRST
      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 12)	// Set XFRST

// Initialisation
// Compute the start offsets of the two halves of each double buffer.
// With F = frames per buffer and N = number of SPI channels:
//   SPI DAC buffer 0 at 0, buffer 1 at 2*N*F bytes  (N channels * 2 bytes * F)
//   McASP DAC buffer 0 at 0, buffer 1 at N*F bytes
// The SPI offsets are relative to C_ADC_DAC_MEM and the McASP offsets to
// C_MCASP_MEM, so both can start at 0 without overlapping.
      LBBO reg_frame_total, reg_comm_addr, COMM_BUFFER_FRAMES, 4  // Total frame count (SPI; 0.5x-2x for McASP)
      MOV reg_dac_buf0, 0                      // DAC buffer 0 start pointer
      LSL reg_dac_buf1, reg_frame_total, 1     // DAC buffer 1 start pointer = N[ch]*2[bytes]*bufsize
      LMBD r2, reg_num_channels, 1		 // Returns 1, 2 or 3 depending on the number of channels
      LSL reg_dac_buf1, reg_dac_buf1, r2	 // Multiply by 2, 4 or 8 to get the N[ch] scaling above
      MOV reg_mcasp_buf0, 0			 // McASP DAC buffer 0 start pointer
      LSL reg_mcasp_buf1, reg_frame_total, r2    // McASP DAC buffer 1 start pointer = 2[ch]*2[bytes]*(N/4)[samples/spi]*bufsize
      CLR reg_flags, reg_flags, FLAG_BIT_BUFFER1 // Bit 0 holds which buffer we are on
      MOV r2, 0
      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4  // Start with frame count of 0
	
// Here we are out of sync by one TDM slot since the 0 word transmitted above will have occupied
// the first output slot. Send one more word before jumping into the loop.
MCASP_DAC_WAIT_BEFORE_LOOP:	
      // Spin until the transmitter reports it can accept data (no timeout)
      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
      QBBC MCASP_DAC_WAIT_BEFORE_LOOP, r2, MCASP_XSTAT_XDATA_BIT

      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00

// Likewise, read and discard the first sample we get back from the ADC. This keeps the DAC and ADC
// in sync in terms of which TDM slot we are reading (empirically found that we should throw this away
// rather than keep it and invert the phase)
MCASP_ADC_WAIT_BEFORE_LOOP:
      // Spin until the receiver reports a word is available (no timeout)
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_ADC_WAIT_BEFORE_LOOP, r2, MCASP_RSTAT_RDATA_BIT

      MCASP_REG_READ_EXT MCASP_RBUF, r2
	
WRITE_ONE_BUFFER:
      // Write a single buffer of DAC samples and read a buffer of ADC samples
      // Entered once per buffer (the stop check at the end of the program
      // branches back here). Resets the four running pointers from the
      // current "buffer 0" start pointers, which are swapped after every
      // buffer, and clears the frame counter.
      // With F = frames per buffer and N = number of SPI channels, the input
      // (ADC) regions sit directly behind the two output (DAC) buffers:
      //   SPI:   ADC pointer = DAC pointer + 4*N*F bytes
      //   McASP: ADC pointer = DAC pointer + 2*N*F bytes
      // Load starting positions
      MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
      LMBD r2, reg_num_channels, 1		// 1, 2 or 3 for 2, 4 or 8 channels
      LSL reg_adc_current, reg_frame_total, r2
      LSL reg_adc_current, reg_adc_current, 2   // N * 2 * 2 * bufsize
      ADD reg_adc_current, reg_adc_current, reg_dac_current // ADC: starts N * 2 * 2 * bufsize beyond DAC
      MOV reg_mcasp_dac_current, reg_mcasp_buf0 // McASP: set current DAC pointer
      LSL reg_mcasp_adc_current, reg_frame_total, r2 // McASP ADC: starts (N/2)*2*2*bufsize beyond DAC
      LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1
      // Plain ADD, not ADC (add with carry): the pointer must not depend on
      // whatever the carry flag happens to hold from earlier arithmetic.
      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
      MOV reg_frame_current, 0
	
	
WRITE_LOOP:
      // One pass of WRITE_LOOP handles one frame; r1 is the SPI channel index
      // within the frame. Each pass of ADC_DAC_LOOP handles two SPI channels
      // and exactly one 16-bit McASP word in each direction, so
      // FLAG_BIT_MCASP_HWORD alternates between the low and high half of a
      // 32-bit audio buffer word. r2, r3 and r7 are temporaries here; the
      // McASP macros clobber r27 and r28.

      // Write N channels to DAC from successive values in memory
      // At the same time, read N channels from ADC
      // Unrolled by a factor of 2 to get high and low words
      MOV r1, 0
ADC_DAC_LOOP:
      QBBC SPI_DAC_LOAD_DONE, reg_flags, FLAG_BIT_USE_SPI
      // Load next 2 SPI DAC samples and store zero in their place
      LBCO reg_dac_data, C_ADC_DAC_MEM, reg_dac_current, 4
      MOV r2, 0
      SBCO r2, C_ADC_DAC_MEM, reg_dac_current, 4
      ADD reg_dac_current, reg_dac_current, 4
SPI_DAC_LOAD_DONE:

      // On even iterations, load two more samples and choose the first one
      // On odd iterations, transmit the second of the samples already loaded
      // QBBS MCASP_DAC_HIGH_WORD, r1, 1
      QBBS MCASP_DAC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
MCASP_DAC_LOW_WORD:	
      // Load next 2 Audio DAC samples and store zero in their place
      LBCO reg_mcasp_dac_data, C_MCASP_MEM, reg_mcasp_dac_current, 4
      MOV r2, 0
      SBCO r2, C_MCASP_MEM, reg_mcasp_dac_current, 4
      ADD reg_mcasp_dac_current, reg_mcasp_dac_current, 4

      // Mask out the low word (first in little endian)
      MOV r2, 0xFFFF
      AND r7, reg_mcasp_dac_data, r2
	
      QBA MCASP_WAIT_XSTAT
MCASP_DAC_HIGH_WORD:
      // Take the high word of the previously loaded data
      LSR r7, reg_mcasp_dac_data, 16
	
      // Every 2 channels we send one audio sample; this loop already
      // sends exactly two SPI channels.
      // Wait for McASP XSTAT[XDATA] to set indicating we can write more data
      // (busy-wait with no timeout; this is what paces the whole loop)
MCASP_WAIT_XSTAT:
      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
      QBBC MCASP_WAIT_XSTAT, r2, MCASP_XSTAT_XDATA_BIT

      MCASP_REG_WRITE_EXT MCASP_XBUF, r7
	
      // Same idea with ADC: even iterations, load the sample into the low word, odd
      // iterations, load the sample into the high word and store
      // QBBS MCASP_ADC_HIGH_WORD, r1, 1
      QBBS MCASP_ADC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
MCASP_ADC_LOW_WORD:	
      // Start ADC data at 0
      LDI reg_mcasp_adc_data, 0
	
      // Now wait for a received word to become available from the audio ADC
MCASP_WAIT_RSTAT_LOW:
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_WAIT_RSTAT_LOW, r2, MCASP_RSTAT_RDATA_BIT

      // Mask low word and store in ADC data register
      MCASP_REG_READ_EXT MCASP_RBUF, r3
      MOV r2, 0xFFFF
      AND reg_mcasp_adc_data, r3, r2
      QBA MCASP_ADC_DONE

MCASP_ADC_HIGH_WORD:	
      // Wait for a received word to become available from the audio ADC
MCASP_WAIT_RSTAT_HIGH:
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_WAIT_RSTAT_HIGH, r2, MCASP_RSTAT_RDATA_BIT

      // Read data and shift 16 bits to the left (into the high word)
      MCASP_REG_READ_EXT MCASP_RBUF, r3
      LSL r3, r3, 16
      OR reg_mcasp_adc_data, reg_mcasp_adc_data, r3

      // Now store the result and increment the pointer
      SBCO reg_mcasp_adc_data, C_MCASP_MEM, reg_mcasp_adc_current, 4
      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
MCASP_ADC_DONE:	
      // SPI half of the inner loop: two DAC writes and two ADC reads per
      // pass (channels r1 and r1+1), taken from / packed into one 32-bit
      // buffer word each. r7 and r8 are temporaries; DAC_WRITE and ADC_WRITE
      // clobber r27 and r28. Skipped entirely when SPI is disabled.
      QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
	
      // DAC: transmit low word (first in little endian)
      // Word sent = (0x03 << command offset) | (channel r1 << address offset) | (sample << data offset)
      MOV r2, 0xFFFF
      AND r7, reg_dac_data, r2
      LSL r7, r7, AD5668_DATA_OFFSET
      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
      OR r7, r7, r8
      LSL r8, r1, AD5668_ADDRESS_OFFSET
      OR r7, r7, r8
      DAC_WRITE r7

      // Read ADC channels: result is always 2 commands behind
      // Start by reading channel 2 (result is channel 0) and go
      // to N+2, but masking the channel number to be between 0 and N-1
      // (N is 2, 4 or 8, so N-1 is a valid bit mask)
      LDI reg_adc_data, 0
      ADD r8, r1, 2
      SUB r7, reg_num_channels, 1
      AND r8, r8, r7
      LSL r8, r8, AD7699_CHANNEL_OFFSET
      MOV r7, AD7699_CFG_MASK
      OR r7, r7, r8
      ADC_WRITE r7, r7

      // Mask out only the relevant 16 bits and store in reg_adc_data
      MOV r2, 0xFFFF
      AND reg_adc_data, r7, r2

      // Increment channel index
      ADD r1, r1, 1

      // DAC: transmit high word (second in little endian)
      LSR r7, reg_dac_data, 16
      LSL r7, r7, AD5668_DATA_OFFSET
      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
      OR r7, r7, r8
      LSL r8, r1, AD5668_ADDRESS_OFFSET
      OR r7, r7, r8
      DAC_WRITE r7

      // Read ADC channels: result is always 2 commands behind
      // Start by reading channel 2 (result is channel 0) and go
      // to N+2, but masking the channel number to be between 0 and N-1
      ADD r8, r1, 2
      SUB r7, reg_num_channels, 1
      AND r8, r8, r7
      LSL r8, r8, AD7699_CHANNEL_OFFSET
      MOV r7, AD7699_CFG_MASK
      OR r7, r7, r8
      ADC_WRITE r7, r7

      // Move this result up to the 16 high bits
      LSL r7, r7, 16
      OR reg_adc_data, reg_adc_data, r7

      // Store 2 ADC words in memory
      SBCO reg_adc_data, C_ADC_DAC_MEM, reg_adc_current, 4
      ADD reg_adc_current, reg_adc_current, 4

      // Toggle the high/low word for McASP control (since we send one word out of
      // 32 bits for each pair of SPI channels)
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
	
      // Repeat 4 times for 8 channels (2 samples per loop, r1 += 1 already happened)
      // For 4 or 2 channels, repeat 2 or 1 times, according to flags
      ADD r1, r1, 1
      QBNE ADC_DAC_LOOP, r1, reg_num_channels
      QBA ADC_DAC_LOOP_DONE

SPI_SKIP_WRITE:
      // We get here only if the SPI ADC and DAC are disabled
      // Just keep the loop going for McASP
      // (reg_num_channels keeps its default of 8 in that case, so the inner
      // loop runs four times per frame)

      // Toggle the high/low word for McASP control (since we send one word out of
      // 32 bits for each pair of SPI channels)
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)

      ADD r1, r1, 2
      QBNE ADC_DAC_LOOP, r1, reg_num_channels
	
ADC_DAC_LOOP_DONE:	
      // Increment number of frames, see if we have more to write
      ADD reg_frame_current, reg_frame_current, 1
      QBNE WRITE_LOOP, reg_frame_current, reg_frame_total

WRITE_LOOP_DONE:
      // End-of-buffer housekeeping: swap the double buffers, publish the new
      // buffer index and the running frame count to the ARM, update the
      // optional status LED and the GPIO1:28 marker pin, then either start
      // the next buffer or fall through to CLEANUP.

      // Now done, swap the buffers and do the next one
      // Use r2 as a temp register
      MOV r2, reg_dac_buf0
      MOV reg_dac_buf0, reg_dac_buf1
      MOV reg_dac_buf1, r2
      MOV r2, reg_mcasp_buf0
      MOV reg_mcasp_buf0, reg_mcasp_buf1
      MOV reg_mcasp_buf1, r2

      // Notify ARM of buffer swap
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1)
      AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out everything but the low bit
      SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4

      // Increment the frame count in the comm buffer (for status monitoring)
      LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
      ADD r2, r2, reg_frame_total
      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4

      // If LED blink enabled, toggle every 4096 frames
      // (enabled means a non-zero GPIO base address in COMM_LED_ADDRESS; the
      // LED follows bit 12 of the frame count still held in r2)
      LBBO r3, reg_comm_addr, COMM_LED_ADDRESS, 4
      QBEQ LED_BLINK_DONE, r3, 0	
      MOV r1, 0x1000
      AND r2, r2, r1          // Test (frame count & 4096)
      QBEQ LED_BLINK_OFF, r2, 0
      LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4	
      MOV r1, GPIO_SETDATAOUT
      ADD r3, r3, r1          // Address for GPIO set register
      SBBO r2, r3, 0, 4       // Set GPIO pin
      QBA LED_BLINK_DONE
LED_BLINK_OFF:
      LBBO r2, reg_comm_addr, COMM_LED_PIN_MASK, 4
      MOV r1, GPIO_CLEARDATAOUT
      ADD r3, r3, r1          // Address for GPIO clear register
      SBBO r2, r3, 0, 4       // Clear GPIO pin	
LED_BLINK_DONE:	
	
      // Mirror the current buffer flag on GPIO1 bit 28: high while
      // FLAG_BIT_BUFFER1 is set, low otherwise (presumably a test point for
      // observing buffer timing - confirm).
      QBBC TESTLOW, reg_flags, FLAG_BIT_BUFFER1
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_SETDATAOUT
      SBBO r2, r3, 0, 4
      QBA TESTDONE
TESTLOW:
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
      SBBO r2, r3, 0, 4
TESTDONE:
	 
      // Check if we should finish: flag is zero as long as it should run
      LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
      QBEQ WRITE_ONE_BUFFER, r2, 0

CLEANUP:
      // Reached when the ARM has written a non-zero value to
      // COMM_SHOULD_STOP. Shuts down the McASP and, if it was in use, the
      // SPI module, then signals the ARM and halts the PRU.
      MCASP_REG_WRITE MCASP_GBLCTL, 0x00	// Turn off McASP

      // Turn off SPI if enabled
      QBBC SPI_CLEANUP_DONE, reg_flags, FLAG_BIT_USE_SPI
	
      // Clear bits 13 and 27 of the channel 0 configuration
      MOV r3, SPI_BASE + SPI_CH0CONF
      LBBO r2, r3, 0, 4
      CLR r2, r2, 13
      CLR r2, r2, 27
      SBBO r2, r3, 0, 4

      // Disable channel 0. The enable is bit 0 of CHxCTRL (initialisation
      // turned the channel on by writing 0x01), so that is the bit to clear.
      MOV r3, SPI_BASE + SPI_CH0CTRL
      LBBO r2, r3, 0, 4
      CLR r2, r2, 0
      SBBO r2, r3, 0, 4      

      // Disable channel 1 (ADC) as well; it was enabled alongside channel 0
      LBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
      CLR r2, r2, 0
      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
SPI_CLEANUP_DONE:
	
      // Signal the ARM that we have finished 
      MOV R31.b0, PRU0_ARM_INTERRUPT + 16
      HALT