view pru_rtaudio.p @ 12:a6beeba3a648

Initial support for higher matrix sample rates by reducing the number of channels. Input not tested yet, and not all examples updated to new format.
author andrewm
date Thu, 22 Jan 2015 19:00:22 +0000
parents 8a575ba3ab52
children 6adb088196a7
line wrap: on
line source
// Code is placed at instruction address 0; execution begins at the START label
.origin 0
.entrypoint START

#define DBOX_CAPE	// Define this to use new cape hardware
	
// Clock-control block. Only CLOCK_BASE + CLOCK_SPI0 is written in this file
// (during SPI initialisation); CLOCK_SPI1 and CLOCK_L4LS are not referenced here.
#define CLOCK_BASE  0x44E00000
#define CLOCK_SPI0  0x4C
#define CLOCK_SPI1  0x50
#define CLOCK_L4LS  0x60

// SPI controller base addresses; SPI_BASE selects the one this program drives
#define SPI0_BASE   0x48030100
#define SPI1_BASE   0x481A0100
#define SPI_BASE    SPI0_BASE
	
// SPI register offsets, relative to SPI_BASE (held in reg_spi_addr).
// Channel 0 is used for the DAC and channel 1 for the ADC (see the macros below).
#define SPI_SYSCONFIG 0x10
#define SPI_SYSSTATUS 0x14
#define SPI_MODULCTRL 0x28
#define SPI_CH0CONF   0x2C
#define SPI_CH0STAT   0x30
#define SPI_CH0CTRL   0x34
#define SPI_CH0TX     0x38
#define SPI_CH0RX     0x3C
#define SPI_CH1CONF   0x40
#define SPI_CH1STAT   0x44
#define SPI_CH1CTRL   0x48
#define SPI_CH1TX     0x4C
#define SPI_CH1RX     0x50

// GPIO bank base addresses and the offsets of their write-1-to-clear /
// write-1-to-set output registers (used for chip selects, LED and debug pin)
#define GPIO0 0x44E07000
#define GPIO1 0x4804C000
#define GPIO_CLEARDATAOUT 0x190
#define GPIO_SETDATAOUT 0x194

// Event number used to signal the ARM at exit (written to R31.b0 with +16 added)
#define PRU0_ARM_INTERRUPT 19

// Constant-table entry through which the SPI ADC/DAC sample buffers are accessed
#define C_ADC_DAC_MEM C24     // PRU0 mem
// DAC chip-select GPIO: bank and pin mask depend on the cape revision
#ifdef DBOX_CAPE
#define DAC_GPIO      GPIO0
#define DAC_CS_PIN    (1<<5) // GPIO0:5 = P9 pin 17
#else
#define DAC_GPIO      GPIO1
#define DAC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
#endif
// Field values combined into the SPI channel 0 configuration word at init
#define DAC_TRM       0       // SPI transmit and receive
#define DAC_WL        32      // Word length
#define DAC_CLK_MODE  1       // SPI mode
#define DAC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
#define DAC_DPE       1       // d0 = receive, d1 = transmit

// Bit positions of the fields inside the 32-bit word sent to the AD5668 DAC
#define AD5668_COMMAND_OFFSET 24
#define AD5668_ADDRESS_OFFSET 20
#define AD5668_DATA_OFFSET    4
#define AD5668_REF_OFFSET     0

// ADC chip-select GPIO: always on GPIO1, pin depends on the cape revision
#ifdef DBOX_CAPE
#define ADC_GPIO      GPIO1
#define ADC_CS_PIN    (1<<16) // GPIO1:16 = P9 pin 15
#else
#define ADC_GPIO      GPIO1
#define ADC_CS_PIN    (1<<17) // GPIO1:17 = P9 pin 23
#endif
// Field values combined into the SPI channel 1 configuration word at init
#define ADC_TRM       0       // SPI transmit and receive
#define ADC_WL        16      // Word length
#define ADC_CLK_MODE  0       // SPI mode
#define ADC_CLK_DIV   1       // Clock divider (48MHz / 2^n)
#define ADC_DPE       1       // d0 = receive, d1 = transmit

// AD7699 command word: base configuration bits plus the shift applied to the
// channel number (and to the sequencer setting) before OR-ing them in
#define AD7699_CFG_MASK       0xF120 // Mask for config update, unipolar, full BW
#define AD7699_CHANNEL_OFFSET 9      // 7 bits offset of a 14-bit left-justified word
#define AD7699_SEQ_OFFSET     3      // sequencer (0 = disable, 3 = scan all)

// Layout of the communication area shared with the ARM. All entries are 32-bit
// words at the given byte offset from SHARED_COMM_MEM_BASE (held in reg_comm_addr).
// The three COMM_*SYNC* entries are defined but not read anywhere in this file.
#define SHARED_COMM_MEM_BASE  0x00010000  // Location where comm flags are written
#define COMM_SHOULD_STOP      0		  // Set to be nonzero when loop should stop
#define COMM_CURRENT_BUFFER   4           // Which buffer we are on
#define COMM_BUFFER_FRAMES    8           // How many frames per buffer
#define COMM_SHOULD_SYNC      12          // Whether to synchronise to an external clock
#define COMM_SYNC_ADDRESS     16          // Which memory address to find the GPIO on
#define COMM_SYNC_PIN_MASK    20          // Which pin to read for the sync
#define COMM_LED_ADDRESS      24          // Which memory address to find the status LED on
#define COMM_LED_PIN_MASK     28          // Which pin to write to change LED
#define COMM_FRAME_COUNT      32	  // How many frames have elapsed since beginning
#define COMM_USE_SPI          36          // Whether or not to use SPI ADC and DAC
#define COMM_NUM_CHANNELS     40	  // Requested number of SPI channels; clamped at start-up to 8, 4 or 2
	
// McASP peripheral base addresses
#define MCASP0_BASE 0x48038000
#define MCASP1_BASE 0x4803C000

// McASP register offsets, relative to the base held in reg_mcasp_addr.
// Offsets above 0xFF must be accessed with the *_EXT macros further below.
// Note that MCASP_PDSET and MCASP_PDIN deliberately carry the same offset here.
#define MCASP_PWRIDLESYSCONFIG 		0x04
#define MCASP_PFUNC			0x10
#define MCASP_PDIR			0x14
#define MCASP_PDOUT			0x18
#define MCASP_PDSET			0x1C
#define MCASP_PDIN			0x1C
#define MCASP_PDCLR			0x20
#define MCASP_GBLCTL			0x44
#define MCASP_AMUTE			0x48
#define MCASP_DLBCTL			0x4C
#define MCASP_DITCTL			0x50
#define MCASP_RGBLCTL			0x60
#define MCASP_RMASK			0x64
#define MCASP_RFMT			0x68
#define MCASP_AFSRCTL			0x6C
#define MCASP_ACLKRCTL			0x70
#define MCASP_AHCLKRCTL			0x74
#define MCASP_RTDM			0x78
#define MCASP_RINTCTL			0x7C
#define MCASP_RSTAT			0x80
#define MCASP_RSLOT			0x84
#define MCASP_RCLKCHK			0x88
#define MCASP_REVTCTL			0x8C
#define MCASP_XGBLCTL			0xA0
#define MCASP_XMASK			0xA4
#define MCASP_XFMT			0xA8
#define MCASP_AFSXCTL			0xAC
#define MCASP_ACLKXCTL			0xB0
#define MCASP_AHCLKXCTL			0xB4
#define MCASP_XTDM			0xB8
#define MCASP_XINTCTL			0xBC
#define MCASP_XSTAT			0xC0
#define MCASP_XSLOT			0xC4
#define MCASP_XCLKCHK			0xC8
#define MCASP_XEVTCTL			0xCC
#define MCASP_SRCTL0			0x180
#define MCASP_SRCTL1			0x184
#define MCASP_SRCTL2			0x188
#define MCASP_SRCTL3			0x18C
#define MCASP_SRCTL4			0x190
#define MCASP_SRCTL5			0x194
#define MCASP_XBUF0			0x200
#define MCASP_XBUF1			0x204
#define MCASP_XBUF2			0x208
#define MCASP_XBUF3			0x20C
#define MCASP_XBUF4			0x210
#define MCASP_XBUF5			0x214
#define MCASP_RBUF0			0x280
#define MCASP_RBUF1			0x284
#define MCASP_RBUF2			0x288
#define MCASP_RBUF3			0x28C
#define MCASP_RBUF4			0x290
#define MCASP_RBUF5			0x294
#define MCASP_WFIFOCTL			0x1000
#define MCASP_WFIFOSTS			0x1004
#define MCASP_RFIFOCTL			0x1008
#define MCASP_RFIFOSTS			0x100C

// Status bits polled in the audio loop before each transmit / receive access
#define MCASP_XSTAT_XDATA_BIT           5        // Bit to test for transmit ready
#define MCASP_RSTAT_RDATA_BIT           5        // Bit to test for receive ready 
	
// Constants used for this particular audio setup
// MCASP_BASE selects the McASP instance; the serialiser used for transmit and
// receive (and the matching XBUF/RBUF register) depends on the cape revision.
#define MCASP_BASE 	MCASP0_BASE
#ifdef DBOX_CAPE
#define MCASP_SRCTL_X	MCASP_SRCTL2	// Ser. 2 is transmitter
#define MCASP_SRCTL_R	MCASP_SRCTL0	// Ser. 0 is receiver
#define MCASP_XBUF	MCASP_XBUF2
#define MCASP_RBUF	MCASP_RBUF0
#else
#define MCASP_SRCTL_X	MCASP_SRCTL3	// Ser. 3 is transmitter
#define MCASP_SRCTL_R	MCASP_SRCTL2	// Ser. 2 is receiver
#define MCASP_XBUF	MCASP_XBUF3
#define MCASP_RBUF	MCASP_RBUF2
#endif
	
// Pin masks as used in the McASP pin-direction register
#define MCASP_PIN_AFSX		(1 << 28)
#define MCASP_PIN_AHCLKX	(1 << 27)
#define MCASP_PIN_ACLKX		(1 << 26)
#define MCASP_PIN_AMUTE		(1 << 25)	// Also, 0 to 3 are XFR0 to XFR3

// Value written to MCASP_PDIR. NOTE(review): the DBOX_CAPE expansion is not
// parenthesised; that is safe for its single use as a whole MOV operand, but
// would need parentheses if it were ever combined into a larger expression.
#ifdef DBOX_CAPE
#define MCASP_OUTPUT_PINS   	MCASP_PIN_AHCLKX | (1 << 2) // AHCLKX and AXR2 outputs
#else
#define MCASP_OUTPUT_PINS   	(1 << 3)	// Which pins are outputs
#endif

// Written to both the receive and transmit mask / format registers
#define MCASP_DATA_MASK 	0xFFFF		// 16 bit data
#define MCASP_DATA_FORMAT	0x807C		// MSB first, 0 bit delay, 16 bits, CFG bus, ROR 16bits

// Constant-table entry through which the McASP (audio) sample buffers are accessed
#define C_MCASP_MEM             C28     	// Shared PRU mem

// Flags for the flags register
// (bit numbers inside reg_flags)
#define FLAG_BIT_BUFFER1	0
#define FLAG_BIT_USE_SPI	1
#define FLAG_BIT_MCASP_HWORD	2		// Whether we are on the high word for McASP transmission
	
// Registers used throughout
// These allocations are global: there is no stack or calling convention, so
// every register named here keeps its role for the lifetime of the program.

// r1, r2, r3 are used for temporary storage
// (r1 doubles as the SPI channel index inside the audio loop; r0, r7 and r8
// are also used as scratch registers by the code below)
#define reg_num_channels	r9		// Number of SPI ADC/DAC channels to use
#define reg_frame_current	r10		// Current frame count in SPI ADC/DAC transfer
#define reg_frame_total		r11		// Total frame count for SPI ADC/DAC
#define reg_dac_data		r12		// Current dword for SPI DAC
#define reg_adc_data		r13		// Current dword for SPI ADC
#define reg_mcasp_dac_data	r14		// Current dword for McASP DAC
#define reg_mcasp_adc_data	r15		// Current dword for McASP ADC
#define reg_dac_buf0		r16		// Start pointer to SPI DAC buffer 0
#define reg_dac_buf1		r17		// Start pointer to SPI DAC buffer 1
#define reg_dac_current		r18		// Pointer to current storage location of SPI DAC
#define reg_adc_current		r19		// Pointer to current storage location of SPI ADC
#define reg_mcasp_buf0		r20		// Start pointer to McASP DAC buffer 0
#define reg_mcasp_buf1		r21		// Start pointer to McASP DAC buffer 1
#define reg_mcasp_dac_current	r22		// Pointer to current storage location of McASP DAC
#define reg_mcasp_adc_current	r23		// Pointer to current storage location of McASP ADC
#define reg_flags		r24		// Buffer ID (0 and 1) and other flags
#define reg_comm_addr		r25		// Memory address for communicating with ARM
#define reg_spi_addr		r26		// Base address for SPI
// r27, r28 used in macros
// (any macro below may overwrite them, so never pass r27/r28 as a macro argument)
#define reg_mcasp_addr		r29		// Base address for McASP

	
// Select the DAC: pull its chip-select line low by writing the pin mask
// to the GPIO clear-output register. Clobbers r27 and r28.
.macro DAC_CS_ASSERT
      MOV r28, DAC_GPIO + GPIO_CLEARDATAOUT
      MOV r27, DAC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Deselect the DAC: raise its chip-select line by writing the pin mask
// to the GPIO set-output register. Clobbers r27 and r28.
.macro DAC_CS_UNASSERT
      MOV r28, DAC_GPIO + GPIO_SETDATAOUT
      MOV r27, DAC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Write to DAC TX register
// data: register holding the 32-bit word to store into SPI channel 0's TX register.
// Does not touch r27/r28.
.macro DAC_TX
.mparam data
      SBBO data, reg_spi_addr, SPI_CH0TX, 4
.endm

// Block until SPI channel 0 reports a received word (bit 0 of its status
// register, the RXS indicator), which marks the end of the transfer.
// Clobbers r27. The label is local to each expansion of the macro.
.macro DAC_WAIT_FOR_FINISH
 DAC_POLL_RXS:
      LBBO r27, reg_spi_addr, SPI_CH0STAT, 4
      QBBC DAC_POLL_RXS, r27, 0
.endm

// Read the RX word to clear
// The value read from SPI channel 0's RX register lands in r27 and is never used.
.macro DAC_DISCARD_RX
      LBBO r27, reg_spi_addr, SPI_CH0RX, 4
.endm

// Complete DAC write with chip select
// reg: register holding the 32-bit word to send. It must not be r27 or r28,
//      because DAC_CS_ASSERT overwrites both before the word is transmitted.
// Sequence: assert CS, queue the word, wait for the transfer to finish,
// release CS, then read and drop the word received during the transfer.
// Clobbers r27 and r28.
.macro DAC_WRITE
.mparam reg
      DAC_CS_ASSERT
      DAC_TX reg
      DAC_WAIT_FOR_FINISH
      DAC_CS_UNASSERT
      DAC_DISCARD_RX
.endm

// Select the ADC: pull its chip-select line low by writing the pin mask
// to the GPIO clear-output register. Clobbers r27 and r28.
.macro ADC_CS_ASSERT
      MOV r28, ADC_GPIO + GPIO_CLEARDATAOUT
      MOV r27, ADC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Deselect the ADC: raise its chip-select line by writing the pin mask
// to the GPIO set-output register. Clobbers r27 and r28.
.macro ADC_CS_UNASSERT
      MOV r28, ADC_GPIO + GPIO_SETDATAOUT
      MOV r27, ADC_CS_PIN
      SBBO r27, r28, 0, 4
.endm

// Write to ADC TX register
// data: register holding the word to store into SPI channel 1's TX register.
// Does not touch r27/r28.
.macro ADC_TX
.mparam data
      SBBO data, reg_spi_addr, SPI_CH1TX, 4
.endm

// Block until SPI channel 1 reports a received word (bit 0 of its status
// register, the RXS indicator), which marks the end of the transfer.
// Clobbers r27. The label is local to each expansion of the macro.
.macro ADC_WAIT_FOR_FINISH
 ADC_POLL_RXS:
      LBBO r27, reg_spi_addr, SPI_CH1STAT, 4
      QBBC ADC_POLL_RXS, r27, 0
.endm

// Read the RX word to clear; store output
// data: register that receives the 32-bit contents of SPI channel 1's RX register.
.macro ADC_RX
.mparam data
      LBBO data, reg_spi_addr, SPI_CH1RX, 4
.endm

// Complete ADC write+read with chip select
// in:  register holding the command word to send. Must not be r27/r28, which
//      ADC_CS_ASSERT overwrites before the word is transmitted.
// out: register that receives the word read back. Must not be r27/r28 either,
//      because ADC_CS_UNASSERT overwrites them after the read. `in` and `out`
//      may be the same register (the callers in this file do exactly that).
// Clobbers r27 and r28.
.macro ADC_WRITE
.mparam in, out
      ADC_CS_ASSERT
      ADC_TX in
      ADC_WAIT_FOR_FINISH
      ADC_RX out
      ADC_CS_UNASSERT
.endm

// Write a McASP register
// reg:   register offset from reg_mcasp_addr, used directly as the SBBO offset
//        (use MCASP_REG_WRITE_EXT for offsets beyond the 0xFF boundary)
// value: immediate or register to write
// Clobbers r27.
.macro MCASP_REG_WRITE
.mparam reg, value
      MOV r27, value
      SBBO r27, reg_mcasp_addr, reg, 4
.endm

// Write a McASP register beyond the 0xFF boundary
// reg:   register offset from reg_mcasp_addr; the full address is computed in r28
// value: immediate or register to write (copied into r27 first)
// Clobbers r27 and r28.
.macro MCASP_REG_WRITE_EXT
.mparam reg, value
      MOV r27, value
      MOV r28, reg
      ADD r28, reg_mcasp_addr, r28
      SBBO r27, r28, 0, 4
.endm

// Read a McASP register
// reg:   register offset from reg_mcasp_addr, used directly as the LBBO offset
// value: destination register for the 32-bit contents
.macro MCASP_REG_READ
.mparam reg, value
      LBBO value, reg_mcasp_addr, reg, 4
.endm
	
// Read a McASP register beyond the 0xFF boundary
// reg:   register offset from reg_mcasp_addr; the full address is computed in r28
// value: destination register for the 32-bit contents
// Clobbers r28.
.macro MCASP_REG_READ_EXT
.mparam reg, value
      MOV r28, reg
      ADD r28, reg_mcasp_addr, r28
      LBBO value, r28, 0, 4
.endm
	
// Read-modify-write a McASP register to turn on the bits in `mask`, then spin
// until the register reads back with at least one of those bits set.
// reg:  register offset from reg_mcasp_addr (used directly as the load/store offset)
// mask: bit mask to OR into the register
// Clobbers r27 (mask) and r28 (register contents).
.macro MCASP_REG_SET_BIT_AND_POLL
.mparam reg, mask
      MOV r27, mask
      LBBO r28, reg_mcasp_addr, reg, 4
      OR r28, r27, r28
      SBBO r28, reg_mcasp_addr, reg, 4
WAIT_BIT_SET:
      LBBO r28, reg_mcasp_addr, reg, 4
      AND r28, r27, r28
      QBEQ WAIT_BIT_SET, r28, 0
.endm
   
START:
      // Zero CTBIR0, which holds the c24 and c25 offsets, so that C24 points
      // to the start of PRU0 RAM
      LDI r2, 0
      MOV r3, 0x22020       // CTBIR0
      SBBO r2, r3, 0, 4

      // Program the c28 pointer offset so C28 resolves to 0x00012000
      // in the shared PRU RAM
      LDI r2, 0x0120
      MOV r3, 0x22028       // CTPPR0
      SBBO r2, r3, 0, 4

      // Clear bit 4 of the word at C4 + 4; per the original author this is
      // what lets the PRU write to the ARM-side peripheral registers
      LBCO r0, C4, 4, 4
      CLR r0, r0, 4
      SBCO r0, C4, 4, 4

      // Base addresses that stay in registers for the rest of the program
      MOV reg_mcasp_addr, MCASP_BASE
      MOV reg_spi_addr, SPI_BASE
      MOV reg_comm_addr, SHARED_COMM_MEM_BASE

      // Start with all flags clear and with 8 channels, the value that
      // remains in force when the SPI ADC/DAC are disabled
      LDI reg_num_channels, 8
      LDI reg_flags, 0
	
      // Ask the ARM whether the SPI ADC and DAC are in use. reg_flags was
      // cleared just above, so when the answer is "no" the USE_SPI flag is
      // already clear and the whole SPI set-up can be skipped directly.
      LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
      QBEQ SPI_INIT_DONE, r2, 0
      SET reg_flags, reg_flags, FLAG_BIT_USE_SPI

      // Fetch the requested channel count and round it down to a supported
      // value: 8 or more -> 8, 4..7 -> 4, anything smaller -> 2
      LBBO reg_num_channels, reg_comm_addr, COMM_NUM_CHANNELS, 4
      QBLE SPI_NUM_CHANNELS_USE8, reg_num_channels, 8 // 8 <= num_channels ?
      QBLE SPI_NUM_CHANNELS_USE4, reg_num_channels, 4 // 4 <= num_channels ?
      LDI reg_num_channels, 2
      QBA SPI_NUM_CHANNELS_DONE
SPI_NUM_CHANNELS_USE4:
      LDI reg_num_channels, 4
      QBA SPI_NUM_CHANNELS_DONE
SPI_NUM_CHANNELS_USE8:
      LDI reg_num_channels, 8
SPI_NUM_CHANNELS_DONE:	
	
      // SPI, DAC and ADC bring-up. Only reached when FLAG_BIT_USE_SPI is set.
      // The order of the register writes below is a hardware sequence and
      // must be kept. r2/r3 are scratch; the macros clobber r27/r28.

      // Init SPI clock
      MOV r2, 0x02
      MOV r3, CLOCK_BASE + CLOCK_SPI0
      SBBO r2, r3, 0, 4

      // Reset SPI and wait for finish
      MOV r2, 0x02
      SBBO r2, reg_spi_addr, SPI_SYSCONFIG, 4

      // Spin until bit 0 of the status register comes up
SPI_WAIT_RESET:
      LBBO r2, reg_spi_addr, SPI_SYSSTATUS, 4
      QBBC SPI_WAIT_RESET, r2, 0
	
      // Turn off SPI channels
      MOV r2, 0
      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4
  
      // Set to master; chip select lines enabled (CS0 used for DAC)
      // NOTE(review): the DAC/ADC macros drive chip select through GPIO writes;
      // confirm how that relates to the controller's own CS lines.
      MOV r2, 0x00
      SBBO r2, reg_spi_addr, SPI_MODULCTRL, 4
  
      // Configure CH0 for DAC
      // (same field layout as CH1 below, plus an extra (1 << 6) term)
      MOV r2, (3 << 27) | (DAC_DPE << 16) | (DAC_TRM << 12) | ((DAC_WL - 1) << 7) | (DAC_CLK_DIV << 2) | DAC_CLK_MODE | (1 << 6)
      SBBO r2, reg_spi_addr, SPI_CH0CONF, 4

      // Configure CH1 for ADC
      MOV r2, (3 << 27) | (ADC_DPE << 16) | (ADC_TRM << 12) | ((ADC_WL - 1) << 7) | (ADC_CLK_DIV << 2) | ADC_CLK_MODE
      SBBO r2, reg_spi_addr, SPI_CH1CONF, 4
   
      // Turn on SPI channels
      // (bit 0 of each channel control register is the enable written here)
      MOV r2, 0x01
      SBBO r2, reg_spi_addr, SPI_CH0CTRL, 4
      SBBO r2, reg_spi_addr, SPI_CH1CTRL, 4   

      // DAC power-on reset sequence
      MOV r2, (0x07 << AD5668_COMMAND_OFFSET)
      DAC_WRITE r2

      // Initialise ADC
      // (the word read back overwrites r2 and is not used)
      MOV r2, AD7699_CFG_MASK | (0 << AD7699_CHANNEL_OFFSET) | (0 << AD7699_SEQ_OFFSET)
      ADC_WRITE r2, r2

      // Enable DAC internal reference
      MOV r2, (0x08 << AD5668_COMMAND_OFFSET) | (0x01 << AD5668_REF_OFFSET)
      DAC_WRITE r2
	
      // Read ADC ch0 and ch1: result is always 2 samples behind so start here
      // (both replies are discarded; the audio loop then requests channel 2
      // first and receives the channel 0 result in return)
      MOV r2, AD7699_CFG_MASK | (0x00 << AD7699_CHANNEL_OFFSET)
      ADC_WRITE r2, r2

      MOV r2, AD7699_CFG_MASK | (0x01 << AD7699_CHANNEL_OFFSET)
      ADC_WRITE r2, r2
SPI_INIT_DONE:	
	
// Prepare McASP0 for audio
// This runs whether or not the SPI ADC/DAC are enabled. The writes form a
// hardware bring-up sequence and their order must be kept. Every macro used
// here clobbers r27 and/or r28.
MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0

MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
MCASP_REG_WRITE MCASP_PFUNC, 0x00		// All pins are McASP
MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
MCASP_REG_WRITE MCASP_DLBCTL, 0x00
MCASP_REG_WRITE MCASP_DITCTL, 0x00
MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
MCASP_REG_WRITE MCASP_RTDM, 0x03		// Enable TDM slots 0 and 1
MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
// NOTE(review): the value below is identical to the AHCLKRCTL value above, whose
// comment says "internal clock"; confirm which of the two comments is accurate.
MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
MCASP_REG_WRITE MCASP_XTDM, 0x03		// Enable TDM slots 0 and 1
MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
	
MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00	// Disable FIFOs
MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00

MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors

// Each step below sets one bit in RGBLCTL/XGBLCTL and waits until it reads back as set
MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST

// The above write sequence will have temporarily changed the AHCLKX frequency
// The PLL needs time to settle or the sample rate will be unstable and possibly
// cause an underrun. Give it ~1ms before going on.
// 10ns per loop iteration = 10^-8s --> 10^5 iterations needed

      // GPIO1 bit 28 is driven high for the duration of the wait and low again
      // afterwards (presumably a debug/timing marker; the same pin is toggled
      // on every buffer swap later in the file)
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_SETDATAOUT
      SBBO r2, r3, 0, 4

MOV r2, 100000
MCASP_INIT_WAIT:	
      SUB r2, r2, 1
      QBNE MCASP_INIT_WAIT, r2, 0

      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
      SBBO r2, r3, 0, 4
	
MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 10)	// Set XSRCLR
MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 3)	// Set RSMRST
MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 11)	// Set XSMRST

MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00		// Write to the transmit buffer to prevent underflow

MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 4)	// Set RFRST
MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 12)	// Set XFRST

// Buffer layout set-up. With N = number of SPI channels (2, 4 or 8) and
// bufsize = frames per buffer as supplied by the ARM:
//   SPI DAC buffer 1 starts at   N[ch] * 2[bytes] * bufsize
//   McASP DAC buffer 1 starts at 2[ch] * 2[bytes] * (N/4)[samples/spi] * bufsize = N * bufsize
LBBO reg_frame_total, reg_comm_addr, COMM_BUFFER_FRAMES, 4  // Total frame count (SPI; 0.5x-2x for McASP)
LMBD r2, reg_num_channels, 1		 // 1, 2 or 3 for 2, 4 or 8 channels
LDI reg_dac_buf0, 0			 // SPI DAC buffer 0 starts at offset 0
LDI reg_mcasp_buf0, 0			 // McASP DAC buffer 0 starts at offset 0
LSL reg_mcasp_buf1, reg_frame_total, r2	 // N * bufsize
LSL reg_dac_buf1, reg_mcasp_buf1, 1	 // 2 * N * bufsize
CLR reg_flags, reg_flags, FLAG_BIT_BUFFER1  // Begin on buffer 0
LDI r2, 0
SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4  // Frame counter seen by the ARM starts at 0
	
// Here we are out of sync by one TDM slot since the 0 word transmitted above will have occupied
// the first output slot. Send one more word before jumping into the loop.
// (r2 is scratch here; the *_EXT macros clobber r27/r28.)
MCASP_DAC_WAIT_BEFORE_LOOP:	
      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
      QBBC MCASP_DAC_WAIT_BEFORE_LOOP, r2, MCASP_XSTAT_XDATA_BIT

      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00

// Likewise, read and discard the first sample we get back from the ADC. This keeps the DAC and ADC
// in sync in terms of which TDM slot we are reading (empirically found that we should throw this away
// rather than keep it and invert the phase)
MCASP_ADC_WAIT_BEFORE_LOOP:
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_ADC_WAIT_BEFORE_LOOP, r2, MCASP_RSTAT_RDATA_BIT

      // The received word lands in r2 and is dropped
      MCASP_REG_READ_EXT MCASP_RBUF, r2
	
WRITE_ONE_BUFFER:
      // Entry point for processing one buffer: write a buffer of DAC samples
      // and read a buffer of ADC samples, for both the SPI and McASP paths.
      // Sets the four "current" pointers from the active buffer start pointers
      // (reg_dac_buf0 / reg_mcasp_buf0, which are swapped after every buffer)
      // and resets the frame counter. r2 is scratch.
      MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
      LMBD r2, reg_num_channels, 1		// 1, 2 or 3 for 2, 4 or 8 channels
      LSL reg_adc_current, reg_frame_total, r2
      LSL reg_adc_current, reg_adc_current, 2   // N * 2 * 2 * bufsize
      ADD reg_adc_current, reg_adc_current, reg_dac_current // ADC: starts N * 2 * 2 * bufsize beyond DAC
      MOV reg_mcasp_dac_current, reg_mcasp_buf0 // McASP: set current DAC pointer
      LSL reg_mcasp_adc_current, reg_frame_total, r2 // McASP ADC: starts (N/2)*2*2*bufsize beyond DAC
      LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1
      // Plain ADD, not ADC (add-with-carry): the pointer must not depend on
      // whatever carry the previous arithmetic instruction left behind
      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
      MOV reg_frame_current, 0
	
WRITE_LOOP:
      // One pass of WRITE_LOOP handles one frame: ADC_DAC_LOOP below runs until
      // the channel index r1 reaches reg_num_channels, advancing by two channels
      // and one McASP word per iteration.
      // Write N channels to DAC from successive values in memory
      // At the same time, read N channels from ADC
      // Unrolled by a factor of 2 to get high and low words
      MOV r1, 0
ADC_DAC_LOOP:
      QBBC SPI_DAC_LOAD_DONE, reg_flags, FLAG_BIT_USE_SPI
      // Load next 2 SPI DAC samples and store zero in their place
      LBCO reg_dac_data, C_ADC_DAC_MEM, reg_dac_current, 4
      MOV r2, 0
      SBCO r2, C_ADC_DAC_MEM, reg_dac_current, 4
      ADD reg_dac_current, reg_dac_current, 4
SPI_DAC_LOAD_DONE:

      // On even iterations, load two more samples and choose the first one
      // On odd iterations, transmit the second of the samples already loaded
      // (even/odd is tracked by FLAG_BIT_MCASP_HWORD, toggled once per iteration,
      // rather than by r1 as in the commented-out line below)
      // QBBS MCASP_DAC_HIGH_WORD, r1, 1
      QBBS MCASP_DAC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
MCASP_DAC_LOW_WORD:	
      // Load next 2 Audio DAC samples and store zero in their place
      LBCO reg_mcasp_dac_data, C_MCASP_MEM, reg_mcasp_dac_current, 4
      MOV r2, 0
      SBCO r2, C_MCASP_MEM, reg_mcasp_dac_current, 4
      ADD reg_mcasp_dac_current, reg_mcasp_dac_current, 4

      // Mask out the low word (first in little endian)
      // r7 carries the 16-bit word that will be written to the transmit buffer
      MOV r2, 0xFFFF
      AND r7, reg_mcasp_dac_data, r2
	
      QBA MCASP_WAIT_XSTAT
MCASP_DAC_HIGH_WORD:
      // Take the high word of the previously loaded data
      LSR r7, reg_mcasp_dac_data, 16
	
      // Every 2 channels we send one audio sample; this loop already
      // sends exactly two SPI channels.
      // Wait for McASP XSTAT[XDATA] to set indicating we can write more data
MCASP_WAIT_XSTAT:
      LBBO r2, reg_mcasp_addr, MCASP_XSTAT, 4
      QBBC MCASP_WAIT_XSTAT, r2, MCASP_XSTAT_XDATA_BIT

      MCASP_REG_WRITE_EXT MCASP_XBUF, r7
	
      // Same idea with ADC: even iterations, load the sample into the low word, odd
      // iterations, load the sample into the high word and store
      // QBBS MCASP_ADC_HIGH_WORD, r1, 1
      QBBS MCASP_ADC_HIGH_WORD, reg_flags, FLAG_BIT_MCASP_HWORD
MCASP_ADC_LOW_WORD:	
      // Start ADC data at 0
      LDI reg_mcasp_adc_data, 0
	
      // Now wait for a received word to become available from the audio ADC
MCASP_WAIT_RSTAT_LOW:
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_WAIT_RSTAT_LOW, r2, MCASP_RSTAT_RDATA_BIT

      // Mask low word and store in ADC data register
      MCASP_REG_READ_EXT MCASP_RBUF, r3
      MOV r2, 0xFFFF
      AND reg_mcasp_adc_data, r3, r2
      QBA MCASP_ADC_DONE

MCASP_ADC_HIGH_WORD:	
      // Wait for a received word to become available from the audio ADC
MCASP_WAIT_RSTAT_HIGH:
      LBBO r2, reg_mcasp_addr, MCASP_RSTAT, 4
      QBBC MCASP_WAIT_RSTAT_HIGH, r2, MCASP_RSTAT_RDATA_BIT

      // Read data and shift 16 bits to the left (into the high word)
      // (the low word captured on the previous iteration is kept by the OR)
      MCASP_REG_READ_EXT MCASP_RBUF, r3
      LSL r3, r3, 16
      OR reg_mcasp_adc_data, reg_mcasp_adc_data, r3

      // Now store the result and increment the pointer
      SBCO reg_mcasp_adc_data, C_MCASP_MEM, reg_mcasp_adc_current, 4
      ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
MCASP_ADC_DONE:	
      // SPI half of one ADC_DAC_LOOP iteration: sends two DAC channels (r1 and
      // r1 + 1) from reg_dac_data and collects two ADC results into reg_adc_data.
      // Skipped entirely when the SPI ADC/DAC are disabled.
      // Scratch: r2, r7, r8 (plus r27/r28 inside the macros).
      QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
	
      // DAC: transmit low word (first in little endian)
      // Word layout: command 0x03 | channel address (r1) | 16-bit sample
      MOV r2, 0xFFFF
      AND r7, reg_dac_data, r2
      LSL r7, r7, AD5668_DATA_OFFSET
      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
      OR r7, r7, r8
      LSL r8, r1, AD5668_ADDRESS_OFFSET
      OR r7, r7, r8
      DAC_WRITE r7

      // Read ADC channels: result is always 2 commands behind
      // Start by reading channel 2 (result is channel 0) and go
      // to N+2, but masking the channel number to be between 0 and N-1
      LDI reg_adc_data, 0
      ADD r8, r1, 2
      SUB r7, reg_num_channels, 1
      AND r8, r8, r7
      LSL r8, r8, AD7699_CHANNEL_OFFSET
      MOV r7, AD7699_CFG_MASK
      OR r7, r7, r8
      ADC_WRITE r7, r7

      // Mask out only the relevant 16 bits and store in reg_adc_data
      MOV r2, 0xFFFF
      AND reg_adc_data, r7, r2

      // Increment channel index
      ADD r1, r1, 1

      // DAC: transmit high word (second in little endian)
      LSR r7, reg_dac_data, 16
      LSL r7, r7, AD5668_DATA_OFFSET
      MOV r8, (0x03 << AD5668_COMMAND_OFFSET)
      OR r7, r7, r8
      LSL r8, r1, AD5668_ADDRESS_OFFSET
      OR r7, r7, r8
      DAC_WRITE r7

      // Request the next ADC channel, as above. reg_adc_data must NOT be
      // cleared here: it already holds the low-word result captured above,
      // and the new result is merged into its high word below.
      ADD r8, r1, 2
      SUB r7, reg_num_channels, 1
      AND r8, r8, r7
      LSL r8, r8, AD7699_CHANNEL_OFFSET
      MOV r7, AD7699_CFG_MASK
      OR r7, r7, r8
      ADC_WRITE r7, r7

      // Move this result up to the 16 high bits
      LSL r7, r7, 16
      OR reg_adc_data, reg_adc_data, r7

      // Store 2 ADC words in memory
      SBCO reg_adc_data, C_ADC_DAC_MEM, reg_adc_current, 4
      ADD reg_adc_current, reg_adc_current, 4

      // Toggle the high/low word for McASP control (since we send one word out of
      // 32 bits for each pair of SPI channels)
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)
	
      // Repeat 4 times for 8 channels (2 samples per loop, r1 += 1 already happened)
      // For 4 or 2 channels, repeat 2 or 1 times, according to flags
      ADD r1, r1, 1
      QBNE ADC_DAC_LOOP, r1, reg_num_channels
      QBA ADC_DAC_LOOP_DONE

SPI_SKIP_WRITE:
      // We get here only if the SPI ADC and DAC are disabled
      // Just keep the loop going for McASP

      // Toggle the high/low word for McASP control (since we send one word out of
      // 32 bits for each pair of SPI channels)
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_MCASP_HWORD)

      // Advance the channel index by two, exactly as the SPI path does, so the
      // number of McASP words per frame is the same in both modes
      // (reg_num_channels keeps its default of 8 when SPI is disabled)
      ADD r1, r1, 2
      QBNE ADC_DAC_LOOP, r1, reg_num_channels
	
ADC_DAC_LOOP_DONE:	
      // Increment number of frames, see if we have more to write
      ADD reg_frame_current, reg_frame_current, 1
      QBNE WRITE_LOOP, reg_frame_current, reg_frame_total

WRITE_LOOP_DONE:
      // Now done, swap the buffers and do the next one
      // Use r2 as a temp register
      // After the swap, reg_dac_buf0 / reg_mcasp_buf0 always name the buffer
      // that the next pass through WRITE_ONE_BUFFER will process
      MOV r2, reg_dac_buf0
      MOV reg_dac_buf0, reg_dac_buf1
      MOV reg_dac_buf1, r2
      MOV r2, reg_mcasp_buf0
      MOV reg_mcasp_buf0, reg_mcasp_buf1
      MOV reg_mcasp_buf1, r2

      // Notify ARM of buffer swap
      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1)
      AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out everything but the low bit
      SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4

      // Increment the frame count in the comm buffer (for status monitoring)
      // The updated count is left in r2, where the LED code that follows reads it
      LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
      ADD r2, r2, reg_frame_total
      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4

      // Status LED: skipped when no LED address is configured. Otherwise the
      // pin follows bit 12 (0x1000) of the running frame count that the code
      // above left in r2, i.e. it changes state every 4096 frames.
      // Scratch: r1 (pin mask), r2 (register offset), r3 (GPIO address).
      LBBO r3, reg_comm_addr, COMM_LED_ADDRESS, 4
      QBEQ LED_BLINK_DONE, r3, 0
      LBBO r1, reg_comm_addr, COMM_LED_PIN_MASK, 4
      QBBC LED_BLINK_OFF, r2, 12
      MOV r2, GPIO_SETDATAOUT    // Bit set: write the mask to the set register
      QBA LED_BLINK_WRITE
LED_BLINK_OFF:
      MOV r2, GPIO_CLEARDATAOUT  // Bit clear: write the mask to the clear register
LED_BLINK_WRITE:
      ADD r3, r3, r2             // Address of the chosen GPIO register
      SBBO r1, r3, 0, 4
LED_BLINK_DONE:	
	
      // Mirror the current buffer flag onto GPIO1 bit 28: high while
      // FLAG_BIT_BUFFER1 is set, low otherwise (presumably a debug/timing
      // marker; this runs unconditionally on every buffer swap)
      QBBC TESTLOW, reg_flags, FLAG_BIT_BUFFER1
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_SETDATAOUT
      SBBO r2, r3, 0, 4
      QBA TESTDONE
TESTLOW:
      MOV r2, 1 << 28
      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
      SBBO r2, r3, 0, 4
TESTDONE:
	 
      // Check if we should finish: flag is zero as long as it should run
      // (falls through into CLEANUP once the ARM writes a nonzero value)
      LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
      QBEQ WRITE_ONE_BUFFER, r2, 0

CLEANUP:
      // Shutdown path, reached once the ARM sets COMM_SHOULD_STOP: stop the
      // McASP, disable SPI channel 0 if SPI was in use, signal the ARM, halt.
      MCASP_REG_WRITE MCASP_GBLCTL, 0x00	// Turn off McASP

      // Turn off SPI if enabled
      QBBC SPI_CLEANUP_DONE, reg_flags, FLAG_BIT_USE_SPI
	
      // Clear bits 13 and 27 of the channel 0 configuration register
      MOV r3, SPI_BASE + SPI_CH0CONF
      LBBO r2, r3, 0, 4
      CLR r2, r2, 13
      CLR r2, r2, 27
      SBBO r2, r3, 0, 4

      // Disable channel 0. The enable is bit 0 of the channel control register
      // (start-up turns the channel on by writing 0x01), so that is the bit to
      // clear here; clearing bit 1 would leave the channel running.
      // NOTE(review): channel 1 (the ADC) is enabled at start-up as well and is
      // not disabled here - confirm whether that is intentional.
      MOV r3, SPI_BASE + SPI_CH0CTRL
      LBBO r2, r3, 0, 4
      CLR r2, r2, 0
      SBBO r2, r3, 0, 4
SPI_CLEANUP_DONE:
	
      // Signal the ARM that we have finished 
      MOV R31.b0, PRU0_ARM_INTERRUPT + 16
      HALT