diff pru_rtaudio.p @ 67:472e892c6e41

Merge newapi into default
author Andrew McPherson <a.mcpherson@qmul.ac.uk>
date Fri, 17 Jul 2015 15:28:18 +0100
parents 0d80ff9e2227 579c86316008
children b697e82ebb25
line wrap: on
line diff
--- a/pru_rtaudio.p	Sun Feb 08 00:20:01 2015 +0000
+++ b/pru_rtaudio.p	Fri Jul 17 15:28:18 2015 +0100
@@ -31,7 +31,8 @@
 #define GPIO_CLEARDATAOUT 0x190
 #define GPIO_SETDATAOUT 0x194
 
-#define PRU0_ARM_INTERRUPT 19
+#define PRU0_ARM_INTERRUPT 19	// Interrupt signalling we're done
+#define PRU1_ARM_INTERRUPT 20	// Interrupt signalling a block is ready
 
 #define C_ADC_DAC_MEM C24     // PRU0 mem
 #ifdef DBOX_CAPE
@@ -81,7 +82,8 @@
 #define COMM_FRAME_COUNT      32	  // How many frames have elapse since beginning
 #define COMM_USE_SPI          36          // Whether or not to use SPI ADC and DAC
 #define COMM_NUM_CHANNELS     40	  // Low 2 bits indicate 8 [0x3], 4 [0x1] or 2 [0x0] channels
-	
+#define COMM_USE_DIGITAL  44	  // Whether or not to use DIGITAL
+
 #define MCASP0_BASE 0x48038000
 #define MCASP1_BASE 0x4803C000
 
@@ -180,10 +182,15 @@
 #define FLAG_BIT_BUFFER1	0
 #define FLAG_BIT_USE_SPI	1
 #define FLAG_BIT_MCASP_HWORD	2		// Whether we are on the high word for McASP transmission
-	
+#define FLAG_BIT_USE_DIGITAL 3
 // Registers used throughout
 
 // r1, r2, r3 are used for temporary storage
+#define MEM_DIGITAL_BASE 0x11000 //Base address for DIGITAL : Shared RAM (0x10000) + 0x1000 bytes (0x400 words)
+#define MEM_DIGITAL_BUFFER1_OFFSET 0x400 //Byte offset of DIGITAL_BUFFER1 from MEM_DIGITAL_BASE: 0x400 bytes = 256 words after the start of buffer 0.
+// 256 is the maximum number of frames allowed (one 4-byte word per frame)
+
+#define reg_digital_current r6  // Pointer to current storage location of DIGITAL
 #define reg_num_channels	r9		// Number of SPI ADC/DAC channels to use
 #define reg_frame_current	r10		// Current frame count in SPI ADC/DAC transfer
 #define reg_frame_total		r11		// Total frame count for SPI ADC/DAC
@@ -205,182 +212,182 @@
 // r27, r28 used in macros
 #define reg_mcasp_addr		r29		// Base address for McASP
 
-// Convert float to 16-bit int, multiplying by 32768
-// Converts -1.0 to 1.0 to a full 16-bit range
-// input and output can safely be the same register
-.macro FLOAT_TO_INT16
-.mparam input, output
-      // int exponent = ((input >> 23) & 0xFF)
-      LSR r27, input, 23  // exponent goes in r27
-      AND r27, r27, 0xFF
+//0  P8_07 36 0x890/090 66 gpio2[2]
+//1  P8_08 37 0x894/094 67 gpio2[3]
+//2  P8_09 39 0x89c/09c 69 gpio2[5]
+//3  P8_10 38 0x898/098 68 gpio2[4]
+//4  P8_11 13 0x834/034 45 gpio1[13]
+//5  P8_12 12 0x830/030 44 gpio1[12]
+//6  P9_12 30 0x878/078 60 gpio1[28]
+//7  P9_14 18 0x848/048 50 gpio1[18]
+//8  P8_15 15 0x83c/03c 47 gpio1[15]
+//9  P8_16 14 0x838/038 46 gpio1[14]
+//10 P9_16 19 0x84c/04c 51 gpio1[19]
+//11 P8_18 35 0x88c/08c 65 gpio2[1]
+//12 P8_27 56 0x8e0/0e0 86 gpio2[22]
+//13 P8_28 58 0x8e8/0e8 88 gpio2[24]
+//14 P8_29 57 0x8e4/0e4 87 gpio2[23]
+//15 P8_30 59 0x8ec/0ec 89 gpio2[25]
 
-      // Actual exponent is 127 less than the above; below -15 we
-      // should return 0. So check if it is less than 112.
-      QBLE EXPONENT_GREQ_MINUS15, r27, 112
-      LDI output, 0
-      QBA FLOAT_TO_INT16_DONE
-EXPONENT_GREQ_MINUS15:	
+//generic GPIOs constants
+//#define GPIO1 0x4804c000
+#define GPIO2 0x481ac000
+//#define GPIO_CLEARDATAOUT 0x190 //SETDATAOUT is CLEARDATAOUT+4
+#define GPIO_OE 0x134 
+#define GPIO_DATAIN 0x138
 
-      // Next check if exponent is greater than or equal to 0 (i.e.
-      // 127 in our adjusted version. If so we return the max.
-      QBGT EXPONENT_LT_ZERO, r27, 127
-      QBBS NEGATIVE_MAX, input, 31  // Is sign negative?
-      LDI output, 32767		    // Max positive value
-      QBA FLOAT_TO_INT16_DONE
-NEGATIVE_MAX:
-      LDI output, 32768	            // Actually will be -32768 in signed
-      QBA FLOAT_TO_INT16_DONE
-EXPONENT_LT_ZERO:	
-
-      // Mask out the mantissa and shift
-      // int mantissa = (input & 0x7FFFFF) | (1 << 23)
-      MOV r28, 0x007FFFFF
-      AND r28, r28, input
-      SET r28, 23
-
-      // Shift right by -(exponent - 127 - 8) to produce an int
-      // after effectively multiplying by 2^15
-      // ---> (135 - exponent)
-      RSB r27, r27, 135
-      LSR r28, r28, r27
-
-      // Finally, check the sign bit and invert if needed
-      QBBS NEGATIVE_RESULT, input, 31
-      // Positive result: but might be 32768 so needs checking
-      LDI r27, 0x7FFF
-      MIN output, r27, r28	
-      QBA FLOAT_TO_INT16_DONE
-NEGATIVE_RESULT:
-      // Take negative: invert the bits and add 1
-      LDI r27, 0xFFFF
-      XOR r28, r28, r27
-      ADD r28, r28, 1
-      CLR output, r28, 16         // Clear carry bit if present
-FLOAT_TO_INT16_DONE:
+.macro READ_GPIO_BITS //copies the level of one GPIO input pin into the data half of the digital word
+.mparam gpio_data, gpio_num_bit, digital_bit, digital //gpio_data: register holding the value read from DATAIN, gpio_num_bit: bit of the pin in gpio_data, digital_bit: direction bit of the channel in digital, digital: word to update
+    QBBC DONE, digital, digital_bit //direction bit is 0: the pin is set as an output, nothing to do here
+    QBBC CLEAR, gpio_data, gpio_num_bit //the input reads low: go and clear the data bit
+    SET digital, digital_bit+16 //the input reads high: set the data bit, which sits 16 bits above the direction bit
+    QBA DONE
+    CLEAR:
+        CLR digital, digital_bit+16 //the input reads low: clear the data bit
+        QBA DONE //NOTE(review): branches to the label right below, looks redundant
+    DONE:
 .endm
 
-
-// Convert float to 16-bit unsigned int, multiplying by 65536
-// Converts 0.0 to 1.0 to a full 16-bit range
-// input and output can safely be the same register
-.macro FLOAT_TO_UINT16
-.mparam input, output
-      QBBC NONNEGATIVE, input, 31   // Is sign negative?
-      LDI output, 0		    // All < 0 inputs produce 0 output
-      QBA FLOAT_TO_UINT16_DONE
-NONNEGATIVE:
-      // int exponent = ((input >> 23) & 0xFF)
-      LSR r27, input, 23  // exponent goes in r27
-      AND r27, r27, 0xFF
-
-      // Actual exponent is 127 less than the above; below -16 we
-      // should return 0. So check if it is less than 111.
-      QBLE EXPONENT_GREQ_MINUS16, r27, 111
-      LDI output, 0
-      QBA FLOAT_TO_UINT16_DONE
-EXPONENT_GREQ_MINUS16:	
-
-      // Next check if exponent is greater than or equal to 0 (i.e.
-      // 127 in our adjusted version. If so we return the max.
-      QBGT EXPONENT_LT_ZERO, r27, 127
-      LDI output, 65535		    // Max positive value
-      QBA FLOAT_TO_UINT16_DONE
-EXPONENT_LT_ZERO:	
-
-      // Mask out the mantissa and shift
-      // int mantissa = (input & 0x7FFFFF) | (1 << 23)
-      MOV r28, 0x007FFFFF
-      AND r28, r28, input
-      SET r28, 23
-
-      // Shift right by -(exponent - 127 - 7) to produce an int
-      // after effectively multiplying by 2^16
-      // ---> (134 - exponent)
-      RSB r27, r27, 134
-      LSR r28, r28, r27
-
-      // Check for 65536 and clip at 65535
-      LDI r27, 0xFFFF
-      MIN output, r27, r28	
-FLOAT_TO_UINT16_DONE:
+.macro SET_GPIO_BITS //translates direction and output value of one digital channel into the per-module GPIO register images
+.mparam gpio_oe, gpio_setdataout, gpio_cleardataout, gpio_num_bit, digital_bit, digital //updates, in registers only, the bit gpio_num_bit of the images of GPIO_OE, GPIO_SETDATAOUT and GPIO_CLEARDATAOUT; the macro itself does not access memory
+//Remember that the GPIO_OE Output data enable register behaves as follows for each bit:
+//0 = The corresponding GPIO pin is configured as an output.
+//1 = The corresponding GPIO pin is configured as an input.
+    QBBS SETINPUT, digital, digital_bit //direction bit is 1: the channel is an input
+    CLR gpio_oe, gpio_num_bit //direction bit is 0: it is an output, configure pin as output
+    QBBC CLEARDATAOUT, digital, digital_bit+16 // check the requested output value (data bit). If it is 0, branch
+    SET gpio_setdataout, gpio_num_bit //if it is 1, request the output to go high
+    QBA DONE
+CLEARDATAOUT:
+    SET gpio_cleardataout, gpio_num_bit // request the output to go low
+    QBA DONE
+SETINPUT: //if it is an input, set the OE bit; the setdataout/cleardataout images are left untouched
+    SET gpio_oe, gpio_num_bit
+    QBA DONE //NOTE(review): branches to the label right below, looks redundant
+DONE:
 .endm
 
-	
-// Convert a 16-bit int to float. This macro assumes that the upper
-// 16 bits of input are 0 and may behave strangely if this is not the case.
-// input and output must be different registers
-.macro INT16_TO_FLOAT
-.mparam input, output
-      // Check edge cases first: 0 and -32768 (= 32768 in unsigned)
-      QBNE INPUT_NOT_ZERO, input, 0
-      LDI output, 0
-      QBA INT16_TO_FLOAT_DONE
-INPUT_NOT_ZERO:
-      LDI r28, 32768
-      QBNE INPUT_NOT_MIN, input, r28
-      MOV output, 0xBF800000	// -1.0
-      QBA INT16_TO_FLOAT_DONE
-INPUT_NOT_MIN:
-      // Check for negative values = values with bit 15 set
-      MOV output, input
-      QBBC NEGATIVE_DONE, output, 15
-      LDI r28, 0xFFFF
-      XOR output, output, r28
-      ADD output, output, 1
-      CLR output, 16	 // Clear any carry bit
-NEGATIVE_DONE:
-      // Now we need to find the highest bit that is 1 in order to determine
-      // the exponent
-      LMBD r28, output, 1
+QBA START // when first starting, go to START, skipping this section.
 
-      // Calculate exponent field: 127 + 8 + (r28 - 23) = 112 + r28
-      ADD r27, r28, 112
-	
-      // Take 23 minus the result to get the shift	
-      RSB r28, r28, 23     
-      LSL output, output, r28
+DIGITAL:
+//IMPORTANT: do NOT use r28 in this routine, as it contains the return address for JAL
+//r27 is now the input word passed in render(), one word per frame
+//[31:16]: data (1=high, 0=low), [15:0]: direction (0=output, 1=input)
 
-      // Now clear bit 23 (implicitly 1) and replace it with the exponent
-      CLR output, output, 23
-      LSL r27, r27, 23
-      OR  output, output, r27
-	
-      // Put the sign bit back in place
-      QBBC INT16_TO_FLOAT_DONE, input, 15
-      SET output, 31
-INT16_TO_FLOAT_DONE:	
-.endm
 
-// Convert a 16-bit unsigned int to float.
-.macro UINT16_TO_FLOAT
-.mparam input, output
-      MOV output, input
-	
-      // Clear upper 16 bits
-      LDI r27, 0xFFFF
-      AND output, output, r27
+//Preparing the gpio_oe, gpio_cleardataout and gpio_setdataout for each module
+//r2 will hold GPIO1_OE
+//load current status of GPIO_OE in r2
+    MOV r2, GPIO1 | GPIO_OE 
+    //it takes 190ns to go through the next instruction
+    LBBO r2, r2, 0, 4
+//GPIO1-start
+//process oe and datain and prepare dataout for GPIO1
+//r7 will contain GPIO1_CLEARDATAOUT
+//r8 will contain GPIO1_SETDATAOUT
+    MOV r8, 0 
+    MOV r7, 0
+//map digital channels 4-10 to their gpio1 pins,
+//r2 is gpio1_oe, r8 is gpio1_setdataout, r7 is gpio1_cleardataout, r27 is the input word
+//the following operations will read from r27 and update r2,r7,r8
+    SET_GPIO_BITS r2, r8, r7, 13, 4, r27
+    SET_GPIO_BITS r2, r8, r7, 12, 5, r27
+    SET_GPIO_BITS r2, r8, r7, 28, 6, r27
+    SET_GPIO_BITS r2, r8, r7, 18, 7, r27
+    SET_GPIO_BITS r2, r8, r7, 15, 8, r27
+    SET_GPIO_BITS r2, r8, r7, 14, 9, r27
+    SET_GPIO_BITS r2, r8, r7, 19, 10, r27
+//set the output enable register for gpio1.
+    MOV r3, GPIO1 | GPIO_OE  //use r3 as a temp register
+    SBBO r2, r3, 0, 4 //takes two cycles (10ns)
+//GPIO1-end
+// r2 is now unused
 
-      // If zero, we're done
-      QBEQ UINT16_TO_FLOAT_DONE, output, 0
-	
-      // Now we need to find the highest bit that is 1 in order to determine
-      // the exponent
-      LMBD r28, output, 1
+//GPIO2-start
+//r3 will hold GPIO2_OE
+//load current status of GPIO_OE in r3
+    MOV r3, GPIO2 | GPIO_OE  
+//it takes 200ns to go through the next instructions
+    LBBO r3, r3, 0, 4
+//process oe and datain and prepare dataout for GPIO2
+//r4 will contain GPIO2_CLEARDATAOUT
+//r5 will contain GPIO2_SETDATAOUT
+    MOV r5, 0
+    MOV r4, 0 
+//map digital channels 0-3 and 11-15 to their gpio2 pins
+//r3 is gpio2_oe, r5 is gpio2_setdataout, r4 is gpio2_cleardataout, r27 is the input word
+//the following operations will read from r27 and update r3,r4,r5
+    SET_GPIO_BITS r3, r5, r4, 2, 0, r27
+    SET_GPIO_BITS r3, r5, r4, 3, 1, r27
+    SET_GPIO_BITS r3, r5, r4, 5, 2, r27
+    SET_GPIO_BITS r3, r5, r4, 4, 3, r27
+    SET_GPIO_BITS r3, r5, r4, 1, 11, r27
+    SET_GPIO_BITS r3, r5, r4, 22, 12, r27
+    SET_GPIO_BITS r3, r5, r4, 24, 13, r27
+    SET_GPIO_BITS r3, r5, r4, 23, 14, r27
+    SET_GPIO_BITS r3, r5, r4, 25, 15, r27
+//set the output enable register for gpio2.
+    MOV r2, GPIO2 | GPIO_OE  //use r2 as a temp register
+    SBBO r3, r2, 0, 4 //takes two cycles (10ns)
+//GPIO2-end
+//r3 is now unused
 
-      // Calculate exponent field: 127 + 7 + (r28 - 23) = 111 + r28
-      ADD r27, r28, 111
-	
-      // Take 23 minus the result to get the shift	
-      RSB r28, r28, 23     
-      LSL output, output, r28
+//load current inputs in r2, r3
+//r2 will contain GPIO1_DATAIN
+//r3 will contain GPIO2_DATAIN
+//load the memory locations
+    MOV r2, GPIO1 | GPIO_DATAIN  
+    MOV r3, GPIO2 | GPIO_DATAIN  
+    //takes 375 ns to go through the next two instructions
+//read the datain
+    LBBO r2, r2, 0, 4
+    LBBO r3, r3, 0, 4
+//now read from r2 and r3 only the channels that are set as input in the lower word of r27 
+// and set their value in the high word of r27
+//GPIO1
+    READ_GPIO_BITS r2, 13, 4, r27
+    READ_GPIO_BITS r2, 12, 5, r27
+    READ_GPIO_BITS r2, 28, 6, r27
+    READ_GPIO_BITS r2, 18, 7, r27
+    READ_GPIO_BITS r2, 15, 8, r27
+    READ_GPIO_BITS r2, 14, 9, r27
+    READ_GPIO_BITS r2, 19, 10, r27
+//GPIO2
+    READ_GPIO_BITS r3, 2, 0, r27
+    READ_GPIO_BITS r3, 3, 1, r27
+    READ_GPIO_BITS r3, 5, 2, r27
+    READ_GPIO_BITS r3, 4, 3, r27
+    READ_GPIO_BITS r3, 1, 11, r27
+    READ_GPIO_BITS r3, 22, 12, r27
+    READ_GPIO_BITS r3, 24, 13, r27
+    READ_GPIO_BITS r3, 23, 14, r27
+    READ_GPIO_BITS r3, 25, 15, r27
+//r2, r3 are now unused
 
-      // Now clear bit 23 (implicitly 1) and replace it with the exponent
-      CLR output, output, 23
-      LSL r27, r27, 23
-      OR  output, output, r27	
-UINT16_TO_FLOAT_DONE:	
+//now all the setdataout and cleardataout are ready to be written to the GPIO register.
+//CLEARDATAOUT and SETDATAOUT are consecutive positions in memory, so we just write 8 bytes to CLEARDATAOUT.
+//We can do this because we chose cleardata and setdata registers for a given GPIO to be consecutive
+//load the memory addresses to be written to
+    MOV r2, GPIO1 | GPIO_CLEARDATAOUT //use r2 as a temp register
+    MOV r3, GPIO2 | GPIO_CLEARDATAOUT //use r3 as a temp register
+//write 8 bytes for each GPIO
+//takes 30ns in total to go through the following two instructions
+    SBBO r7, r2, 0, 8 //store r7 and r8 in GPIO1_CLEARDATAOUT and GPIO1_SETDATAOUT 
+                      //takes 145ns to be effective when going low, 185ns when going high
+    SBBO r4, r3, 0, 8 //store r4 and r5 in GPIO2_CLEARDATAOUT and GPIO2_SETDATAOUT 
+                     //takes 95ns to be effective when going low, 130ns when going high
+//reversing the order of the two lines above will swap the performances between the GPIO modules
+//i.e.: the first line will always take 145ns/185ns and the second one will always take 95ns/130ns, 
+//regardless of whether the order is gpio1-gpio2 or gpio2-gpio1
+JMP r28.w0 // return to the caller: r28.w0 holds the return address stored by the JAL in ADC_WRITE_GPIO
+
+.macro HANG //useful for debugging: never returns, keeps toggling bit 14 of r30 forever
+DALOOP: 
+set r30.t14 //drive bit 14 of r30 high
+clr r30.t14 //drive bit 14 of r30 low
+QBA DALOOP //loop forever
 .endm	
-	
+
 // Bring CS line low to write to DAC
 .macro DAC_CS_ASSERT
       MOV r27, DAC_CS_PIN
@@ -466,6 +473,30 @@
       ADC_CS_UNASSERT
 .endm
 
+// Complete ADC write+read with chip select; also performs the DIGITAL I/O between starting the transfer and waiting for it to finish
+.macro ADC_WRITE_GPIO
+.mparam in, out, do_gpio // in: register passed to ADC_TX, out: register passed to ADC_RX, do_gpio: counter register deciding whether DIGITAL runs this time
+      ADC_CS_ASSERT
+      ADC_TX in
+      QBBC GPIO_DONE, reg_flags, FLAG_BIT_USE_DIGITAL //skip if DIGITAL is disabled
+      AND r27, do_gpio, 0x3 // only do a DIGITAL when the two low bits of do_gpio are 0, i.e. on every 4th value of do_gpio
+      QBNE GPIO_DONE, r27, 0 
+//from here to GPIO_DONE takes 1.8us, while usually ADC_WAIT_FOR_FINISH only waits for 1.14us.
+//TODO: it would be better to split the DIGITAL stuff in two parts:
+//- one taking place during DAC_WRITE which sets the GPIO_OE
+//- and the other during ADC_WRITE which actually reads DATAIN and writes CLEAR/SET DATAOUT
+                            //r27 holds the digital word for DIGITAL, so do not use r27 as a temporary from here to ...
+      LBBO r27, reg_digital_current, 0, 4 
+      JAL r28.w0, DIGITAL // note that this is not called as a macro, but with JAL. r28 will contain the return address. DIGITAL overwrites r2, r3, r4, r5, r7 and r8
+      SBBO r27, reg_digital_current, 0,   4 
+                            //..here you can start using r27 again
+      ADD reg_digital_current, reg_digital_current, 4 //increment pointer to the next 4-byte digital word
+GPIO_DONE:
+      ADC_WAIT_FOR_FINISH
+      ADC_RX out
+      ADC_CS_UNASSERT
+.endm
+
 // Write a McASP register
 .macro MCASP_REG_WRITE
 .mparam reg, value
@@ -537,11 +568,27 @@
       // Default number of channels in case SPI disabled
       LDI reg_num_channels, 8
 	
+      // Find out whether we should use DIGITAL
+      LBBO r2, reg_comm_addr, COMM_USE_DIGITAL, 4
+      QBEQ DIGITAL_INIT_DONE, r2, 0 // r2 == 0 means DIGITAL is disabled: skip setting the flag
+      SET reg_flags, reg_flags, FLAG_BIT_USE_DIGITAL 
+/* This block of code is not really needed, as the memory is initialized by ARM before the PRU is started.
+Will leave it here for future reference
+DIGITAL_INIT: //set the digital buffer to 0x0000ffff (all inputs), to prevent unwanted high outputs
+              //the loop is unrolled by a factor of four just to take advantage of the speed of SBBO on larger byte bursts, but there is no real need for it
+      MOV r2, 0x0000ffff //value to store. 0x0000ffff means all inputs
+      MOV r3, MEM_DIGITAL_BASE //start of the digital buffer
+      MOV r4, MEM_DIGITAL_BASE+2*MEM_DIGITAL_BUFFER1_OFFSET //end of the digital buffer
+DIGITAL_INIT_BUFFER_LOOP:
+      SBBO r2, r3, 0, 4 
+      ADD r3, r3, 4 //increment pointer
+      QBGT DIGITAL_INIT_BUFFER_LOOP, r3, r4 //loop until we reach the end of the buffer
+*/
+DIGITAL_INIT_DONE:
       // Find out whether we should use SPI ADC and DAC
       LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
       QBEQ SPI_FLAG_CHECK_DONE, r2, 0
       SET reg_flags, reg_flags, FLAG_BIT_USE_SPI
-
 SPI_FLAG_CHECK_DONE:
       // If we don't use SPI, then skip all this init
       QBBC SPI_INIT_DONE, reg_flags, FLAG_BIT_USE_SPI
@@ -614,45 +661,45 @@
       ADC_WRITE r2, r2
 SPI_INIT_DONE:	
 	
-      // Prepare McASP0 for audio
-      MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
-      MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
-      MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
-      MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
-      MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
-      MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
-      MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0
+// Prepare McASP0 for audio
+MCASP_REG_WRITE MCASP_GBLCTL, 0			// Disable McASP
+MCASP_REG_WRITE_EXT MCASP_SRCTL0, 0		// All serialisers off
+MCASP_REG_WRITE_EXT MCASP_SRCTL1, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL2, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL3, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL4, 0
+MCASP_REG_WRITE_EXT MCASP_SRCTL5, 0
 
-      MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
-      MCASP_REG_WRITE MCASP_PFUNC, 0x00			// All pins are McASP
-      MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
-      MCASP_REG_WRITE MCASP_DLBCTL, 0x00
-      MCASP_REG_WRITE MCASP_DITCTL, 0x00
-      MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
-      MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
-      MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
-      MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
-      MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
-      MCASP_REG_WRITE MCASP_RTDM, 0x03			// Enable TDM slots 0 and 1
-      MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
-      MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
-      MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
-      MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
-      MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
-      MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
-      MCASP_REG_WRITE MCASP_XTDM, 0x03			// Enable TDM slots 0 and 1
-      MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
+MCASP_REG_WRITE MCASP_PWRIDLESYSCONFIG, 0x02	// Power on
+MCASP_REG_WRITE MCASP_PFUNC, 0x00		// All pins are McASP
+MCASP_REG_WRITE MCASP_PDIR, MCASP_OUTPUT_PINS	// Set pin direction
+MCASP_REG_WRITE MCASP_DLBCTL, 0x00
+MCASP_REG_WRITE MCASP_DITCTL, 0x00
+MCASP_REG_WRITE MCASP_RMASK, MCASP_DATA_MASK	// 16 bit data receive
+MCASP_REG_WRITE MCASP_RFMT, MCASP_DATA_FORMAT	// Set data format
+MCASP_REG_WRITE MCASP_AFSRCTL, 0x100		// I2S mode
+MCASP_REG_WRITE MCASP_ACLKRCTL, 0x80		// Sample on rising edge
+MCASP_REG_WRITE MCASP_AHCLKRCTL, 0x8001		// Internal clock, not inv, /2; irrelevant?
+MCASP_REG_WRITE MCASP_RTDM, 0x03		// Enable TDM slots 0 and 1
+MCASP_REG_WRITE MCASP_RINTCTL, 0x00		// No interrupts
+MCASP_REG_WRITE MCASP_XMASK, MCASP_DATA_MASK	// 16 bit data transmit
+MCASP_REG_WRITE MCASP_XFMT, MCASP_DATA_FORMAT	// Set data format
+MCASP_REG_WRITE MCASP_AFSXCTL, 0x100		// I2S mode
+MCASP_REG_WRITE MCASP_ACLKXCTL, 0x00		// Transmit on rising edge, sync. xmit and recv
+MCASP_REG_WRITE MCASP_AHCLKXCTL, 0x8001		// External clock from AHCLKX
+MCASP_REG_WRITE MCASP_XTDM, 0x03		// Enable TDM slots 0 and 1
+MCASP_REG_WRITE MCASP_XINTCTL, 0x00		// No interrupts
 	
-      MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
-      MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
-      MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00		// Disable FIFOs
-      MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00
+MCASP_REG_WRITE_EXT MCASP_SRCTL_R, 0x02		// Set up receive serialiser
+MCASP_REG_WRITE_EXT MCASP_SRCTL_X, 0x01		// Set up transmit serialiser
+MCASP_REG_WRITE_EXT MCASP_WFIFOCTL, 0x00	// Disable FIFOs
+MCASP_REG_WRITE_EXT MCASP_RFIFOCTL, 0x00
 
-      MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
-      MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors
+MCASP_REG_WRITE MCASP_XSTAT, 0xFF		// Clear transmit errors
+MCASP_REG_WRITE MCASP_RSTAT, 0xFF		// Clear receive errors
 
-      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
-      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 1)	// Set RHCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 9)	// Set XHCLKRST
 
 // The above write sequence will have temporarily changed the AHCLKX frequency
 // The PLL needs time to settle or the sample rate will be unstable and possibly
@@ -663,7 +710,7 @@
       MOV r3, GPIO1 + GPIO_SETDATAOUT
       SBBO r2, r3, 0, 4
 
-      MOV r2, 100000
+MOV r2, 100000
 MCASP_INIT_WAIT:	
       SUB r2, r2, 1
       QBNE MCASP_INIT_WAIT, r2, 0
@@ -672,30 +719,62 @@
       MOV r3, GPIO1 + GPIO_CLEARDATAOUT
       SBBO r2, r3, 0, 4
 	
-      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
-      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
-      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
-      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 10)	// Set XSRCLR
-      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 3)	// Set RSMRST
-      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 11)	// Set XSMRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 0)	// Set RCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 8)	// Set XCLKRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 2)	// Set RSRCLR
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 10)	// Set XSRCLR
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 3)	// Set RSMRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 11)	// Set XSMRST
 
-      MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00		// Write to the transmit buffer to prevent underflow
+MCASP_REG_WRITE_EXT MCASP_XBUF, 0x00		// Write to the transmit buffer to prevent underflow
 
-      MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 4)	// Set RFRST
-      MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 12)	// Set XFRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_RGBLCTL, (1 << 4)	// Set RFRST
+MCASP_REG_SET_BIT_AND_POLL MCASP_XGBLCTL, (1 << 12)	// Set XFRST
 
 // Initialisation
-      LBBO reg_frame_total, reg_comm_addr, COMM_BUFFER_FRAMES, 4  // Total frame count (SPI; 0.5x-2x for McASP)
-      MOV reg_dac_buf0, 0                      // DAC buffer 0 start pointer
-      LSL reg_dac_buf1, reg_frame_total, 1     // DAC buffer 1 start pointer = N[ch]*2[bytes]*bufsize
-      LMBD r2, reg_num_channels, 1		 // Returns 1, 2 or 3 depending on the number of channels
-      LSL reg_dac_buf1, reg_dac_buf1, r2	 // Multiply by 2, 4 or 8 to get the N[ch] scaling above
-      MOV reg_mcasp_buf0, 0			 // McASP DAC buffer 0 start pointer
-      LSL reg_mcasp_buf1, reg_frame_total, r2    // McASP DAC buffer 1 start pointer = 2[ch]*2[bytes]*(N/4)[samples/spi]*bufsize
-      CLR reg_flags, reg_flags, FLAG_BIT_BUFFER1 // Bit 0 holds which buffer we are on
-      MOV r2, 0
-      SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4  // Start with frame count of 0
-	
+    LBBO reg_frame_total, reg_comm_addr, COMM_BUFFER_FRAMES, 4  // Total frame count (SPI; 0.5x-2x for McASP)
+    MOV reg_dac_buf0, 0                      // DAC buffer 0 start pointer
+    LSL reg_dac_buf1, reg_frame_total, 1     // DAC buffer 1 start pointer = N[ch]*2[bytes]*bufsize
+    LMBD r2, reg_num_channels, 1		 // Returns 1, 2 or 3 depending on the number of channels
+    LSL reg_dac_buf1, reg_dac_buf1, r2	 // Multiply by 2, 4 or 8 to get the N[ch] scaling above
+    MOV reg_mcasp_buf0, 0			 // McASP DAC buffer 0 start pointer
+    LSL reg_mcasp_buf1, reg_frame_total, r2  // McASP DAC buffer 1 start pointer = 2[ch]*2[bytes]*(N/4)[samples/spi]*bufsize
+    CLR reg_flags, reg_flags, FLAG_BIT_BUFFER1  // Bit 0 holds which buffer we are on
+    MOV r2, 0
+    SBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4  // Start with frame count of 0
+/* This block of code is not really needed, as the memory is initialized by ARM before the PRU is started.
+Will leave it here for future reference
+//Initialise all SPI and audio buffers (DAC0, DAC1, ADC0, ADC1) to zero.
+//This is useful for analog outs so they do not have spikes during the first buffer.
+//This is not very useful for audio, as you still hear the initial "tumpf" when the converter starts 
+//and each sample in the DAC buffer is reset to 0 after it is written to the DAC.
+
+    QBBC SPI_INIT_BUFFER_DONE, reg_flags, FLAG_BIT_USE_SPI
+//Initialize SPI buffers
+//compute the memory offset of the end of the audio buffer and store it in r4
+    SUB r4, reg_dac_buf1, reg_dac_buf0 // length of the buffer, assumes reg_dac_buf1>reg_dac_buf0
+    LSL r4, r4, 2 //length of four buffers (DAC0, DAC1, ADC0, ADC1)
+    ADD r4, reg_dac_buf0, r4 //total offset
+    MOV r2, 0// value to store
+    MOV r3, 0 // offset counter
+SPI_INIT_BUFFER_LOOP:
+    SBCO r2, C_ADC_DAC_MEM, r3, 4
+    ADD r3, r3, 4
+    QBGT SPI_INIT_BUFFER_LOOP, r3, r4
+SPI_INIT_BUFFER_DONE:
+
+//Initialize audio buffers
+//compute the memory offset of the end of the audio buffer and store it in r4
+    SUB r4, reg_mcasp_buf1, reg_mcasp_buf0 // length of the buffer, assumes reg_mcasp_buf1>reg_mcasp_buf0
+    LSL r4, r4, 2 //length of four buffers (DAC0, DAC1, ADC0, ADC1)
+    ADD r4, reg_mcasp_buf0, r4 //total offset
+    MOV r2, 0 // value to store
+    MOV r3, 0 // offset counter
+    MCASP_INIT_BUFFER_LOOP:
+    SBCO r2, C_MCASP_MEM, r3, 4
+    ADD r3, r3, 4
+    QBGT MCASP_INIT_BUFFER_LOOP, r3, r4
+*/
 // Here we are out of sync by one TDM slot since the 0 word transmitted above will have occupied
 // the first output slot. Send one more word before jumping into the loop.
 MCASP_DAC_WAIT_BEFORE_LOOP:	
@@ -714,6 +793,7 @@
       MCASP_REG_READ_EXT MCASP_RBUF, r2
 	
 WRITE_ONE_BUFFER:
+
       // Write a single buffer of DAC samples and read a buffer of ADC samples
       // Load starting positions
       MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
@@ -726,7 +806,14 @@
       LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1
       ADC reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
       MOV reg_frame_current, 0
-	
+      QBBS DIGITAL_BASE_CHECK_SET, reg_flags, FLAG_BIT_BUFFER1  //check which buffer we are using for DIGITAL
+                  // if we are here, we are using buffer0 
+      MOV reg_digital_current, MEM_DIGITAL_BASE
+      QBA DIGITAL_BASE_CHECK_DONE
+DIGITAL_BASE_CHECK_SET: //if we are here, we are using buffer1 
+      MOV reg_digital_current, MEM_DIGITAL_BASE+MEM_DIGITAL_BUFFER1_OFFSET //so adjust offset appropriately
+DIGITAL_BASE_CHECK_DONE:
+
 WRITE_LOOP:
       // Write N channels to DAC from successive values in memory
       // At the same time, read N channels from ADC
@@ -805,7 +892,7 @@
       ADD reg_mcasp_adc_current, reg_mcasp_adc_current, 4
 MCASP_ADC_DONE:	
       QBBC SPI_SKIP_WRITE, reg_flags, FLAG_BIT_USE_SPI
-	
+
       // DAC: transmit low word (first in little endian)
       MOV r2, 0xFFFF
       AND r7, reg_dac_data, r2
@@ -826,7 +913,9 @@
       LSL r8, r8, AD7699_CHANNEL_OFFSET
       MOV r7, AD7699_CFG_MASK
       OR r7, r7, r8
-      ADC_WRITE r7, r7
+
+// ADC transfer for this channel; ADC_WRITE_GPIO also runs the DIGITAL I/O between ADC_TX and ADC_WAIT_FOR_FINISH
+      ADC_WRITE_GPIO r7, r7, r1
 
       // Mask out only the relevant 16 bits and store in reg_adc_data
       MOV r2, 0xFFFF
@@ -872,7 +961,6 @@
       ADD r1, r1, 1
       QBNE ADC_DAC_LOOP, r1, reg_num_channels
       QBA ADC_DAC_LOOP_DONE
-
 SPI_SKIP_WRITE:
       // We get here only if the SPI ADC and DAC are disabled
       // Just keep the loop going for McASP
@@ -898,12 +986,13 @@
       MOV r2, reg_mcasp_buf0
       MOV reg_mcasp_buf0, reg_mcasp_buf1
       MOV reg_mcasp_buf1, r2
+      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1) //flip the buffer flag
 
       // Notify ARM of buffer swap
-      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1)
       AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out every but low bit
       SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4
-
+      MOV R31.b0, PRU1_ARM_INTERRUPT + 16           // Interrupt to host loop
+	
       // Increment the frame count in the comm buffer (for status monitoring)
       LBBO r2, reg_comm_addr, COMM_FRAME_COUNT, 4
       ADD r2, r2, reg_frame_total
@@ -926,18 +1015,6 @@
       ADD r3, r3, r1          // Address for GPIO clear register
       SBBO r2, r3, 0, 4       // Clear GPIO pin	
 LED_BLINK_DONE:	
-	
-      QBBC TESTLOW, reg_flags, FLAG_BIT_BUFFER1
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_SETDATAOUT
-      SBBO r2, r3, 0, 4
-      QBA TESTDONE
-TESTLOW:
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
-      SBBO r2, r3, 0, 4
-TESTDONE:
-	 
       // Check if we should finish: flag is zero as long as it should run
       LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
       QBEQ WRITE_ONE_BUFFER, r2, 0
@@ -959,7 +1036,6 @@
       CLR r2, r2, 1
       SBBO r2, r3, 0, 4      
 SPI_CLEANUP_DONE:
-	
       // Signal the ARM that we have finished 
       MOV R31.b0, PRU0_ARM_INTERRUPT + 16
-      HALT
\ No newline at end of file
+      HALT