changeset 16:670be80463a3 matrix_gpio

- Analog matrixIn/matrixOut are now mapped as floats from 0 to 1. - An external PRU binary can be loaded with -P <filename>. - 16 channels of programmable GPIO can be accessed straight from render(), either by writing directly to the matrixGpio[] array or by using the digitalWrite(), digitalRead() and setDigitalDirection() macros from Utilities.h.
author Giulio Moro <giuliomoro@yahoo.it>
date Mon, 27 Apr 2015 13:01:57 +0100
parents 901d205d1a3c
children 85e8b08a7471
files core/PRU.cpp core/RTAudio.cpp core/RTAudioCommandLine.cpp include/PRU.h include/RTAudio.h include/Utilities.h include/matrix_gpio_mapping.h include/pru_rtaudio_bin.h include/render.h pru_rtaudio.bin pru_rtaudio.p
diffstat 11 files changed, 912 insertions(+), 86 deletions(-) [+]
line wrap: on
line diff
--- a/core/PRU.cpp	Sat Feb 07 16:41:56 2015 +0000
+++ b/core/PRU.cpp	Mon Apr 27 13:01:57 2015 +0100
@@ -16,6 +16,7 @@
 #include "../include/PRU.h"
 #include "../include/prussdrv.h"
 #include "../include/pruss_intc_mapping.h"
+#include "../include/matrix_gpio_mapping.h"
 #include "../include/GPIOcontrol.h"
 #include "../include/render.h"
 #include "../include/pru_rtaudio_bin.h"
@@ -26,6 +27,7 @@
 #include <cerrno>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <unistd.h>
 
 // Xenomai-specific includes
 #include <sys/mman.h>
@@ -40,7 +42,9 @@
 #define PRU_MEM_DAC_OFFSET 0x0     // Offset within PRU0 RAM
 #define PRU_MEM_DAC_LENGTH 0x2000  // Length of ADC+DAC memory, in bytes
 #define PRU_MEM_COMM_OFFSET 0x0    // Offset within PRU-SHARED RAM
-
+#define PRU_MEM_MATRIX_GPIO_OFFSET 0x1000 //Offset within PRU-SHARED RAM
+#define MEM_MATRIX_GPIO_BUFFER1_OFFSET 0x400 //Start pointer to MATRIX_GPIO_BUFFER1, which is 256 words.
+											// 256 is the maximum number of frames allowed
 #define PRU_SHOULD_STOP 	0
 #define PRU_CURRENT_BUFFER  1
 #define PRU_BUFFER_FRAMES   2
@@ -52,6 +56,26 @@
 #define PRU_FRAME_COUNT		8
 #define PRU_USE_SPI			9
 #define PRU_SPI_NUM_CHANNELS 10
+#define PRU_USE_GPIO_MATRIX 11
+
+short int matrixGpioPins[NUM_MATRIX_GPIOS]={ // GPIO number for each bit of the matrixGpio word (entry i <-> bit i); comments show the defaults from matrix_gpio_mapping.h
+		GPIO_NO_BIT_0, // P8_07, GPIO 66
+		GPIO_NO_BIT_1, // P8_08, GPIO 67
+		GPIO_NO_BIT_2, // P8_09, GPIO 69
+		GPIO_NO_BIT_3, // P8_10, GPIO 68
+		GPIO_NO_BIT_4, // P8_11, GPIO 45
+		GPIO_NO_BIT_5, // P8_12, GPIO 44
+		GPIO_NO_BIT_6, // P9_12, GPIO 60
+		GPIO_NO_BIT_7, // P9_14, GPIO 50
+		GPIO_NO_BIT_8, // P8_15, GPIO 47
+		GPIO_NO_BIT_9, // P8_16, GPIO 46
+		GPIO_NO_BIT_10, // P9_16, GPIO 51
+		GPIO_NO_BIT_11, // P8_18, GPIO 65
+		GPIO_NO_BIT_12, // P8_27, GPIO 86
+		GPIO_NO_BIT_13, // P8_28, GPIO 88
+		GPIO_NO_BIT_14, // P8_29, GPIO 87
+		GPIO_NO_BIT_15, // P8_30, GPIO 89
+};
 
 #define PRU_SAMPLE_INTERVAL_NS 11338	// 88200Hz per SPI sample = 11.338us
 
@@ -103,7 +127,7 @@
 // viewed on a scope. If include_led is set,
 // user LED 3 on the BBB is taken over by the PRU
 // to indicate activity
-int PRU::prepareGPIO(int use_spi, int include_test_pin, int include_led)
+int PRU::prepareGPIO(int use_spi, int use_matrix_gpio, int include_test_pin, int include_led)
 {
 	if(use_spi) {
 		// Prepare DAC CS/ pin: output, high to begin
@@ -141,6 +165,27 @@
 		spi_enabled = true;
 	}
 
+	if(use_matrix_gpio){
+		printf("gNumMatrixGpioChannels: %d;\n",gNumMatrixGpioChannels);
+		for(int i=0; i<gNumMatrixGpioChannels; i++){
+			if(gpio_export(matrixGpioPins[i])) {
+				if(gRTAudioVerbose)
+					cout << "Warning: couldn't export matrix GPIO pin " << matrixGpioPins[i] << "\n";
+			}
+			if(gpio_set_dir(matrixGpioPins[i], OUTPUT_PIN)) {
+				if(gRTAudioVerbose)
+					cout << "Couldn't set direction on matrix GPIO pin " << matrixGpioPins[i] << "\n";
+				return -1;
+			}
+			if(gpio_set_value(matrixGpioPins[i], HIGH)) {
+				if(gRTAudioVerbose)
+					cout << "Couldn't set value on matrix GPIO pin " << matrixGpioPins[i] << "\n";
+				return -1;
+			}
+		}
+		matrix_gpio_enabled=true;
+	}
+
 	if(include_test_pin) {
 		// Prepare GPIO test output (for debugging), low to begin
 		if(gpio_export(kPruGPIOTestPin)) {
@@ -210,6 +255,11 @@
 		gpio_unexport(kPruGPIODACSyncPin);
 		gpio_unexport(kPruGPIOADCSyncPin);
 	}
+	if(matrix_gpio_enabled){
+		for(int i=0; i<gNumMatrixGpioChannels; i++){
+			gpio_unexport(matrixGpioPins[i]);
+		}
+	}
 	if(gpio_test_pin_enabled) {
 		gpio_unexport(kPruGPIOTestPin);
 		gpio_unexport(kPruGPIOTestPin2);
@@ -221,7 +271,6 @@
 		// rather than the system default
 		led_set_trigger(3, "mmc1");
 	}
-
 	gpio_enabled = gpio_test_pin_enabled = false;
 }
 
@@ -260,6 +309,7 @@
 
     spi_buffer_frames = frames_per_buffer;
     audio_buffer_frames = spi_buffer_frames * spi_num_channels / 4;
+    matrix_gpio_buffer_frames = audio_buffer_frames;
 
     /* Map PRU memory to pointers */
 	prussdrv_map_prumem (PRUSS0_SHARED_DATARAM, (void **)&pruMem);
@@ -280,6 +330,13 @@
 		pru_buffer_spi_dac = pru_buffer_spi_adc = 0;
 	}
 
+	if(matrix_gpio_enabled) {
+		prussdrv_map_prumem (PRUSS0_SHARED_DATARAM, (void **)&pruMem);
+		pru_buffer_matrix_gpio = (uint32_t *)&pruMem[PRU_MEM_MATRIX_GPIO_OFFSET/sizeof(uint32_t)];
+	}
+	else {
+		pru_buffer_matrix_gpio = 0;
+	}
     /* Set up flags */
     pru_buffer_comm[PRU_SHOULD_STOP] = 0;
     pru_buffer_comm[PRU_CURRENT_BUFFER] = 0;
@@ -303,6 +360,14 @@
     	pru_buffer_comm[PRU_USE_SPI] = 0;
     	pru_buffer_comm[PRU_SPI_NUM_CHANNELS] = 0;
     }
+    if(matrix_gpio_enabled) {
+    	pru_buffer_comm[PRU_USE_GPIO_MATRIX] = 1;
+    	pru_buffer_comm[NUM_MATRIX_GPIOS] = spi_num_channels;
+    }
+    else {
+    	pru_buffer_comm[PRU_USE_GPIO_MATRIX] = 0;
+    	pru_buffer_comm[NUM_MATRIX_GPIOS] = 0;
+    }
 
     /* Clear ADC and DAC memory */
     if(spi_enabled) {
@@ -311,7 +376,7 @@
     }
 	for(int i = 0; i < PRU_MEM_MCASP_LENGTH / 2; i++)
 		pru_buffer_audio_dac[i] = 0;
-
+//TODO: maybe the lines below are to be deleted, as we removed the test code from pru_rtaudio.p ?
 	/* If using GPIO test pin for Xenomai (for debugging), initialise the pointer now */
 	if(xenomai_test_pin && xenomai_gpio_fd < 0) {
 		xenomai_gpio_fd = open("/dev/mem", O_RDWR);
@@ -332,23 +397,34 @@
 }
 
 // Run the code image in the specified file
-int PRU::start()
+int PRU::start(char * const filename) // filename: path of an external PRU binary; null or "" selects the embedded PRUcode image. Returns 0 on success, 1 on failure
 {
 	/* Clear any old interrupt */
 	if(pru_number == 0)
 		prussdrv_pru_clear_event(PRU_EVTOUT_0, PRU0_ARM_INTERRUPT);
 	else
 		prussdrv_pru_clear_event(PRU_EVTOUT_1, PRU1_ARM_INTERRUPT);
-
-    /* Load and execute binary on PRU */
-	if(prussdrv_exec_code(pru_number, PRUcode, sizeof(PRUcode))) {
-    	rt_printf("Failed to execute PRU code\n");
-    	return 1;
-    }
+	/* Load and execute binary on PRU: the embedded image when no filename is given, otherwise the external file */
+	if(filename == 0 || filename[0] == '\0') { //if the pointer is null or the string is empty, load the embedded code
+		if(gRTAudioVerbose)
+			rt_printf("Using embedded PRU code\n");
+		if(prussdrv_exec_code(pru_number, PRUcode, sizeof(PRUcode))) {
+			rt_printf("Failed to execute PRU code\n");
+			return 1;
+		}
+	} else {
+		if(gRTAudioVerbose)
+			rt_printf("Using PRU code from %s\n",filename);
+		if(prussdrv_exec_program(pru_number, filename)) {
+			rt_printf("Failed to execute PRU code from %s\n", filename);
+			return 1;
+		}
+	}
 
     running = true;
     return 0;
 }
+uint32_t empty[1024]={0x0};
 
 // Main loop to read and write data from/to PRU
 void PRU::loop()
@@ -356,15 +432,34 @@
 	// Polling interval is 1/4 of the period
 	RTIME sleepTime = PRU_SAMPLE_INTERVAL_NS * (spi_num_channels / 2) * spi_buffer_frames / 4;
 	float *audioInBuffer, *audioOutBuffer;
+	float *matrixInBuffer, *matrixOutBuffer;
+	uint32_t *matrixGpioBuffer0, *matrixGpioBuffer1, *matrixGpioBufferTemp;
 
 	audioInBuffer = (float *)malloc(2 * audio_buffer_frames * sizeof(float));
 	audioOutBuffer = (float *)malloc(2 * audio_buffer_frames * sizeof(float));
-
+	matrixInBuffer = (float *)malloc(spi_num_channels * spi_buffer_frames * sizeof(float));
+	matrixOutBuffer = (float *)malloc(spi_num_channels * spi_buffer_frames * sizeof(float));
+	matrixGpioBuffer0 = pru_buffer_matrix_gpio; 
+	matrixGpioBuffer1 = pru_buffer_matrix_gpio+MEM_MATRIX_GPIO_BUFFER1_OFFSET/sizeof(uint32_t);
+   matrix_gpio_buffer_frames = matrix_gpio_enabled ? audio_buffer_frames : 0; //TODO: find a more elegant solution for when the matrix_gpio is disabled e.g.:
+    																	// - embed in the digitalWrite/Read macros a check whether matrix_gpio is enabled
+    																	// - allocate some memory in ARM just to allow render() to run regardless.
+    																	//     in this case it can be matrixGpioBuffer0 == matrixGpioBuffer1
+	printf("matrix_gpio_buffer_frames: %d;\n",matrix_gpio_buffer_frames);
+    matrixGpioBufferTemp = (uint32_t *)malloc(matrix_gpio_buffer_frames*sizeof(uint32_t)); //temp buffer to hold previous states
 	if(audioInBuffer == 0 || audioOutBuffer == 0) {
-		rt_printf("Error: couldn't allocated audio buffers\n");
+		rt_printf("Error: couldn't allocate audio buffers\n");
 		return;
 	}
-
+	if(matrixInBuffer == 0 || matrixOutBuffer == 0) {
+		rt_printf("Error: couldn't allocate matrix buffers\n");
+		return;
+	}
+	if(matrixGpioBufferTemp == 0) {
+		rt_printf("Error: couldn't allocate matrix GPIO buffers\n");
+		return;
+	} 
+	
 	while(!gShouldStop) {
 		// Wait for PRU to move to buffer 1
 		while(pru_buffer_comm[PRU_CURRENT_BUFFER] == 0 && !gShouldStop) {
@@ -383,14 +478,40 @@
 		// Convert short (16-bit) samples to float
 		for(unsigned int n = 0; n < 2 * audio_buffer_frames; n++)
 			audioInBuffer[n] = (float)pru_buffer_audio_adc[n] / 32768.0;
-
-		if(spi_enabled)
-			render(spi_buffer_frames, audio_buffer_frames, audioInBuffer, audioOutBuffer,
-					pru_buffer_spi_adc, pru_buffer_spi_dac);
+		if(spi_enabled) {
+			for(unsigned int n = 0; n < spi_num_channels * spi_buffer_frames; n++)
+				matrixInBuffer[n] = (float)pru_buffer_spi_adc[n] / 65536.0;
+        //use past matrix_gpio values to initialize the array properly:
+        //- pins previously set as outputs will keep their previously set output value,
+        //- pins previously set as inputs will carry the newly read input value
+            if(matrix_gpio_enabled){
+                for(unsigned int n = 0; n < matrix_gpio_buffer_frames; n++){ 
+                    uint16_t inputs=matrixGpioBufferTemp[n]&0xffff;//half-word, has 1 for inputs and 0 for outputs
+//                    printf("inputs: 0x%x\n",inputs);
+                    uint16_t outputs=~inputs; //half-word has 1 for outputs and 0 for inputs;
+                    matrixGpioBuffer0[n]=(matrixGpioBufferTemp[n]&(outputs<<16))| //keep output values set in previous matrixGpioBuffer1[n]
+                                         (matrixGpioBuffer0[n]&(inputs<<16))   | //inputs from current matrixGpioBuffer0[n];
+                                         (matrixGpioBufferTemp[n]&(inputs));     //keep pin configuration from previous matrixGpioBuffer1[n]
+//                    matrixGpioBuffer0[n]=matrixGpioBufferTemp[n]; //ignores inputs
+                }
+            }
+			render(spi_buffer_frames, matrix_gpio_buffer_frames, audio_buffer_frames, audioInBuffer, audioOutBuffer,
+					matrixInBuffer, matrixOutBuffer, matrixGpioBuffer0);
+			for(unsigned int n = 0; n < spi_num_channels * spi_buffer_frames; n++) {
+				int out = matrixOutBuffer[n] * 65536.0;
+				if(out < 0) out = 0;
+				else if(out > 65535) out = 65535;
+				pru_buffer_spi_dac[n] = (uint16_t)out;
+			}
+            if(matrix_gpio_enabled){ // keep track of past matrix_gpio values
+                for(unsigned int n = 0; n < matrix_gpio_buffer_frames; n++){ 
+                    matrixGpioBufferTemp[n]=matrixGpioBuffer0[n];
+                }
+            }
+		}
 		else
-			render(0, audio_buffer_frames, audioInBuffer, audioOutBuffer, 0, 0);
-
-		// Convert float back to short
+			render(0, 0, audio_buffer_frames, audioInBuffer, audioOutBuffer, 0, 0, 0); // we still pass matrixGpioBuffer, just it is unused
+        // Convert float back to short
 		for(unsigned int n = 0; n < 2 * audio_buffer_frames; n++) {
 			int out = audioOutBuffer[n] * 32768.0;
 			if(out < -32768) out = -32768;
@@ -422,11 +543,39 @@
 		for(unsigned int n = 0; n < 2 * audio_buffer_frames; n++)
 			audioInBuffer[n] = (float)pru_buffer_audio_adc[n + audio_buffer_frames * 2] / 32768.0;
 
-		if(spi_enabled)
-			render(spi_buffer_frames, audio_buffer_frames, audioInBuffer, audioOutBuffer,
-					&pru_buffer_spi_adc[spi_buffer_frames * spi_num_channels], &pru_buffer_spi_dac[spi_buffer_frames * spi_num_channels]);
+		if(spi_enabled) {
+			for(unsigned int n = 0; n < spi_num_channels * spi_buffer_frames; n++)
+				matrixInBuffer[n] = (float)pru_buffer_spi_adc[n + spi_buffer_frames * spi_num_channels] / 65536.0;
+
+            //use past matrix_gpio values to initialize the array properly:
+            //- pins previously set as outputs will keep their previously set output value,
+            //- pins previously set as inputs will carry the newly read input value
+            if(matrix_gpio_enabled){
+                for(unsigned int n = 0; n < matrix_gpio_buffer_frames; n++){ 
+                    uint16_t inputs=matrixGpioBufferTemp[n]&0xffff;//half-word, has 1 for inputs and 0 for outputs
+                    uint16_t outputs=~inputs; //half-word has 1 for outputs and 0 for inputs;
+                    matrixGpioBuffer1[n]=(matrixGpioBufferTemp[n]&(outputs<<16))| //keep output values set in previous matrixGpioBuffer1[n]
+                                         (matrixGpioBuffer1[n]&(inputs<<16))   | //inputs from current matrixGpioBuffer1[n];
+                                         (matrixGpioBufferTemp[n]&(inputs));     //keep pin configuration from previous matrixGpioBuffer1[n]
+//                    matrixGpioBuffer1[n]=matrixGpioBufferTemp[n]; //ignores inputs
+                }
+            }
+			render(spi_buffer_frames, matrix_gpio_buffer_frames, audio_buffer_frames, audioInBuffer, audioOutBuffer,
+					matrixInBuffer, matrixOutBuffer, matrixGpioBuffer1);
+			for(unsigned int n = 0; n < spi_num_channels * spi_buffer_frames; n++) {
+				int out = matrixOutBuffer[n] * 65536.0;
+				if(out < 0) out = 0;
+				else if(out > 65535) out = 65535;
+				pru_buffer_spi_dac[n + spi_buffer_frames * spi_num_channels] = (uint16_t)out;
+			}
+            if(matrix_gpio_enabled){ // keep track of past matrix_gpio values
+                for(unsigned int n = 0; n < matrix_gpio_buffer_frames; n++){ 
+                    matrixGpioBufferTemp[n]=matrixGpioBuffer1[n];
+                }
+            }
+		}
 		else
-			render(0, audio_buffer_frames, audioInBuffer, audioOutBuffer, 0, 0);
+			render(0, 0, audio_buffer_frames, audioInBuffer, audioOutBuffer, 0, 0, 0); // we still pass matrixGpioBuffer, just it is unused
 
 		// Convert float back to short
 		for(unsigned int n = 0; n < 2 * audio_buffer_frames; n++) {
@@ -445,8 +594,11 @@
 	// Tell PRU to stop
 	pru_buffer_comm[PRU_SHOULD_STOP] = 1;
 
+	free(matrixOutBuffer);
 	free(audioInBuffer);
 	free(audioOutBuffer);
+	free(matrixInBuffer);
+    free(matrixGpioBufferTemp);
 }
 
 // Wait for an interrupt from the PRU indicate it is finished
--- a/core/RTAudio.cpp	Sat Feb 07 16:41:56 2015 +0000
+++ b/core/RTAudio.cpp	Mon Apr 27 13:01:57 2015 +0100
@@ -56,16 +56,18 @@
 bool gShouldStop = false;
 
 // general settings
+char *gPRUFilename;//[256]	 = "pru_rtaudio.bin"; 	// path to PRU binary file
 int gRTAudioVerbose = 0;   						// Verbosity level for debugging
 int gAmplifierMutePin = -1;
 int gAmplifierShouldBeginMuted = 0;
 
 // Number of audio and matrix channels, globally accessible
-// At least gNumMatrixChannels needs to be global to be used
-// by the analogRead() and analogWrite() macros without creating
+// At least gNumMatrixChannels and gNumMatrixGpioChannels need to be global to be used
+// by the analogRead() and analogWrite() and the digital macros without creating
 // extra confusion in their use cases by passing this argument
 int gNumAudioChannels = 0;
 int gNumMatrixChannels = 0;
+int gNumMatrixGpioChannels = 0;
 
 // initAudio() prepares the infrastructure for running PRU-based real-time
 // audio, but does not actually start the calculations.
@@ -83,7 +85,7 @@
 {
 	rt_print_auto_init(1);
 	setVerboseLevel(settings->verbose);
-
+	gPRUFilename=settings->pruFilename;
 	if(gRTAudioVerbose == 1)
 		rt_printf("Running with Xenomai\n");
 
@@ -106,7 +108,7 @@
 
 		if(gpio_export(settings->ampMutePin)) {
 			if(gRTAudioVerbose)
-				cout << "Warning: couldn't export amplifier mute pin\n";
+				cout << "Warning: couldn't export amplifier mute pin " << settings-> ampMutePin << "\n";
 		}
 		if(gpio_set_dir(settings->ampMutePin, OUTPUT_PIN)) {
 			if(gRTAudioVerbose)
@@ -142,7 +144,8 @@
 	gPRU = new PRU();
 	gAudioCodec = new I2c_Codec();
 
-	if(gPRU->prepareGPIO(settings->useMatrix, 1, 1)) {
+	gNumMatrixGpioChannels = settings->useMatrixGpio ? settings->numMatrixGpioChannels : 0; //this is called here to make sure prepareGPIO initializes the appropriate GPIO pins
+	if(gPRU->prepareGPIO(settings->useMatrix, settings->useMatrixGpio, 1, 1)) {
 		cout << "Error: unable to prepare GPIO for PRU audio\n";
 		return 1;
 	}
@@ -177,8 +180,7 @@
 
 	gNumAudioChannels = 2;
 	gNumMatrixChannels = settings->useMatrix ? settings->numMatrixChannels : 0;
-
-	if(!initialise_render(gNumMatrixChannels, gNumAudioChannels,
+	if(!initialise_render(gNumMatrixChannels, gNumMatrixGpioChannels, gNumAudioChannels,
 				          settings->useMatrix ? settings->periodSize : 0, /* matrix period size */
 				          audioPeriodSize,
 				          matrixSampleRate, audioSampleRate,
@@ -208,8 +210,8 @@
 		gShouldStop = 1;
 	}
 	else {
-		if(gPRU->start()) {
-			rt_printf("Error: unable to start PRU\n");
+		if(gPRU->start(gPRUFilename)) {
+			rt_printf("Error: unable to start PRU from file %s\n", gPRUFilename);
 			gShouldStop = 1;
 		}
 		else {
--- a/core/RTAudioCommandLine.cpp	Sat Feb 07 16:41:56 2015 +0000
+++ b/core/RTAudioCommandLine.cpp	Mon Apr 27 13:01:57 2015 +0100
@@ -9,7 +9,6 @@
 #include <cstdlib>
 #include <cstring>
 #include <getopt.h>
-
 #include "../include/RTAudio.h"
 
 // Default command-line options for RTAudio
@@ -18,15 +17,18 @@
 	{"period", 1, NULL, 'p'},
 	{"verbose", 0, NULL, 'v'},
 	{"use-matrix", 1, NULL, 'm'},
+	{"use-matrix-gpio", 1, NULL, 'g'},
 	{"matrix-channels", 1, NULL, 'C'},
+	{"matrix-gpio-channels", 1, NULL, 'G'},
 	{"mute-speaker", 1, NULL, 'M'},
 	{"dac-level", 1, NULL, 'D'},
 	{"adc-level", 1, NULL, 'A'},
 	{"hp-level", 1, NULL, 'H'},
+	{"pru-file",1,NULL,'P'},
 	{NULL, 0, NULL, 0}
 };
 
-const char gDefaultShortOptions[] = "p:vm:M:C:D:A:H:";
+const char gDefaultShortOptions[] = "p:vm:M:C:D:A:H:P:g:G:";
 
 // This function sets the default settings for the RTAudioSettings structure
 void BeagleRT_defaultSettings(RTAudioSettings *settings)
@@ -38,8 +40,11 @@
 	settings->adcLevel = DEFAULT_ADC_LEVEL;
 	settings->headphoneLevel = DEFAULT_HP_LEVEL;
 	settings->useMatrix = 1;
+	settings->useMatrixGpio = 1;
 	settings->numMatrixChannels = 8;
+	settings->numMatrixGpioChannels = 16;
 	settings->verbose = 0;
+	settings->pruFilename[0]='\0';
 	settings->codecI2CAddress = CODEC_I2C_ADDRESS;
 	settings->ampMutePin = kAmplifierMutePin;
 }
@@ -125,6 +130,10 @@
 		case 'm':
 			settings->useMatrix = atoi(optarg);
 			break;
+		case 'g':
+			settings->useMatrixGpio = atoi(optarg);
+			settings->numMatrixGpioChannels = 0;
+			break;
 		case 'C':
 			settings->numMatrixChannels = atoi(optarg);
 			if(settings->numMatrixChannels >= 8)
@@ -134,6 +143,16 @@
 			else
 				settings->numMatrixChannels = 2;
 			break;
+		case 'G':
+			settings->numMatrixGpioChannels = atoi(optarg);
+			if(settings->numMatrixGpioChannels >= 16)
+				settings->numMatrixGpioChannels = 16;
+			else if (settings->numMatrixGpioChannels < 1){
+				settings->numMatrixGpioChannels = 0;
+				settings->useMatrixGpio = 0; //TODO: this actually works only if -G 0 is specified after -g 1.
+											 //No worries, though: disabling numMatrixGpio will only prevent the pins from being exported.
+			}
+			break;
 		case 'M':
 			settings->beginMuted = atoi(optarg);
 			break;
@@ -146,6 +165,12 @@
 		case 'H':
 			settings->headphoneLevel = atof(optarg);
 			break;
+		case 'P':
+			if(strlen(optarg)<MAX_PRU_FILENAME_LENGTH)
+				strcpy(settings->pruFilename, optarg);
+			else
+				std::cerr << "Warning: filename for the PRU code is too long (>" << MAX_PRU_FILENAME_LENGTH << " characters). Using embedded PRU code instead\n";
+			break;
 		case '?':
 		default:
 			return c;
@@ -157,12 +182,16 @@
 // Call from within your own usage function
 void BeagleRT_usage()
 {
-	std::cerr << "   --period [-p] period:       Set the hardware period (buffer) size in matrix samples\n";
-	std::cerr << "   --dac-level [-D] dBs:       Set the DAC output level (0dB max; -63.5dB min)\n";
-	std::cerr << "   --adc-level [-A] dBs:       Set the ADC input level (0dB max; -12dB min)\n";
-	std::cerr << "   --hp-level [-H] dBs:        Set the headphone output level (0dB max; -63.5dB min)\n";
-	std::cerr << "   --mute-speaker [-M] val:    Set whether to mute the speaker initially (default: no)\n";
-	std::cerr << "   --use-matrix [-m] val:      Set whether to use ADC/DAC matrix\n";
-	std::cerr << "   --matrix-channels [-C] val: Set the number of ADC/DAC channels (default: 8)\n";
-	std::cerr << "   --verbose [-v]:             Enable verbose logging information\n";
+	std::cerr << "   --period [-p] period:            Set the hardware period (buffer) size in matrix samples\n";
+	std::cerr << "   --dac-level [-D] dBs:            Set the DAC output level (0dB max; -63.5dB min)\n";
+	std::cerr << "   --adc-level [-A] dBs:            Set the ADC input level (0dB max; -12dB min)\n";
+	std::cerr << "   --hp-level [-H] dBs:             Set the headphone output level (0dB max; -63.5dB min)\n";
+	std::cerr << "   --mute-speaker [-M] val:         Set whether to mute the speaker initially (default: no)\n";
+	std::cerr << "   --use-matrix [-m] val:           Set whether to use ADC/DAC matrix (default: yes)\n";
+	std::cerr << "   --use-matrix-gpio [-g] val:      Set whether to use GPIO matrix (default: yes)\n"; // name must match the "use-matrix-gpio" entry in the long-option table
+	std::cerr << "   --matrix-channels [-C] val:      Set the number of ADC/DAC channels (default: 8)\n";
+	std::cerr << "   --matrix-gpio-channels [-G] val: Set the number of GPIO channels (default: 16)\n";
+	std::cerr << "   --pru-file [-P] val:             Set an optional external file to use for the PRU binary code\n";
+	std::cerr << "   --verbose [-v]:                  Enable verbose logging information\n";
 }
+
--- a/include/PRU.h	Sat Feb 07 16:41:56 2015 +0000
+++ b/include/PRU.h	Mon Apr 27 13:01:57 2015 +0100
@@ -27,7 +27,7 @@
 	~PRU();
 
 	// Prepare the GPIO pins needed for the PRU
-	int prepareGPIO(int use_spi, int include_test_pin, int include_led);
+	int prepareGPIO(int use_spi, int use_matrix_gpio, int include_test_pin, int include_led);
 
 	// Clean up the GPIO at the end
 	void cleanupGPIO();
@@ -37,7 +37,7 @@
 				   bool xenomai_test_pin = false);
 
 	// Run the code image in pru_rtaudio_bin.h
-	int start();
+	int start(char * const filename);
 
 	// Loop: read and write data from the PRU
 	void loop();
@@ -56,6 +56,7 @@
 	int pru_number;		// Which PRU we use
 	bool running;		// Whether the PRU is running
 	bool spi_enabled;	// Whether SPI ADC and DAC are used
+	bool matrix_gpio_enabled; // Whether Matrix GPIO is used
 	bool gpio_enabled;	// Whether GPIO has been prepared
 	bool led_enabled;	// Whether a user LED is enabled
 	bool gpio_test_pin_enabled; // Whether the test pin was also enabled
@@ -64,9 +65,11 @@
 	volatile uint32_t *pru_buffer_comm;
 	uint16_t *pru_buffer_spi_dac;
 	uint16_t *pru_buffer_spi_adc;
+	uint32_t *pru_buffer_matrix_gpio;
 	int16_t *pru_buffer_audio_dac;
 	int16_t *pru_buffer_audio_adc;
 	unsigned int spi_buffer_frames;
+	unsigned int matrix_gpio_buffer_frames;
 	unsigned int audio_buffer_frames;
 
 	int xenomai_gpio_fd;	// File descriptor for /dev/mem for fast GPIO
--- a/include/RTAudio.h	Sat Feb 07 16:41:56 2015 +0000
+++ b/include/RTAudio.h	Mon Apr 27 13:01:57 2015 +0100
@@ -29,6 +29,7 @@
 #define DEFAULT_DAC_LEVEL	0.0
 #define DEFAULT_ADC_LEVEL	-6.0
 #define DEFAULT_HP_LEVEL	-6.0
+#define MAX_PRU_FILENAME_LENGTH 256
 
 enum {
 	kAmplifierMutePin = 61	// P8-26 controls amplifier mute
@@ -44,9 +45,11 @@
 	float adcLevel;			// Level for the audio ADC input
 	float headphoneLevel;	// Level for the headphone output
 	int useMatrix;			// Whether to use the matrix
+	int useMatrixGpio;		// Whether to use the 16 programmable GPIOs
 	int numMatrixChannels;	// How many channels for the ADC and DAC
+	int numMatrixGpioChannels;	// How many channels for the GPIOs
 	int verbose;			// Whether to use verbose logging
-
+	char pruFilename[MAX_PRU_FILENAME_LENGTH]; //the external .bin file to load. If empty will use PRU code from pru_rtaudio_bin.h
 	// These items are hardware-dependent and should only be changed
 	// to run on different hardware
 	int codecI2CAddress;	// Where the codec can be found on the I2C bus
--- a/include/Utilities.h	Sat Feb 07 16:41:56 2015 +0000
+++ b/include/Utilities.h	Mon Apr 27 13:01:57 2015 +0100
@@ -18,6 +18,17 @@
 // Write an analog output frame at output pin p, frame f, to value v
 #define analogWrite(p, f, v) (matrixOut[(f)*gNumMatrixChannels + (p)] = (uint16_t)(v))
 
+#define setBit(word,bit) ((word)|(1<<(bit))) //returns word with the given bit set; word itself is not modified
+#define clearBit(word,bit) ((word)&~(1<<(bit))) //returns word with the given bit cleared
+#define getBit(word,bit) (((word)>>(bit))&1) //returns 0 or 1
+#define changeBit(word,bit,value) ((clearBit((word),(bit))) | ((value)<<(bit))) //value is expected to be 0 or 1
+//matrixGpio API: each matrixGpio[frame] word holds the pin directions in bits 0-15 (1 = input, 0 = output) and the pin values in bits 16-31
+#define setDigitalDirection(pin,frame,direction) (matrixGpio[(frame)]=changeBit(matrixGpio[(frame)],(pin),(direction)))
+#define digitalWriteAll(frame,value) (matrixGpio[(frame)]=0xffff0000*(!(!(value)))) //all 16 pins become outputs driven to value; no trailing ';' so the macro is safe inside if/else
+//sets the bit in the high word, clears the bit in the low word (just in case the direction was not previously set)
+#define digitalWrite(pin, frame, value) (matrixGpio[(frame)]=( changeBit(matrixGpio[(frame)], ((pin)+16), (value)) & (0xffffffff-(1<<(pin))) )) //could have been done with two subsequent assignments
+#define digitalRead(pin, frame) (   getBit(matrixGpio[(frame)], ((pin)+16)) )
+
 float map(float x, float in_min, float in_max, float out_min, float out_max);
 float constrain(float x, float min_val, float max_val);
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/matrix_gpio_mapping.h	Mon Apr 27 13:01:57 2015 +0100
@@ -0,0 +1,71 @@
+#ifndef MATRIX_GPIO_MAPPING_H
+#define MATRIX_GPIO_MAPPING_H
+
+#define NUM_MATRIX_GPIOS 16
+extern short int matrixGpioPins[NUM_MATRIX_GPIOS];
+//GPIO_INPUT and GPIO_OUTPUT values when calling the setDigitalDirection() macro.
+//TODO: these are inverted with respect to INPUT_PIN and OUTPUT_PIN defined in GPIOcontrol.h,
+//which might lead to unexpected results in case someone uses those in place of these or viceversa
+#define GPIO_INPUT 1
+#define GPIO_OUTPUT 0
+
+//mapping GPIO numbers to header pins
+//if you want to use different pins/ordering, define here new pins. The ordering here is NOT binding
+#define P8_07_GPIO_NO 66
+#define P8_08_GPIO_NO 67
+#define P8_09_GPIO_NO 69
+#define P8_10_GPIO_NO 68
+#define P8_11_GPIO_NO 45
+#define P8_12_GPIO_NO 44
+#define P9_12_GPIO_NO 60
+#define P9_14_GPIO_NO 50
+#define P8_15_GPIO_NO 47
+#define P8_16_GPIO_NO 46
+#define P9_16_GPIO_NO 51
+#define P8_18_GPIO_NO 65
+#define P8_27_GPIO_NO 86
+#define P8_28_GPIO_NO 88
+#define P8_29_GPIO_NO 87
+#define P8_30_GPIO_NO 89
+
+//mapping pin headers to bits in the matrixGpio word.
+//used to initialise short int matrixGpioPins[NUM_MATRIX_GPIOS] (declared above, defined in core/PRU.cpp), which is used in PRU::prepareGPIO to export the pins
+//if you want to use different pins, declare them above and use them here
+//The ordering here is NOT binding, but if you want to use a different ordering, please change it here as well as below and in the PRU, for consistency
+#define GPIO_NO_BIT_0 P8_07_GPIO_NO
+#define GPIO_NO_BIT_1 P8_08_GPIO_NO
+#define GPIO_NO_BIT_2 P8_09_GPIO_NO
+#define GPIO_NO_BIT_3 P8_10_GPIO_NO
+#define GPIO_NO_BIT_4 P8_11_GPIO_NO
+#define GPIO_NO_BIT_5 P8_12_GPIO_NO
+#define GPIO_NO_BIT_6 P9_12_GPIO_NO
+#define GPIO_NO_BIT_7 P9_14_GPIO_NO
+#define GPIO_NO_BIT_8 P8_15_GPIO_NO
+#define GPIO_NO_BIT_9 P8_16_GPIO_NO
+#define GPIO_NO_BIT_10 P9_16_GPIO_NO
+#define GPIO_NO_BIT_11 P8_18_GPIO_NO
+#define GPIO_NO_BIT_12 P8_27_GPIO_NO
+#define GPIO_NO_BIT_13 P8_28_GPIO_NO
+#define GPIO_NO_BIT_14 P8_29_GPIO_NO
+#define GPIO_NO_BIT_15 P8_30_GPIO_NO
+
+//mapping bits in the matrixGpio word to pin headers, so that the pin header name can be used instead of the bit number
+//The ordering here IS binding. If you want to use different pins/ordering, please do it above as well as here and in the PRU, for consistency
+#define P8_07 0
+#define P8_08 1
+#define P8_09 2
+#define P8_10 3
+#define P8_11 4
+#define P8_12 5
+#define P9_12 6
+#define P9_14 7
+#define P8_15 8
+#define P8_16 9
+#define P9_16 10
+#define P8_18 11
+#define P8_27 12
+#define P8_28 13
+#define P8_29 14
+#define P8_30 15
+
+#endif
--- a/include/pru_rtaudio_bin.h	Sat Feb 07 16:41:56 2015 +0000
+++ b/include/pru_rtaudio_bin.h	Mon Apr 27 13:01:57 2015 +0100
@@ -5,6 +5,309 @@
 /* This file is generated by the PRU assembler.                       */
 
 const unsigned int PRUcode[] =  {
+     0x7b00002e,
+     0x244804c2,
+     0x24c13482,
+     0xf1002282,
+     0x240000e8,
+     0x240000e7,
+     0xd104fb07,
+     0x1d0de2e2,
+     0xc914fb03,
+     0x1f0de8e8,
+     0x79000005,
+     0x1f0de7e7,
+     0x79000003,
+     0x1f0de2e2,
+     0x79000001,
+     0xd105fb07,
+     0x1d0ce2e2,
+     0xc915fb03,
+     0x1f0ce8e8,
+     0x79000005,
+     0x1f0ce7e7,
+     0x79000003,
+     0x1f0ce2e2,
+     0x79000001,
+     0xd106fb07,
+     0x1d1ce2e2,
+     0xc916fb03,
+     0x1f1ce8e8,
+     0x79000005,
+     0x1f1ce7e7,
+     0x79000003,
+     0x1f1ce2e2,
+     0x79000001,
+     0xd107fb07,
+     0x1d12e2e2,
+     0xc917fb03,
+     0x1f12e8e8,
+     0x79000005,
+     0x1f12e7e7,
+     0x79000003,
+     0x1f12e2e2,
+     0x79000001,
+     0xd108fb07,
+     0x1d0fe2e2,
+     0xc918fb03,
+     0x1f0fe8e8,
+     0x79000005,
+     0x1f0fe7e7,
+     0x79000003,
+     0x1f0fe2e2,
+     0x79000001,
+     0xd109fb07,
+     0x1d0ee2e2,
+     0xc919fb03,
+     0x1f0ee8e8,
+     0x79000005,
+     0x1f0ee7e7,
+     0x79000003,
+     0x1f0ee2e2,
+     0x79000001,
+     0xd10afb07,
+     0x1d13e2e2,
+     0xc91afb03,
+     0x1f13e8e8,
+     0x79000005,
+     0x1f13e7e7,
+     0x79000003,
+     0x1f13e2e2,
+     0x79000001,
+     0x244804c3,
+     0x24c13483,
+     0xe1002382,
+     0x24481ac3,
+     0x24c13483,
+     0xf1002283,
+     0x240000e5,
+     0x240000e4,
+     0xd100fb07,
+     0x1d02e3e3,
+     0xc910fb03,
+     0x1f02e5e5,
+     0x79000005,
+     0x1f02e4e4,
+     0x79000003,
+     0x1f02e3e3,
+     0x79000001,
+     0xd101fb07,
+     0x1d03e3e3,
+     0xc911fb03,
+     0x1f03e5e5,
+     0x79000005,
+     0x1f03e4e4,
+     0x79000003,
+     0x1f03e3e3,
+     0x79000001,
+     0xd102fb07,
+     0x1d05e3e3,
+     0xc912fb03,
+     0x1f05e5e5,
+     0x79000005,
+     0x1f05e4e4,
+     0x79000003,
+     0x1f05e3e3,
+     0x79000001,
+     0xd103fb07,
+     0x1d04e3e3,
+     0xc913fb03,
+     0x1f04e5e5,
+     0x79000005,
+     0x1f04e4e4,
+     0x79000003,
+     0x1f04e3e3,
+     0x79000001,
+     0xd10bfb07,
+     0x1d01e3e3,
+     0xc91bfb03,
+     0x1f01e5e5,
+     0x79000005,
+     0x1f01e4e4,
+     0x79000003,
+     0x1f01e3e3,
+     0x79000001,
+     0xd10cfb07,
+     0x1d16e3e3,
+     0xc91cfb03,
+     0x1f16e5e5,
+     0x79000005,
+     0x1f16e4e4,
+     0x79000003,
+     0x1f16e3e3,
+     0x79000001,
+     0xd10dfb07,
+     0x1d18e3e3,
+     0xc91dfb03,
+     0x1f18e5e5,
+     0x79000005,
+     0x1f18e4e4,
+     0x79000003,
+     0x1f18e3e3,
+     0x79000001,
+     0xd10efb07,
+     0x1d17e3e3,
+     0xc91efb03,
+     0x1f17e5e5,
+     0x79000005,
+     0x1f17e4e4,
+     0x79000003,
+     0x1f17e3e3,
+     0x79000001,
+     0xd10ffb07,
+     0x1d19e3e3,
+     0xc91ffb03,
+     0x1f19e5e5,
+     0x79000005,
+     0x1f19e4e4,
+     0x79000003,
+     0x1f19e3e3,
+     0x79000001,
+     0x24481ac2,
+     0x24c13482,
+     0xe1002283,
+     0x244804c2,
+     0x24c13882,
+     0x24481ac3,
+     0x24c13883,
+     0xf1002282,
+     0xf1002383,
+     0xc904fb07,
+     0x1f0efefe,
+     0xc90de203,
+     0x1f14fbfb,
+     0x79000003,
+     0x1d14fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc905fb07,
+     0x1f0efefe,
+     0xc90ce203,
+     0x1f15fbfb,
+     0x79000003,
+     0x1d15fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc906fb07,
+     0x1f0efefe,
+     0xc91ce203,
+     0x1f16fbfb,
+     0x79000003,
+     0x1d16fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc907fb07,
+     0x1f0efefe,
+     0xc912e203,
+     0x1f17fbfb,
+     0x79000003,
+     0x1d17fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc908fb07,
+     0x1f0efefe,
+     0xc90fe203,
+     0x1f18fbfb,
+     0x79000003,
+     0x1d18fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc909fb07,
+     0x1f0efefe,
+     0xc90ee203,
+     0x1f19fbfb,
+     0x79000003,
+     0x1d19fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90afb07,
+     0x1f0efefe,
+     0xc913e203,
+     0x1f1afbfb,
+     0x79000003,
+     0x1d1afbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc900fb07,
+     0x1f0efefe,
+     0xc902e303,
+     0x1f10fbfb,
+     0x79000003,
+     0x1d10fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc901fb07,
+     0x1f0efefe,
+     0xc903e303,
+     0x1f11fbfb,
+     0x79000003,
+     0x1d11fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc902fb07,
+     0x1f0efefe,
+     0xc905e303,
+     0x1f12fbfb,
+     0x79000003,
+     0x1d12fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc903fb07,
+     0x1f0efefe,
+     0xc904e303,
+     0x1f13fbfb,
+     0x79000003,
+     0x1d13fbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90bfb07,
+     0x1f0efefe,
+     0xc901e303,
+     0x1f1bfbfb,
+     0x79000003,
+     0x1d1bfbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90cfb07,
+     0x1f0efefe,
+     0xc916e303,
+     0x1f1cfbfb,
+     0x79000003,
+     0x1d1cfbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90dfb07,
+     0x1f0efefe,
+     0xc918e303,
+     0x1f1dfbfb,
+     0x79000003,
+     0x1d1dfbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90efb07,
+     0x1f0efefe,
+     0xc917e303,
+     0x1f1efbfb,
+     0x79000003,
+     0x1d1efbfb,
+     0x79000001,
+     0x1d0efefe,
+     0xc90ffb07,
+     0x1f0efefe,
+     0xc919e303,
+     0x1f1ffbfb,
+     0x79000003,
+     0x1d1ffbfb,
+     0x79000001,
+     0x1d0efefe,
+     0x244804c2,
+     0x24c19082,
+     0x24481ac3,
+     0x24c19083,
+     0xe1006287,
+     0xe1006384,
+     0x209c0000,
+     0x240000fe,
      0x240002c3,
      0x24202083,
      0x240000e2,
@@ -24,6 +327,9 @@
      0x81042480,
      0x240000f8,
      0x240008e9,
+     0xf12c3982,
+     0x5100e202,
+     0x1f03f8f8,
      0xf1243982,
      0x5100e202,
      0x1f01f8f8,
@@ -325,6 +631,7 @@
      0x240280fc,
      0x00fcfdfc,
      0xf1003c82,
+     0x1f0ffefe,
      0x10f0f0f2,
      0x2701e9e2,
      0x08e2ebf3,
@@ -335,6 +642,14 @@
      0x0901f7f7,
      0x02f6f7f7,
      0x240000ea,
+     0xd100f803,
+     0x240000e2,
+     0x79000002,
+     0x240400e2,
+     0x240001c6,
+     0x24100086,
+     0x00e2e6e6,
+     0x1d0ffefe,
      0x240000e1,
      0xc901f805,
      0x90f2388c,
@@ -375,7 +690,7 @@
      0x12e3efef,
      0x80f73c8f,
      0x0104f7f7,
-     0xc901f85c,
+     0xc901f863,
      0x24ffffe2,
      0x10e2ece7,
      0x0904e7e7,
@@ -409,6 +724,13 @@
      0x24c1909c,
      0xe1003c9b,
      0xe14c3a87,
+     0xc903f807,
+     0x1103e1fb,
+     0x6900fb05,
+     0xf100269b,
+     0x2300019c,
+     0xe100269b,
+     0x0104e6e6,
      0xf1443a9b,
      0xcf00fbff,
      0xf1503a87,
@@ -465,13 +787,13 @@
      0x0104f3f3,
      0x1504f8f8,
      0x0101e1e1,
-     0x6ee9e17f,
+     0x6ee9e178,
      0x79000004,
      0x1504f8f8,
      0x0102e1e1,
-     0x6ee9e17b,
+     0x6ee9e174,
      0x0101eaea,
-     0x6eebea78,
+     0x6eebea71,
      0x10f0f0e2,
      0x10f1f1f0,
      0x10e2e2f1,
@@ -498,20 +820,8 @@
      0x240190e1,
      0x00e1e3e3,
      0xe1002382,
-     0xc900f807,
-     0x241000c2,
-     0x24000082,
-     0x244804c3,
-     0x24c19483,
-     0xe1002382,
-     0x79000006,
-     0x241000c2,
-     0x24000082,
-     0x244804c3,
-     0x24c19083,
-     0xe1002382,
      0xf1003982,
-     0x5700e246,
+     0x5700e242,
      0x240000fb,
      0xe1443d9b,
      0xc901f80c,
--- a/include/render.h	Sat Feb 07 16:41:56 2015 +0000
+++ b/include/render.h	Mon Apr 27 13:01:57 2015 +0100
@@ -10,7 +10,8 @@
 
 // uint types
 #include <stdint.h>
-
+#include "../include/Utilities.h"
+#include "../include/matrix_gpio_mapping.h"
 // Mappings from pin numbers on PCB to actual DAC channels
 // This gives the DAC and ADC connectors the same effective pinout
 #define DAC_PIN0	6
@@ -35,15 +36,15 @@
 
 extern int gNumAudioChannels;	// How many audio channels are present
 extern int gNumMatrixChannels;	// How many matrix channels are present
-
-bool initialise_render(int numMatrixChannels, int numAudioChannels,
+extern int gNumMatrixGpioChannels;
+bool initialise_render(int numMatrixChannels, int numMatrixGpioChannels, int numAudioChannels,
 		  	   	   	   int numMatrixFramesPerPeriod,
 					   int numAudioFramesPerPeriod,
 					   float matrixSampleRate, float audioSampleRate,
 					   void *userData);
 
-void render(int numMatrixFrames, int numAudioFrames, float *audioIn, float *audioOut,
-			uint16_t *matrixIn, uint16_t *matrixOut);
+void render(int numMatrixFrames, int numAudioFrames, int numMatrixGpioFrames, float *audioIn, float *audioOut,
+			float *matrixIn, float *matrixOut, uint32_t *matrixGpio); // matrixIn/matrixOut: analog matrix samples mapped as floats from 0 to 1; matrixGpio: programmable GPIO words, accessed directly or through the digitalWrite()/digitalRead()/setDigitalDirection() macros from Utilities.h
 
 void render_medium_prio();
 void render_low_prio();
Binary file pru_rtaudio.bin has changed
--- a/pru_rtaudio.p	Sat Feb 07 16:41:56 2015 +0000
+++ b/pru_rtaudio.p	Mon Apr 27 13:01:57 2015 +0100
@@ -81,7 +81,8 @@
 #define COMM_FRAME_COUNT      32	  // How many frames have elapse since beginning
 #define COMM_USE_SPI          36          // Whether or not to use SPI ADC and DAC
 #define COMM_NUM_CHANNELS     40	  // Low 2 bits indicate 8 [0x3], 4 [0x1] or 2 [0x0] channels
-	
+#define COMM_USE_MATRIX_GPIO  44	  // Whether or not to use MATRIX_GPIO
+
 #define MCASP0_BASE 0x48038000
 #define MCASP1_BASE 0x4803C000
 
@@ -180,10 +181,18 @@
 #define FLAG_BIT_BUFFER1	0
 #define FLAG_BIT_USE_SPI	1
 #define FLAG_BIT_MCASP_HWORD	2		// Whether we are on the high word for McASP transmission
-	
+#define FLAG_BIT_USE_MATRIX_GPIO 3
+/*#define FLAG_BIT_MATRIX_GPIO_BUFFER 4 //Whether we are using buffer located at
+                                     // 0:  MEM_MATRIX_GPIO_BASE or
+*/                                     // 1: MEM_MATRIX_GPIO_BASE + 512
 // Registers used throughout
 
 // r1, r2, r3 are used for temporary storage
+#define MEM_MATRIX_GPIO_BASE 0x11000 //Base address for MATRIX_GPIO : Shared RAM + 0x1000 (matches PRU_MEM_MATRIX_GPIO_OFFSET in core/PRU.cpp)
+#define MEM_MATRIX_GPIO_BUFFER1_OFFSET 0x400 //Byte offset from the base to MATRIX_GPIO_BUFFER1, which starts 256 words after buffer 0.
+// 256 is the maximum number of frames allowed
+
+#define reg_matrix_gpio_current r6  // Pointer to current storage location of MATRIX_GPIO
 #define reg_num_channels	r9		// Number of SPI ADC/DAC channels to use
 #define reg_frame_current	r10		// Current frame count in SPI ADC/DAC transfer
 #define reg_frame_total		r11		// Total frame count for SPI ADC/DAC
@@ -205,6 +214,209 @@
 // r27, r28 used in macros
 #define reg_mcasp_addr		r29		// Base address for McASP
 
+// MATRIX_GPIO new code starts here
+//0  P8_07 36 0x890/090 66 gpio2[2]
+//1  P8_08 37 0x894/094 67 gpio2[3]
+//2  P8_09 39 0x89c/09c 69 gpio2[5]
+//3  P8_10 38 0x898/098 68 gpio2[4]
+//4  P8_11 13 0x834/034 45 gpio1[13]
+//5  P8_12 12 0x830/030 44 gpio1[12]
+//6  P9_12 30 0x878/078 60 gpio1[28]
+//7  P9_14 18 0x848/048 50 gpio1[18]
+//8  P8_15 15 0x83c/03c 47 gpio1[15]
+//9  P8_16 14 0x838/038 46 gpio1[14]
+//10 P9_16 19 0x84c/04c 51 gpio1[19]
+//11 P8_18 35 0x88c/08c 65 gpio2[1]
+//12 P8_27 56 0x8e0/0e0 86 gpio2[22]
+//13 P8_28 58 0x8e8/0e8 88 gpio2[24]
+//14 P8_29 57 0x8e4/0e4 87 gpio2[23]
+//15 P8_30 59 0x8ec/0ec 89 gpio2[25]
+
+//TODO during initialization, set the pinmuxers to mode 7, input enable
+ 
+//generic GPIOs constants
+//#define GPIO1 0x4804c000
+#define GPIO2 0x481ac000
+//#define GPIO_CLEARDATAOUT 0x190 //SETDATAOUT is CLEARDATAOUT+4
+#define GPIO_OE 0x134 
+#define GPIO_DATAIN 0x138
+
+//gpioX_oe and gpioX_datain share one register (see the aliases below), so they are never live at the same time
+//gpioX_cleardataout must be adjacent to gpioX_setdataout, in this order: both are stored with a single 8-byte SBBO
+#define reg_gpio1_oe r2
+#define reg_gpio2_oe r3
+#define reg_gpio2_cleardataout r4
+#define reg_gpio2_setdataout r5
+#define reg_gpio1_cleardataout r7
+#define reg_gpio1_setdataout r8
+#define reg_matrix_gpio r27 //will first contain matrixGpioOut from render() and matrixGpioIn to render() later
+//aliases: the datain names reuse the registers of the corresponding oe names
+#define reg_gpio1_datain reg_gpio1_oe
+#define reg_gpio2_datain reg_gpio2_oe
+
+.macro SET_GPIO1_BITS
+.mparam gpio_num_bit, matrix_gpio_bit // SET_GPIO_BITS bound to the GPIO1 registers; gpio_num_bit: pin's bit in the GPIO1 registers, matrix_gpio_bit: channel's direction bit in reg_matrix_gpio
+    SET_GPIO_BITS reg_gpio1_oe, reg_gpio1_setdataout, reg_gpio1_cleardataout, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+.macro SET_GPIO2_BITS
+.mparam gpio_num_bit, matrix_gpio_bit // SET_GPIO_BITS bound to the GPIO2 registers; gpio_num_bit: pin's bit in the GPIO2 registers, matrix_gpio_bit: channel's direction bit in reg_matrix_gpio
+    SET_GPIO_BITS reg_gpio2_oe, reg_gpio2_setdataout, reg_gpio2_cleardataout, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+.macro READ_GPIO1_BITS
+.mparam gpio_num_bit, matrix_gpio_bit // READ_GPIO_BITS bound to reg_gpio1_datain; gpio_num_bit: pin's bit in the sampled GPIO1 word, matrix_gpio_bit: channel's direction bit in reg_matrix_gpio
+    READ_GPIO_BITS reg_gpio1_datain, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+.macro READ_GPIO2_BITS
+.mparam gpio_num_bit, matrix_gpio_bit // READ_GPIO_BITS bound to reg_gpio2_datain; gpio_num_bit: pin's bit in the sampled GPIO2 word, matrix_gpio_bit: channel's direction bit in reg_matrix_gpio
+    READ_GPIO_BITS reg_gpio2_datain, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+
+.macro READ_GPIO_BITS
+.mparam gpio_data, gpio_num_bit, matrix_gpio_bit, matrix_gpio // copies bit gpio_num_bit of gpio_data into bit matrix_gpio_bit+16 of matrix_gpio, but only when bit matrix_gpio_bit of matrix_gpio is set
+    QBBC DONE, matrix_gpio, matrix_gpio_bit //direction bit clear: the pin is set as an output, nothing to do here
+SET r30.t14 // raise r30.t14 while an input is copied (NOTE(review): looks like a timing/debug marker - confirm)
+    QBBC CLEAR, gpio_data, gpio_num_bit 
+    SET matrix_gpio, matrix_gpio_bit+16 // pin reads high: set the channel's data bit
+    QBA DONE
+    CLEAR:
+        CLR matrix_gpio, matrix_gpio_bit+16 // pin reads low: clear the channel's data bit
+        QBA DONE
+    DONE:
+CLR r30.t14 // executed on every path, including the output (skip) path
+.endm
+
+.macro SET_GPIO_BITS
+.mparam gpio_oe, gpio_setdataout, gpio_cleardataout, gpio_num_bit, matrix_gpio_bit, matrix_gpio //updates the register images of GPIO_OE, GPIO_SETDATAOUT and GPIO_CLEARDATAOUT for one pin; nothing is written to the GPIO module here
+//Remember that the GPIO_OE Output data enable register behaves as follows for each bit:
+//0 = The corresponding GPIO pin is configured as an output.
+//1 = The corresponding GPIO pin is configured as an input.
+    QBBS SETINPUT, matrix_gpio, matrix_gpio_bit 
+    CLR gpio_oe, gpio_num_bit //direction bit is 0: configure pin as output
+    QBBC CLEARDATAOUT, matrix_gpio, matrix_gpio_bit+16 // check the output value (data bit = direction bit + 16). If it is 0, branch
+    SET gpio_setdataout, gpio_num_bit //if it is 1, request output high
+    QBA DONE
+CLEARDATAOUT:
+    SET gpio_cleardataout, gpio_num_bit // request output low
+    QBA DONE
+SETINPUT: //direction bit is 1: mark the pin as an input in the OE image
+    SET gpio_oe, gpio_num_bit
+    QBA DONE
+DONE:
+.endm
+
+.macro PROCESS_GPIO1_BITS
+//- sets appropriate bits for output in reg_gpio1_oe, reg_gpio1_setdataout, reg_gpio1_cleardataout
+//- sets appropriate bits in reg_matrix_gpio to reflect the input values
+.mparam gpio_num_bit, matrix_gpio_bit
+// READ_GPIO_BITS takes (gpio_data, gpio_num_bit, matrix_gpio_bit, matrix_gpio). NOTE: reg_gpio1_datain aliases reg_gpio1_oe, so the read tests the OE image unless DATAIN is loaded in between
+    SET_GPIO_BITS reg_gpio1_oe, reg_gpio1_setdataout, reg_gpio1_cleardataout, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+    READ_GPIO_BITS reg_gpio1_datain, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+
+.macro PROCESS_GPIO2_BITS
+//- sets appropriate bits for output in reg_gpio2_oe, reg_gpio2_setdataout, reg_gpio2_cleardataout
+//- sets appropriate bits in reg_matrix_gpio to reflect the input values (READ_GPIO_BITS takes 4 parameters; reg_gpio2_datain aliases reg_gpio2_oe)
+.mparam gpio_num_bit, matrix_gpio_bit
+    SET_GPIO_BITS reg_gpio2_oe, reg_gpio2_setdataout, reg_gpio2_cleardataout, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+    READ_GPIO_BITS reg_gpio2_datain, gpio_num_bit, matrix_gpio_bit, reg_matrix_gpio
+.endm
+
+QBA START // when first starting, go to START, skipping this section.
+
+MATRIX_GPIO:
+//Subroutine: entered with JAL r28.w0, returns with JMP r28.w0. reg_matrix_gpio holds the word passed in render(), one word per frame
+//[31:16]: data(1=high, 0=low), [15:0]: direction (0=output, 1=input)
+//Prepares gpio_oe, gpio_cleardataout and gpio_setdataout for each module, writes them out, and copies input levels into reg_matrix_gpio
+
+//load current status of GPIO_OE in reg_gpioX_oe
+    MOV reg_gpio1_oe, GPIO1 | GPIO_OE
+    //takes ...ns to go through the next two instructions
+    LBBO reg_gpio1_oe, reg_gpio1_oe, 0, 4
+//GPIO1-start
+//process oe and prepare dataout for GPIO1
+    LDI reg_gpio1_setdataout, 0
+    LDI reg_gpio1_cleardataout, 0
+//map GPIO_MATRIX to gpio1 pins, affects reg_gpio1_oe, reg_gpio1_cleardataout, reg_gpio1_setdataout
+    SET_GPIO1_BITS 13, 4
+    SET_GPIO1_BITS 12, 5
+    SET_GPIO1_BITS 28, 6
+    SET_GPIO1_BITS 18, 7
+    SET_GPIO1_BITS 15, 8
+    SET_GPIO1_BITS 14, 9
+    SET_GPIO1_BITS 19, 10
+//set the output enable register for gpio1.
+    MOV reg_gpio2_oe, GPIO1 | GPIO_OE  //reg_gpio2_oe is not loaded yet. It is here used as a temp
+    SBBO reg_gpio1_oe, reg_gpio2_oe, 0, 4 //takes two cycles (10ns)
+//GPIO1-end
+
+// reg_gpio1_oe has been written back; its register is free to be used as a temp until DATAIN is loaded
+// reg_gpio2_oe only held an address so far; it is loaded with GPIO2's OE value below
+//GPIO2-start
+//load current status of GPIO_OE in reg_gpioX_oe
+    MOV reg_gpio2_oe, GPIO2 | GPIO_OE
+    //takes ...ns to go through the next two instructions
+    LBBO reg_gpio2_oe, reg_gpio2_oe, 0, 4 //the base must be reg_gpio2_oe: reg_gpio1_oe holds GPIO1's OE value, not an address
+//process oe and prepare dataout for GPIO2
+    LDI reg_gpio2_setdataout, 0
+    LDI reg_gpio2_cleardataout, 0
+//map GPIO_MATRIX to gpio2 pins, affects reg_gpio2_oe, reg_gpio2_cleardataout, reg_gpio2_setdataout
+    SET_GPIO2_BITS 2, 0
+    SET_GPIO2_BITS 3, 1
+    SET_GPIO2_BITS 5, 2
+    SET_GPIO2_BITS 4, 3
+    SET_GPIO2_BITS 1, 11
+    SET_GPIO2_BITS 22, 12
+    SET_GPIO2_BITS 24, 13
+    SET_GPIO2_BITS 23, 14
+    SET_GPIO2_BITS 25, 15
+//set the output enable register for gpio2.
+    MOV reg_gpio1_oe, GPIO2 | GPIO_OE  //reg_gpio1_oe is now unused. It is here used as a temp
+    SBBO reg_gpio2_oe, reg_gpio1_oe, 0, 4 //takes two cycles (10ns)
+//GPIO2-end
+
+//load current inputs in reg_gpioX_datain
+    MOV reg_gpio1_datain, GPIO1 | GPIO_DATAIN
+    MOV reg_gpio2_datain, GPIO2 | GPIO_DATAIN
+    //takes ...ns to go through the next two instructions
+    LBBO reg_gpio1_datain, reg_gpio1_datain, 0, 4
+    LBBO reg_gpio2_datain, reg_gpio2_datain, 0, 4
+//copy the sampled level of every pin configured as an input into bits [31:16] of reg_matrix_gpio
+    READ_GPIO1_BITS 13, 4
+    READ_GPIO1_BITS 12, 5
+    READ_GPIO1_BITS 28, 6
+    READ_GPIO1_BITS 18, 7
+    READ_GPIO1_BITS 15, 8
+    READ_GPIO1_BITS 14, 9
+    READ_GPIO1_BITS 19, 10
+    READ_GPIO2_BITS 2, 0
+    READ_GPIO2_BITS 3, 1
+    READ_GPIO2_BITS 5, 2
+    READ_GPIO2_BITS 4, 3
+    READ_GPIO2_BITS 1, 11
+    READ_GPIO2_BITS 22, 12
+    READ_GPIO2_BITS 24, 13
+    READ_GPIO2_BITS 23, 14
+    READ_GPIO2_BITS 25, 15
+//reg_gpio2_oe is now unused, so reg_temp1 is available for temporary storage from now on
+//reg_gpio2_datain is now unused, so reg_temp2 is available for temporary storage from now on
+
+//now all the setdataout and cleardataout are ready to be written to the GPIO register.
+//CLEARDATAOUT and SETDATAOUT are consecutive positions in memory, so we write 8 bytes to CLEARDATAOUT.
+//We can do this because we chose reg_gpioX_cleardataout and reg_gpioX_setdataout to be consecutive registers
+//load the memory addresses to be written to
+    MOV reg_gpio1_datain, GPIO1 | GPIO_CLEARDATAOUT //reg_gpio1_datain is now unused and is used here as a temp
+    MOV reg_gpio2_datain, GPIO2 | GPIO_CLEARDATAOUT //reg_gpio2_datain is now unused and is used here as a temp
+//write 8 bytes for each GPIO
+//takes 30ns in total to go through the following two lines
+    SBBO reg_gpio1_cleardataout, reg_gpio1_datain, 0, 8 // takes 145ns to be effective when going low, 185ns when going high
+    SBBO reg_gpio2_cleardataout, reg_gpio2_datain, 0, 8 //takes 95ns to be effective when going low, 130ns when going high
+//reversing the order of the two lines above will swap the performances between the GPIO modules
+//i.e.: the first line will always take 145ns/185ns and the second one will always take 95ns/130ns, 
+//regardless of whether the order is gpio1-gpio2 or gpio2-gpio1
+JMP r28.w0 // go back to ADC_WRITE_GPIO
+
+// MATRIX_GPIO new code ends here
+
 	
 // Bring CS line low to write to DAC
 .macro DAC_CS_ASSERT
@@ -291,6 +503,30 @@
       ADC_CS_UNASSERT
 .endm
 
+// Complete ADC write+read with chip select; between ADC_TX and ADC_WAIT_FOR_FINISH it also performs IO for matrix_gpio when enabled
+.macro ADC_WRITE_GPIO
+.mparam in, out, do_gpio // in: passed to ADC_TX; out: passed to ADC_RX; do_gpio: register whose low two bits decide whether MATRIX_GPIO runs this time
+      ADC_CS_ASSERT
+      ADC_TX in
+      QBBC GPIO_DONE, reg_flags, FLAG_BIT_USE_MATRIX_GPIO //skip if MATRIX_GPIO is disabled
+      AND r27, do_gpio, 0x3 // only do a MATRIX_GPIO every 2 SPI I/O (NOTE(review): mask 0x3 passes one value of do_gpio in four - confirm how do_gpio steps)
+      QBNE GPIO_DONE, r27, 0 
+//from here to GPIO_DONE takes 1.8us, while usually ADC_WAIT_FOR_FINISH only waits for 1.14us.
+//TODO: it would be better to split the MATRIX_GPIO stuff in two parts:
+//- one taking place during DAC_WRITE which sets the GPIO_OE
+//- and the other during ADC_WRITE which actually reads DATAIN and writes CLEAR/SET DATAOUT
+                            //reg_matrix_gpio is actually r27, so do not use r27 from here to ...
+      LBBO reg_matrix_gpio, reg_matrix_gpio_current, 0, 4 
+      JAL r28.w0, MATRIX_GPIO // call the subroutine; the return address is kept in r28.w0
+      SBBO reg_matrix_gpio, reg_matrix_gpio_current, 0,   4 
+                            //..here you can start using r27 again
+      ADD reg_matrix_gpio_current, reg_matrix_gpio_current, 4 //increment pointer to the next frame's word
+GPIO_DONE:
+      ADC_WAIT_FOR_FINISH
+      ADC_RX out
+      ADC_CS_UNASSERT
+.endm
+
 // Write a McASP register
 .macro MCASP_REG_WRITE
 .mparam reg, value
@@ -335,6 +571,7 @@
 .endm
    
 START:
+      MOV r30, 0 
       // Set up c24 and c25 offsets with CTBIR register
       // Thus C24 points to start of PRU0 RAM
       MOV r3, 0x22020       // CTBIR0
@@ -362,6 +599,12 @@
       // Default number of channels in case SPI disabled
       LDI reg_num_channels, 8
 	
+      // Find out whether we should use MATRIX_GPIO
+      LBBO r2, reg_comm_addr, COMM_USE_MATRIX_GPIO, 4
+      QBEQ MATRIX_GPIO_FLAG_CHECK_DONE, r2, 0
+      SET reg_flags, reg_flags, FLAG_BIT_USE_MATRIX_GPIO
+//      SET reg_flags, reg_flags, FLAG_BIT_MATRIX_GPIO_BUFFER //set the flag, so that in WRITE_ONE_BUFFER we will start from buffer0
+MATRIX_GPIO_FLAG_CHECK_DONE:
       // Find out whether we should use SPI ADC and DAC
       LBBO r2, reg_comm_addr, COMM_USE_SPI, 4
       QBEQ SPI_FLAG_CHECK_DONE, r2, 0
@@ -541,6 +784,7 @@
 WRITE_ONE_BUFFER:
       // Write a single buffer of DAC samples and read a buffer of ADC samples
       // Load starting positions
+SET r30.t15
       MOV reg_dac_current, reg_dac_buf0         // DAC: reg_dac_current is current pointer
       LMBD r2, reg_num_channels, 1		// 1, 2 or 3 for 2, 4 or 8 channels
       LSL reg_adc_current, reg_frame_total, r2
@@ -551,7 +795,17 @@
       LSL reg_mcasp_adc_current, reg_mcasp_adc_current, 1
       ADC reg_mcasp_adc_current, reg_mcasp_adc_current, reg_mcasp_dac_current
       MOV reg_frame_current, 0
-	
+      QBBS MATRIX_GPIO_BASE_CHECK_SET, reg_flags, FLAG_BIT_BUFFER1  //check which buffer we are using for MATRIX_GPIO
+                  // if we are here, we are using buffer0 
+      MOV r2, 0  //so adjust offset appropriately
+      QBA MATRIX_GPIO_BASE_CHECK_DONE
+MATRIX_GPIO_BASE_CHECK_SET: //if we are here, we are using buffer1 
+      MOV r2, MEM_MATRIX_GPIO_BUFFER1_OFFSET //so adjust offset appropriately
+MATRIX_GPIO_BASE_CHECK_DONE:
+      MOV reg_matrix_gpio_current, MEM_MATRIX_GPIO_BASE
+      ADD reg_matrix_gpio_current, reg_matrix_gpio_current, r2
+
+CLR r30.t15
 WRITE_LOOP:
       // Write N channels to DAC from successive values in memory
       // At the same time, read N channels from ADC
@@ -651,7 +905,8 @@
       LSL r8, r8, AD7699_CHANNEL_OFFSET
       MOV r7, AD7699_CFG_MASK
       OR r7, r7, r8
-      ADC_WRITE r7, r7
+// ADC write+read that also performs the MATRIX_GPIO I/O when enabled (see ADC_WRITE_GPIO)
+      ADC_WRITE_GPIO r7, r7, r1
 
       // Mask out only the relevant 16 bits and store in reg_adc_data
       MOV r2, 0xFFFF
@@ -723,9 +978,9 @@
       MOV r2, reg_mcasp_buf0
       MOV reg_mcasp_buf0, reg_mcasp_buf1
       MOV reg_mcasp_buf1, r2
+      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1) //flip the buffer flag
 
       // Notify ARM of buffer swap
-      XOR reg_flags, reg_flags, (1 << FLAG_BIT_BUFFER1)
       AND r2, reg_flags, (1 << FLAG_BIT_BUFFER1)    // Mask out every but low bit
       SBBO r2, reg_comm_addr, COMM_CURRENT_BUFFER, 4
 
@@ -752,17 +1007,6 @@
       SBBO r2, r3, 0, 4       // Clear GPIO pin	
 LED_BLINK_DONE:	
 	
-      QBBC TESTLOW, reg_flags, FLAG_BIT_BUFFER1
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_SETDATAOUT
-      SBBO r2, r3, 0, 4
-      QBA TESTDONE
-TESTLOW:
-      MOV r2, 1 << 28
-      MOV r3, GPIO1 + GPIO_CLEARDATAOUT
-      SBBO r2, r3, 0, 4
-TESTDONE:
-	 
       // Check if we should finish: flag is zero as long as it should run
       LBBO r2, reg_comm_addr, COMM_SHOULD_STOP, 4
       QBEQ WRITE_ONE_BUFFER, r2, 0
@@ -787,4 +1031,4 @@
 	
       // Signal the ARM that we have finished 
       MOV R31.b0, PRU0_ARM_INTERRUPT + 16
-      HALT
\ No newline at end of file
+      HALT