andrewm@318: @
andrewm@318: @ FormatConvert.S
andrewm@318: @
andrewm@318: @ NEON-based vector functions for converting data between int
andrewm@318: @ and float with clipping and optional level scaling. 
andrewm@318: @ Written in assembly for performance reasons.
andrewm@318: @
andrewm@318: @ Bela: http://bela.io
andrewm@318: @
andrewm@318: @ (c) 2016 Andrew McPherson
andrewm@318: @ Centre for Digital Music
andrewm@318: @ Queen Mary University of London
andrewm@318: @
andrewm@318: 
andrewm@318: 	.syntax unified		@ Use unified ARM/Thumb (UAL) assembler syntax throughout this file
andrewm@318: 	.arch armv7-a		@ Assemble for the ARMv7-A architecture
andrewm@318: 	.fpu neon			@ Allow NEON (Advanced SIMD) instructions
andrewm@318: 
andrewm@318: @	void int16_to_float_audio(int numSamples, int16_t *inBuffer, float *outBuffer);
andrewm@318: @
andrewm@318: @	Convert 16-bit signed ints to floats between -1.0 and 1.0. Used for audio input.
andrewm@318: @	Each sample is divided by 32768, so the output lies in [-1.0, 32767/32768].
andrewm@318: @
andrewm@318: @	numSamples should be a multiple of 4 (i.e. with stereo audio, frames should be even).
andrewm@318: @	Any remainder is dropped; a zero or negative count returns without touching memory.
andrewm@318: @	inBuffer should be aligned on an 8-byte boundary.
andrewm@318: @ 	outBuffer should be aligned on a 16-byte boundary.
andrewm@318: @
andrewm@318: @	Clobbers: r0, r1, r2, d0, q1 (d2-d3), q2 (d4-d5) and the condition flags.
andrewm@318: 
andrewm@318: @ Registers:
andrewm@318: @    r0: numSamples            How many samples remain to be converted
andrewm@318: @    r1: inBuffer              Buffer for input samples from ADC (post-incremented)
andrewm@318: @    r2: outBuffer             Buffer to store output samples for render function (post-incremented)
andrewm@318: 
andrewm@318: dIFAu_Input		.dn		D0.S16
andrewm@318: qIFAu_Expanded	.qn		Q1.S32
andrewm@318: qIFAu_Output	.qn		Q2.F32
andrewm@318: dIFAu_Output_0	.dn		D4.F32
andrewm@318: dIFAu_Output_1	.dn		D5.F32
andrewm@318: 
andrewm@318: 	.align	2
andrewm@318: 	.global	int16_to_float_audio
andrewm@318: 	.thumb
andrewm@318: 	.thumb_func
andrewm@318: 	.type	int16_to_float_audio, %function
andrewm@318: int16_to_float_audio:
andrewm@318: 	bic r0, r0, #3				@ Round numSamples down to a multiple of 4
andrewm@318: 	cmp r0, #0					@ Signed test, so it agrees with the signed loop condition below
andrewm@318: 	it le
andrewm@318: 	bxle lr						@ Nothing to do for zero or negative counts
andrewm@318: 
andrewm@318: 	@ ---- loop: convert four samples per iteration; r0 > 0 and a multiple of 4 here ----
andrewm@318: int16_to_float_audio_loop:
andrewm@318: 	vld1 {dIFAu_Input}, [r1]!						@ Load four 16-bit signed ints from inBuffer++
andrewm@318: 	vmovl qIFAu_Expanded, dIFAu_Input				@ Sign-extend four 16-bit ints to 32-bit ints
andrewm@318: 	vcvt qIFAu_Output, qIFAu_Expanded, #15			@ Convert four Q17.15 fixed points to floats (equiv. to / 32768)
andrewm@318: 	vst1 {dIFAu_Output_0, dIFAu_Output_1}, [r2]!	@ Store four floats to outBuffer++
andrewm@318: 
andrewm@318: 	subs r0, r0, #4							@ numSamples -= 4
andrewm@318: 	bgt int16_to_float_audio_loop			@ Loop while numSamples > 0 (Thumb B<cond> needs no IT block)
andrewm@318: 
andrewm@318: 	bx lr
andrewm@318: 	.size	int16_to_float_audio, .-int16_to_float_audio
andrewm@318: 	
andrewm@318: 
andrewm@318: 
andrewm@318: @	void int16_to_float_analog(int numSamples, uint16_t *inBuffer, float *outBuffer);
andrewm@318: @
andrewm@318: @	Convert 16-bit unsigned ints to floats between 0.0 and 1.0. Used for analog input.
andrewm@318: @	Each sample is divided by 65536, so the output lies in [0.0, 65535/65536].
andrewm@318: @
andrewm@318: @	numSamples should be a multiple of 4 (i.e. integer numFrames at 4 channels).
andrewm@318: @	Any remainder is dropped; a zero or negative count returns without touching memory.
andrewm@318: @	inBuffer should be aligned on an 8-byte boundary.
andrewm@318: @ 	outBuffer should be aligned on a 16-byte boundary.
andrewm@318: @
andrewm@318: @	Clobbers: r0, r1, r2, d0, q1 (d2-d3), q2 (d4-d5) and the condition flags.
andrewm@318: 
andrewm@318: @ Registers:
andrewm@318: @    r0: numSamples            How many samples remain to be converted
andrewm@318: @    r1: inBuffer              Buffer for input samples from ADC (post-incremented)
andrewm@318: @    r2: outBuffer             Buffer to store output samples for render function (post-incremented)
andrewm@318: 
andrewm@318: dIFAn_Input		.dn		D0.U16
andrewm@318: qIFAn_Expanded	.qn		Q1.U32
andrewm@318: qIFAn_Output	.qn		Q2.F32
andrewm@318: dIFAn_Output_0	.dn		D4.F32
andrewm@318: dIFAn_Output_1	.dn		D5.F32
andrewm@318: 
andrewm@318: 	.align	2
andrewm@318: 	.global	int16_to_float_analog
andrewm@318: 	.thumb
andrewm@318: 	.thumb_func
andrewm@318: 	.type	int16_to_float_analog, %function
andrewm@318: int16_to_float_analog:
andrewm@318: 	bic r0, r0, #3				@ Round numSamples down to a multiple of 4
andrewm@318: 	cmp r0, #0					@ Signed test, so it agrees with the signed loop condition below
andrewm@318: 	it le
andrewm@318: 	bxle lr						@ Nothing to do for zero or negative counts
andrewm@318: 
andrewm@318: 	@ ---- loop: convert four samples per iteration; r0 > 0 and a multiple of 4 here ----
andrewm@318: int16_to_float_analog_loop:
andrewm@318: 	vld1 {dIFAn_Input}, [r1]!						@ Load four 16-bit unsigned ints from inBuffer++
andrewm@318: 	vmovl qIFAn_Expanded, dIFAn_Input				@ Zero-extend four 16-bit ints to 32-bit unsigned ints
andrewm@318: 	vcvt qIFAn_Output, qIFAn_Expanded, #16			@ Convert four Q16.16 fixed points to floats (equiv. to / 65536)
andrewm@318: 	vst1 {dIFAn_Output_0, dIFAn_Output_1}, [r2]!	@ Store four floats to outBuffer++
andrewm@318: 
andrewm@318: 	subs r0, r0, #4							@ numSamples -= 4
andrewm@318: 	bgt int16_to_float_analog_loop			@ Loop while numSamples > 0 (Thumb B<cond> needs no IT block)
andrewm@318: 
andrewm@318: 	bx lr
andrewm@318: 	.size	int16_to_float_analog, .-int16_to_float_analog
andrewm@318: 	
andrewm@318: 
andrewm@318: @	void float_to_int16_audio(int numSamples, float *inBuffer, int16_t *outBuffer);
andrewm@318: @
andrewm@318: @	Convert floats between -1.0 and 1.0 to 16-bit signed ints, with saturation.
andrewm@318: @	Used for audio output. Each sample is scaled by 32768; results outside
andrewm@318: @	[-32768, 32767] (including an input of exactly 1.0) are clamped to that range.
andrewm@318: @
andrewm@318: @	numSamples should be a multiple of 4 (i.e. with stereo audio, frames should be even).
andrewm@318: @	Any remainder is dropped; a zero or negative count returns without touching memory.
andrewm@318: @	inBuffer should be aligned on a 16-byte boundary.
andrewm@318: @ 	outBuffer should be aligned on an 8-byte boundary.
andrewm@318: @
andrewm@318: @	Clobbers: r0, r1, r2, q0 (d0-d1), q1 (d2-d3), d4 and the condition flags.
andrewm@318: 
andrewm@318: @ Registers:
andrewm@318: @    r0: numSamples            How many samples remain to be converted
andrewm@318: @    r1: inBuffer              Buffer for input samples from render function (post-incremented)
andrewm@318: @    r2: outBuffer             Buffer to store output samples for DAC (post-incremented)
andrewm@318: 
andrewm@318: qFIAu_Input		.qn		Q0.F32
andrewm@318: dFIAu_Input_0	.dn		D0.F32
andrewm@318: dFIAu_Input_1	.dn		D1.F32
andrewm@318: qFIAu_Converted	.qn		Q1.S32
andrewm@318: dFIAu_Narrowed	.dn		D4.S16
andrewm@318: 
andrewm@318: 	.align	2
andrewm@318: 	.global	float_to_int16_audio
andrewm@318: 	.thumb
andrewm@318: 	.thumb_func
andrewm@318: 	.type	float_to_int16_audio, %function
andrewm@318: 
andrewm@318: float_to_int16_audio:
andrewm@318: 	bic r0, r0, #3				@ Round numSamples down to a multiple of 4
andrewm@318: 	cmp r0, #0					@ Signed test, so it agrees with the signed loop condition below
andrewm@318: 	it le
andrewm@318: 	bxle lr						@ Nothing to do for zero or negative counts
andrewm@318: 
andrewm@318: 	@ ---- loop: convert four samples per iteration; r0 > 0 and a multiple of 4 here ----
andrewm@318: float_to_int16_audio_loop:
andrewm@318: 	vld1 {dFIAu_Input_0, dFIAu_Input_1}, [r1]!		@ Load four floats from inBuffer++
andrewm@318: 	vcvt qFIAu_Converted, qFIAu_Input, #15			@ Convert four floats into four Q17.15 fixed points (equiv. to * 32768)
andrewm@318: 													@ Out-of-range values are limited to what fits in 32 bits
andrewm@318: 	vqmovn dFIAu_Narrowed, qFIAu_Converted			@ Narrow four 32-bit signed ints to 16-bit signed ints, with saturation
andrewm@318: 	vst1 {dFIAu_Narrowed}, [r2]!					@ Store four ints to outBuffer++
andrewm@318: 
andrewm@318: 	subs r0, r0, #4							@ numSamples -= 4
andrewm@318: 	bgt float_to_int16_audio_loop			@ Loop while numSamples > 0 (Thumb B<cond> needs no IT block)
andrewm@318: 
andrewm@318: 	bx lr
andrewm@318: 	.size	float_to_int16_audio, .-float_to_int16_audio
andrewm@318: 	
andrewm@318: 
andrewm@318: @	void float_to_int16_analog(int numSamples, float *inBuffer, uint16_t *outBuffer);
andrewm@318: @
andrewm@318: @	Convert floats between 0.0 and 1.0 to 16-bit unsigned ints, with saturation.
andrewm@318: @	Used for analog output. Each sample is scaled by 65536; results outside
andrewm@318: @	[0, 65535] (including an input of exactly 1.0) are clamped to that range.
andrewm@318: @
andrewm@318: @	numSamples should be a multiple of 4.
andrewm@318: @	Any remainder is dropped; a zero or negative count returns without touching memory.
andrewm@318: @	inBuffer should be aligned on a 16-byte boundary.
andrewm@318: @ 	outBuffer should be aligned on an 8-byte boundary.
andrewm@318: @
andrewm@318: @	Clobbers: r0, r1, r2, q0 (d0-d1), q1 (d2-d3), d4 and the condition flags.
andrewm@318: 
andrewm@318: @ Registers:
andrewm@318: @    r0: numSamples            How many samples remain to be converted
andrewm@318: @    r1: inBuffer              Buffer for input samples from render function (post-incremented)
andrewm@318: @    r2: outBuffer             Buffer to store output samples for DAC (post-incremented)
andrewm@318: 
andrewm@318: qFIAn_Input		.qn		Q0.F32
andrewm@318: dFIAn_Input_0	.dn		D0.F32
andrewm@318: dFIAn_Input_1	.dn		D1.F32
andrewm@318: qFIAn_Converted	.qn		Q1.U32
andrewm@318: dFIAn_Narrowed	.dn		D4.U16
andrewm@318: 
andrewm@318: 	.align	2
andrewm@318: 	.global	float_to_int16_analog
andrewm@318: 	.thumb
andrewm@318: 	.thumb_func
andrewm@318: 	.type	float_to_int16_analog, %function
andrewm@318: 
andrewm@318: float_to_int16_analog:
andrewm@318: 	bic r0, r0, #3				@ Round numSamples down to a multiple of 4
andrewm@318: 	cmp r0, #0					@ Signed test, so it agrees with the signed loop condition below
andrewm@318: 	it le
andrewm@318: 	bxle lr						@ Nothing to do for zero or negative counts
andrewm@318: 
andrewm@318: 	@ ---- loop: convert four samples per iteration; r0 > 0 and a multiple of 4 here ----
andrewm@318: float_to_int16_analog_loop:
andrewm@318: 	vld1 {dFIAn_Input_0, dFIAn_Input_1}, [r1]!		@ Load four floats from inBuffer++
andrewm@318: 	vcvt qFIAn_Converted, qFIAn_Input, #16			@ Convert four floats into four Q16.16 fixed points (equiv. to * 65536)
andrewm@318: 													@ Out-of-range values are limited to what fits in 32 bits
andrewm@318: 	vqmovn dFIAn_Narrowed, qFIAn_Converted			@ Narrow four 32-bit unsigned ints to 16-bit unsigned ints, with saturation
andrewm@318: 	vst1 {dFIAn_Narrowed}, [r2]!					@ Store four ints to outBuffer++
andrewm@318: 
andrewm@318: 	subs r0, r0, #4							@ numSamples -= 4
andrewm@318: 	bgt float_to_int16_analog_loop			@ Loop while numSamples > 0 (Thumb B<cond> needs no IT block)
andrewm@318: 
andrewm@318: 	bx lr
andrewm@318: 	.size	float_to_int16_analog, .-float_to_int16_analog
andrewm@318: