To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.
root / src / portaudio_20161030_catalina_patch / src / os / win / pa_x86_plain_converters.c @ 162:d43aab368df9
History | View | Annotate | Download (39.1 KB)
| 1 |
/*
|
|---|---|
| 2 |
* Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
|
| 3 |
* Copyright (c) 1999-2002 Ross Bencina, Phil Burk
|
| 4 |
*
|
| 5 |
* Permission is hereby granted, free of charge, to any person obtaining
|
| 6 |
* a copy of this software and associated documentation files
|
| 7 |
* (the "Software"), to deal in the Software without restriction,
|
| 8 |
* including without limitation the rights to use, copy, modify, merge,
|
| 9 |
* publish, distribute, sublicense, and/or sell copies of the Software,
|
| 10 |
* and to permit persons to whom the Software is furnished to do so,
|
| 11 |
* subject to the following conditions:
|
| 12 |
*
|
| 13 |
* The above copyright notice and this permission notice shall be
|
| 14 |
* included in all copies or substantial portions of the Software.
|
| 15 |
*
|
| 16 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 17 |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 18 |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
| 19 |
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
|
| 20 |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
| 21 |
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
| 22 |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 23 |
*/
|
| 24 |
|
| 25 |
/*
|
| 26 |
* The text above constitutes the entire PortAudio license; however,
|
| 27 |
* the PortAudio community also makes the following non-binding requests:
|
| 28 |
*
|
| 29 |
* Any person wishing to distribute modifications to the Software is
|
| 30 |
* requested to send the modifications to the original developer so that
|
| 31 |
* they can be incorporated into the canonical version. It is also
|
| 32 |
* requested that these non-binding requests be included along with the
|
| 33 |
* license above.
|
| 34 |
*/
|
| 35 |
|
| 36 |
/** @file
|
| 37 |
@ingroup win_src
|
| 38 |
*/
|
| 39 |
|
| 40 |
#include "pa_x86_plain_converters.h" |
| 41 |
|
| 42 |
#include "pa_converters.h" |
| 43 |
#include "pa_dither.h" |
| 44 |
|
| 45 |
/*
|
| 46 |
the main reason these versions are faster than the equivalent C versions
|
| 47 |
is that float -> int casting is expensive in C on x86 because the rounding
|
| 48 |
mode needs to be changed for every cast. these versions only set
|
| 49 |
the rounding mode once outside the loop.
|
| 50 |
|
| 51 |
small additional speed gains are made by the way that clamping is
|
| 52 |
implemented.
|
| 53 |
|
| 54 |
TODO:
|
| 55 |
o- inline dither code
|
| 56 |
o- implement Dither only (no-clip) versions
|
| 57 |
o- implement int8 and uint8 versions
|
| 58 |
o- test thoroughly
|
| 59 |
|
| 60 |
o- the packed 24 bit functions could benefit from unrolling and avoiding
|
| 61 |
byte and word sized register access.
|
| 62 |
*/
|
| 63 |
|
| 64 |
/* -------------------------------------------------------------------------- */
|
| 65 |
|
| 66 |
/*
|
| 67 |
#define PA_CLIP_( val, min, max )\
|
| 68 |
{ val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
|
| 69 |
*/
|
| 70 |
|
| 71 |
/*
|
| 72 |
the following notes were used to determine whether a floating point
|
| 73 |
value should be saturated (ie >1 or <-1) by loading it into an integer
|
| 74 |
register. these should be rewritten so that they make sense.
|
| 75 |
|
| 76 |
an ieee floating point value
|
| 77 |
|
| 78 |
1.xxxxxxxxxxxxxxxxxxxx?
|
| 79 |
|
| 80 |
|
| 81 |
is less than or equal to 1 and greater than or equal to -1 either:
|
| 82 |
|
| 83 |
if the mantissa is 0 and the unbiased exponent is 0
|
| 84 |
|
| 85 |
OR
|
| 86 |
|
| 87 |
if the unbiased exponent < 0
|
| 88 |
|
| 89 |
this translates to:
|
| 90 |
|
| 91 |
if the mantissa is 0 and the biased exponent is 7F
|
| 92 |
|
| 93 |
or
|
| 94 |
|
| 95 |
if the biased exponent is less than 7F
|
| 96 |
|
| 97 |
|
| 98 |
therefore the value is greater than 1 or less than -1 if
|
| 99 |
|
| 100 |
the mantissa is not 0 and the biased exponent is 7F
|
| 101 |
|
| 102 |
or
|
| 103 |
|
| 104 |
if the biased exponent is greater than 7F
|
| 105 |
|
| 106 |
|
| 107 |
in other words, if we mask out the sign bit, the value is
|
| 108 |
greater than 1 or less than -1 if its integer representation is greater than:
|
| 109 |
|
| 110 |
0 01111111 0000 0000 0000 0000 0000 000
|
| 111 |
|
| 112 |
0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
|
| 113 |
*/
|
| 114 |
|
| 115 |
#if defined(_WIN64) || defined(_WIN32_WCE)
|
| 116 |
|
| 117 |
/*
|
| 118 |
-EMT64/AMD64 uses different asm
|
| 119 |
-VC2005 doesn't allow _WIN64 with inline assembly either!
|
| 120 |
*/
|
| 121 |
/* Stub used when _WIN64 or _WIN32_WCE is defined: the inline-assembly
   converters live in the #else branch and are not compiled for these
   targets, so this initializer deliberately does nothing. */
void PaUtil_InitializeX86PlainConverters( void ) |
| 122 |
{
|
| 123 |
} |
| 124 |
|
| 125 |
#else
|
| 126 |
|
| 127 |
/* -------------------------------------------------------------------------- */
|
| 128 |
|
| 129 |
/* x87 control word loaded with fldcw before each conversion loop; the caller's
   control word is saved with fstcw first and restored after the loop. */
static const short fpuControlWord_ = 0x033F; /*round to nearest, 64 bit precision, all exceptions masked*/ |
| 130 |
/* Scale factors loaded onto the FPU stack and multiplied with every sample.
   The dithered variants are one smaller than the plain ones; the reference C
   code describes this as preventing overflow when the dither is added. */
static const double int32Scaler_ = 0x7FFFFFFF; |
| 131 |
static const double ditheredInt32Scaler_ = 0x7FFFFFFE; |
| 132 |
static const double int24Scaler_ = 0x7FFFFF; |
| 133 |
static const double ditheredInt24Scaler_ = 0x7FFFFE; |
| 134 |
static const double int16Scaler_ = 0x7FFF; |
| 135 |
static const double ditheredInt16Scaler_ = 0x7FFE; |
| 136 |
|
| 137 |
/* Number of bits used for the dither signal. */
#define PA_DITHER_BITS_ (15) |
| 138 |
/* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
|
| 139 |
#define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1)) |
| 140 |
/* In-memory copy of the scale so the asm can use it as an fmul memory operand. */
static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_; |
| 141 |
/* Right-shift count applied to each 32 bit random seed by the inline dither
   generators (18 when PA_DITHER_BITS_ is 15). */
#define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1) |
| 142 |
|
| 143 |
/* -------------------------------------------------------------------------- */
|
| 144 |
|
| 145 |
/*
    Convert float32 samples to int32 without clipping or dithering.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in int32 units (multiplied by 4 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing.
    ditherGenerator: unused.

    Each sample is multiplied by int32Scaler_ and stored with fistp using the
    rounding mode set by fpuControlWord_. The caller's FPU control word is
    saved on entry and restored before returning.
*/
static void Float32_To_Int32(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    signed long *dest = (signed long*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {
        // REVIEW
        double scaled = *src * 0x7FFFFFFF;
        *dest = (signed long) scaled;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     int32Scaler_            // stack:  (int)0x7FFFFFFF

    Float32_To_Int32_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFFFF, (int)0x7FFFFFFF
        /*
            note: we could store to a temporary qword here which would cause
            wraparound distortion instead of the integer indefinite value
            (0x80000000) that fistp stores on overflow. that would be more
            work, and given that not enabling clipping is only advisable when
            you know that your signal isn't going to clip it isn't worth it.
        */
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  (int)0x7FFFFFFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
| 229 |
|
| 230 |
/* -------------------------------------------------------------------------- */
|
| 231 |
|
| 232 |
/*
    Convert float32 samples to int32 with clipping and no dithering.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in int32 units (multiplied by 4 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing.
    ditherGenerator: unused.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (the encoding of 1.0f) is stored as 0x7FFFFFFF if positive or
    0x80000000 if negative. All other samples are multiplied by int32Scaler_
    and stored with fistp. The caller's FPU control word is saved on entry and
    restored before returning.
*/
static void Float32_To_Int32_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    signed long *dest = (signed long*)destinationBuffer;
    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // REVIEW
        double scaled = *src * 0x7FFFFFFF;
        PA_CLIP_( scaled, -2147483648., 2147483647. );
        *dest = (signed long) scaled;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     int32Scaler_            // stack:  (int)0x7FFFFFFF

    Float32_To_Int32_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFFFF, (int)0x7FFFFFFF
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  (int)0x7FFFFFFF
        jmp     Float32_To_Int32_Clip_stored

    Float32_To_Int32_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_Clip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_Clip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
| 328 |
|
| 329 |
/* -------------------------------------------------------------------------- */
|
| 330 |
|
| 331 |
/*
    Convert float32 samples to int32 with clipping and triangular dither.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in int32 units (multiplied by 4 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing and leaves the
        dither state untouched.
    ditherGenerator: dither state (previous, randSeed1, randSeed2). It is
        copied into locals, advanced by the inline generator and written back
        after the loop.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (the encoding of 1.0f) is stored as 0x7FFFFFFF or 0x80000000
    without generating dither, so the dither state only advances for samples
    that are not clamped. All other samples are multiplied by
    ditheredInt32Scaler_, have the dither added and are stored with fistp.
    The caller's FPU control word is saved on entry and restored on exit.
*/
static void Float32_To_Int32_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    signed long *dest = (signed long*)destinationBuffer;

    while( count-- )
    {
        // REVIEW
        double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        double dithered = ((double)*src * (2147483646.0)) + dither;
        PA_CLIP_( dithered, -2147483648., 2147483647. );
        *dest = (signed long) dithered;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     ditheredInt32Scaler_    // stack:  int scaler

    Float32_To_Int32_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

        // generate dither inline (eax is needed as a scratch register by mul,
        // so the source byte stride is spilled around the generator)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  int scaler
        jmp     Float32_To_Int32_DitherClip_stored

    Float32_To_Int32_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_DitherClip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_DitherClip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }

    /* publish the advanced dither state back to the generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
| 482 |
|
| 483 |
/* -------------------------------------------------------------------------- */
|
| 484 |
|
| 485 |
/*
    Convert float32 samples to packed 24 bit integers without clipping or
    dithering.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in 3 byte units (multiplied by 3 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing.
    ditherGenerator: unused.

    Each sample is multiplied by int24Scaler_, converted to a 32 bit integer
    with fistp, and its low three bytes are written least significant byte
    first. Note that the reference code below instead scales to 32 bits and
    drops the low 8 bits. The caller's FPU control word is saved on entry and
    restored before returning.
*/
static void Float32_To_Int24(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits
        double scaled = *src * 0x7FFFFFFF;
        temp = (signed long) scaled;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/

    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     int24Scaler_            // stack:  (int)0x7FFFFF

    Float32_To_Int24_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  (int)0x7FFFFF
        mov     edx, tempInt32

        mov     byte ptr [edi], DL      // byte 0 (least significant)
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
| 578 |
|
| 579 |
/* -------------------------------------------------------------------------- */
|
| 580 |
|
| 581 |
/*
    Convert float32 samples to packed 24 bit integers with clipping and no
    dithering.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in 3 byte units (multiplied by 3 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing.
    ditherGenerator: unused.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (the encoding of 1.0f) becomes 0x7FFFFF if positive or 0x800000
    if negative. All other samples are multiplied by int24Scaler_ and converted
    with fistp. The low three bytes of the result are written least significant
    byte first. The caller's FPU control word is saved on entry and restored
    before returning.
*/
static void Float32_To_Int24_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits
        double scaled = *src * 0x7FFFFFFF;
        PA_CLIP_( scaled, -2147483648., 2147483647. );
        temp = (signed long) scaled;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/

    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     int24Scaler_            // stack:  (int)0x7FFFFF

    Float32_To_Int24_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  (int)0x7FFFFF
        mov     edx, tempInt32
        jmp     Float32_To_Int24_Clip_store

    Float32_To_Int24_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive, 0x800000 for negative

    Float32_To_Int24_Clip_store:

        mov     byte ptr [edi], DL      // byte 0 (least significant)
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_Clip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
| 692 |
|
| 693 |
/* -------------------------------------------------------------------------- */
|
| 694 |
|
| 695 |
/*
    Convert float32 samples to packed 24 bit integers with clipping and
    triangular dither.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in 3 byte units (multiplied by 3 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing and leaves the
        dither state untouched.
    ditherGenerator: dither state (previous, randSeed1, randSeed2). It is
        copied into locals, advanced by the inline generator and written back
        after the loop.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (the encoding of 1.0f) becomes 0x7FFFFF or 0x800000 without
    generating dither, so the dither state only advances for samples that are
    not clamped. All other samples are multiplied by ditheredInt24Scaler_, have
    the dither added and are converted with fistp. The low three bytes of the
    result are written least significant byte first. The caller's FPU control
    word is saved on entry and restored on exit.
*/
static void Float32_To_Int24_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits

        // FIXME: the dither amplitude here appears to be too small by 8 bits
        double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        double dithered = ((double)*src * (2147483646.0)) + dither;
        PA_CLIP_( dithered, -2147483648., 2147483647. );

        temp = (signed long) dithered;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/

    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    signed long tempInt32;

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = source ptr + count * byte stride

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     ditheredInt24Scaler_    // stack:  int scaler

    Float32_To_Int24_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

        // generate dither inline (eax is needed as a scratch register by mul,
        // so the source byte stride is spilled around the generator)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  int scaler
        mov     edx, tempInt32
        jmp     Float32_To_Int24_DitherClip_store

    Float32_To_Int24_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive, 0x800000 for negative

    Float32_To_Int24_DitherClip_store:

        mov     byte ptr [edi], DL      // byte 0 (least significant)
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bytes 1 and 2 in one store

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_DitherClip_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }

    /* publish the advanced dither state back to the generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
| 862 |
|
| 863 |
/* -------------------------------------------------------------------------- */
|
| 864 |
|
| 865 |
/*
    Convert float32 samples to int16 without clipping or dithering.

    destinationBuffer / destinationStride: first output sample and the distance
        between output samples, in int16 units (multiplied by 2 below).
    sourceBuffer / sourceStride: first input sample and the distance between
        input samples, in float32 units (multiplied by 4 below).
    count: number of samples to convert; 0 converts nothing.
    ditherGenerator: unused.

    Each sample is multiplied by int16Scaler_ and stored as a 16 bit integer
    with fistp using the rounding mode set by fpuControlWord_. The caller's FPU
    control word is saved on entry and restored before returning.
*/
static void Float32_To_Int16(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    reference implementation:

    float *src = (float*)sourceBuffer;
    signed short *dest = (signed short*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {

        short samp = (short) (*src * (32767.0f));
        *dest = samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The asm loop compares the source pointer against the end pointer only
       after converting a sample, so entering it with count == 0 would convert
       a sample and then run past both buffers. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_         // set the rounding mode once, outside the loop

        fld     int16Scaler_            // stack:  (int)0x7FFF

    Float32_To_Int16_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack:  (int)0x7FFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_loop

        ffree   st(0)                   // discard the scaler ...
        fincstp                         // ... and pop it off the FPU stack

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
| 943 |
|
| 944 |
/* -------------------------------------------------------------------------- */
|
| 945 |
|
| 946 |
/*
 * Convert `count` float32 samples to signed 16-bit integers, clamping samples
 * whose magnitude exceeds 1.0.
 *
 * destinationBuffer / destinationStride: int16 output and its stride, in samples
 *     (the stride is multiplied by 2 below to obtain a byte stride).
 * sourceBuffer / sourceStride: float32 input and its stride, in samples
 *     (the stride is multiplied by 4 below to obtain a byte stride).
 * count: number of samples to convert; 0 is a no-op.
 * ditherGenerator: unused by this variant.
 *
 * In-range samples are multiplied by int16Scaler_ and stored with fistp.
 * Out-of-range samples never touch the FPU: the result is built from the sign
 * bit alone (0x7FFF for positive, 0x8000 for negative input).
 */
static void Float32_To_Int16_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    Reference C implementation of the assembly below:

    float *src = (float*)sourceBuffer;
    signed short *dest = (signed short*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {
        long samp = (signed long) (*src * (32767.0f));
        PA_CLIP_( samp, -0x8000, 0x7FFF );
        *dest = (signed short) samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The assembly loop tests for termination only AFTER converting a sample,
       and only by exact pointer equality. Entering it with count == 0 would
       convert one sample, step past the end pointer and never terminate, so
       bail out here (the reference C loop above does nothing in that case). */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        // save the caller's FPU control word and install the converters' one
        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack: (int)0x7FFF

    Float32_To_Int16_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // |value| compared against the bit pattern of 1.0f

        jg      Float32_To_Int16_Clip_clamp  // greater than 1.0 or less than -1.0

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack: (int)0x7FFF
        jmp     Float32_To_Int16_Clip_stored

    Float32_To_Int16_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        add     dx, 0x7FFF              // convert to maximum range integers: 0x7FFF (+) or 0x8000 (-)
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_Clip_stored:

        add     edi, ebx                // increment destination ptr
        //lea     edi, [edi+ebx]

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_Clip_loop

        // discard the scaler that is still sitting in st(0)
        ffree   st(0)
        fincstp

        // clear pending FPU exceptions and restore the caller's control word
        fwait
        fnclex
        fldcw   savedFpuControlWord
    }
}
| 1042 |
|
| 1043 |
/* -------------------------------------------------------------------------- */
|
| 1044 |
|
| 1045 |
/*
 * Convert `count` float32 samples to signed 16-bit integers, adding
 * high-passed dither to in-range samples and clamping samples whose magnitude
 * exceeds 1.0.
 *
 * destinationBuffer / destinationStride: int16 output and its stride, in samples
 *     (the stride is multiplied by 2 below to obtain a byte stride).
 * sourceBuffer / sourceStride: float32 input and its stride, in samples
 *     (the stride is multiplied by 4 below to obtain a byte stride).
 * count: number of samples to convert; 0 is a no-op.
 * ditherGenerator: dither state (previous, randSeed1, randSeed2). It is copied
 *     into locals, advanced once per in-range sample, and written back at the end.
 *
 * Out-of-range samples take the clamp path, which neither adds dither nor
 * advances the dither state.
 */
static void Float32_To_Int16_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
/*
    Reference C implementation of the assembly below:

    float *src = (float*)sourceBuffer;
    signed short *dest = (signed short*)destinationBuffer;

    while( count-- )
    {

        float dither  = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        float dithered = (*src * (32766.0f)) + dither;
        signed long samp = (signed long) dithered;
        PA_CLIP_( samp, -0x8000, 0x7FFF );
        *dest = (signed short) samp;

        src += sourceStride;
        dest += destinationStride;
    }
*/

    short savedFpuControlWord;

    /* spill storage: */
    signed long sourceByteStride;
    signed long highpassedDither;

    /* dither state: */
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The assembly loop tests for termination only AFTER converting a sample,
       and only by exact pointer equality. Entering it with count == 0 would
       convert one sample, step past the end pointer and never terminate, so
       bail out here. The dither state is untouched at this point, so there is
       nothing to write back. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        // save the caller's FPU control word and install the converters' one
        fwait
        fstcw   savedFpuControlWord
        fldcw   fpuControlWord_

        fld     ditheredInt16Scaler_    // stack: int scaler

    Float32_To_Int16_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // |value| compared against the bit pattern of 1.0f

        jg      Float32_To_Int16_DitherClip_clamp  // greater than 1.0 or less than -1.0

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack: value, int scaler
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        fmul    st(0), st(1)            // st(0) *= st(1), stack: value*(int scaler), int scaler

        /*
        // disabled alternative: call PaUtil_GenerateFloatTriangularDither with C calling convention
        mov     sourceByteStride, eax   // save eax
        mov     sourceEnd, ecx          // save ecx
        push    ditherGenerator         // pass ditherGenerator parameter on stack
        call    PaUtil_GenerateFloatTriangularDither  // stack: dither, value*(int scaler), int scaler
        pop     edx                     // clear parameter off stack
        mov     ecx, sourceEnd          // restore ecx
        mov     eax, sourceByteStride   // restore eax
        */

        // generate dither (eax is needed for mul, so the byte stride is spilled)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165 (only the low half, eax, is used)
        //add     eax, 907633515
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165 (only the low half, eax, is used)
        //add     eax, 907633515
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        //add     eax, edx              // eax -> current
        lea     eax, [eax+edx]          // current = randSeed1>>x + randSeed2>>x
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
        // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   word ptr [edi]          // store scaled int into dest, stack: int scaler
        jmp     Float32_To_Int16_DitherClip_stored

    Float32_To_Int16_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        //lea     esi, [esi+eax]
        add     dx, 0x7FFF              // convert to maximum range integers: 0x7FFF (+) or 0x8000 (-)
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_DitherClip_stored:

        add     edi, ebx                // increment destination ptr
        //lea     edi, [edi+ebx]

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_DitherClip_loop

        // discard the scaler that is still sitting in st(0)
        ffree   st(0)
        fincstp

        // clear pending FPU exceptions and restore the caller's control word
        fwait
        fnclex
        fldcw   savedFpuControlWord
    }

    /* publish the advanced dither state back to the caller's generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
| 1198 |
|
| 1199 |
/* -------------------------------------------------------------------------- */
|
| 1200 |
|
| 1201 |
void PaUtil_InitializeX86PlainConverters( void ) |
| 1202 |
{
|
| 1203 |
paConverters.Float32_To_Int32 = Float32_To_Int32; |
| 1204 |
paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip; |
| 1205 |
paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip; |
| 1206 |
|
| 1207 |
paConverters.Float32_To_Int24 = Float32_To_Int24; |
| 1208 |
paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip; |
| 1209 |
paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip; |
| 1210 |
|
| 1211 |
paConverters.Float32_To_Int16 = Float32_To_Int16; |
| 1212 |
paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip; |
| 1213 |
paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip; |
| 1214 |
} |
| 1215 |
|
| 1216 |
#endif
|
| 1217 |
|
| 1218 |
/* -------------------------------------------------------------------------- */
|