To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

The primary repository for this project is hosted at https://github.com/sonic-visualiser/sv-dependency-builds .
This repository is a read-only copy which is updated automatically every hour.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / src / portaudio_20161030_catalina_patch / src / os / win / pa_x86_plain_converters.c @ 164:9fa11135915a

History | View | Annotate | Download (39.1 KB)

1
/*
2
 * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
3
 * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining
6
 * a copy of this software and associated documentation files
7
 * (the "Software"), to deal in the Software without restriction,
8
 * including without limitation the rights to use, copy, modify, merge,
9
 * publish, distribute, sublicense, and/or sell copies of the Software,
10
 * and to permit persons to whom the Software is furnished to do so,
11
 * subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be
14
 * included in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24

    
25
/*
26
 * The text above constitutes the entire PortAudio license; however, 
27
 * the PortAudio community also makes the following non-binding requests:
28
 *
29
 * Any person wishing to distribute modifications to the Software is
30
 * requested to send the modifications to the original developer so that
31
 * they can be incorporated into the canonical version. It is also 
32
 * requested that these non-binding requests be included along with the 
33
 * license above.
34
 */
35

    
36
/** @file
37
 @ingroup win_src
38
*/
39

    
40
#include "pa_x86_plain_converters.h"
41

    
42
#include "pa_converters.h"
43
#include "pa_dither.h"
44

    
45
/*
46
    the main reason these versions are faster than the equivalent C versions
47
    is that float -> int casting is expensive in C on x86 because the rounding
48
    mode needs to be changed for every cast. these versions only set
49
    the rounding mode once outside the loop.
50

51
    small additional speed gains are made by the way that clamping is
52
    implemented.
53

54
TODO:
55
    o- inline dither code
56
    o- implement Dither only (no-clip) versions
57
    o- implement int8 and uint8 versions
58
    o- test thoroughly
59

60
    o- the packed 24 bit functions could benefit from unrolling and avoiding
61
        byte and word sized register access.
62
*/
63

    
64
/* -------------------------------------------------------------------------- */
65

    
66
/*
67
#define PA_CLIP_( val, min, max )\
68
    { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
69
*/
70

    
71
/*
    the following notes explain how to determine whether a floating point
    value needs to be saturated (ie is >1 or <-1) by loading its bit pattern
    into an integer register.

    a normalised ieee single precision floating point value has the form

        (sign) 1.xxxxxxxxxxxxxxxxxxxxxxx * 2^(unbiased exponent)

    its magnitude is less than or equal to 1 (ie the value is less than or
    equal to 1 and greater than or equal to -1) either:

        if the mantissa is 0 and the unbiased exponent is 0
        OR
        if the unbiased exponent < 0

    this translates to:

        if the mantissa is 0 and the biased exponent is 7F
        or
        if the biased exponent is less than 7F

    therefore the value is greater than 1 or less than -1 if

        the mantissa is not 0 and the biased exponent is 7F
        or
        if the biased exponent is greater than 7F

    in other words, if we mask out the sign bit, the value is
    greater than 1 or less than -1 if its integer representation is greater than:

    0 01111111 0000 0000 0000 0000 0000 000

    0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
*/
114

    
115
#if defined(_WIN64) || defined(_WIN32_WCE)
116

    
117
/*
118
        -EMT64/AMD64 uses different asm
119
        -VC2005 doesn't allow _WIN64 with inline assembly either!
120
 */
121
/*
    No-op initializer compiled when _WIN64 or _WIN32_WCE is defined. The
    inline-assembly converters in this file are excluded from those builds,
    so there is nothing to install.
*/
void PaUtil_InitializeX86PlainConverters( void )
{
}
124

    
125
#else
126

    
127
/* -------------------------------------------------------------------------- */
128

    
129
/* x87 control word loaded around every conversion loop:
   round to nearest, 64 bit precision, all exceptions masked */
static const short fpuControlWord_ = 0x033F;

/* Full-scale multipliers applied to the float sample before it is stored as
   an integer. The "dithered" variants are one smaller to prevent overflow
   when the dither is added. */
static const double int32Scaler_ = 0x7FFFFFFF;
static const double ditheredInt32Scaler_ = 0x7FFFFFFE;
static const double int24Scaler_ = 0x7FFFFF;
static const double ditheredInt24Scaler_ = 0x7FFFFE;
static const double int16Scaler_ = 0x7FFF;
static const double ditheredInt16Scaler_ = 0x7FFE;

#define PA_DITHER_BITS_   (15)
/* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
#define PA_FLOAT_DITHER_SCALE_  (1.0F / ((1<<PA_DITHER_BITS_)-1))
/* Memory copy of PA_FLOAT_DITHER_SCALE_ so it can be used as an fmul operand */
static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;
/* Right shift applied to each 32 bit random seed before the two are summed */
#define PA_DITHER_SHIFT_  ((32 - PA_DITHER_BITS_) + 1)
142

    
143
/* -------------------------------------------------------------------------- */
144

    
145
/*
    Convert float32 samples to int32 with no clipping and no dithering.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in int32 elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written when 0.
    ditherGenerator: unused.

    Each sample is multiplied by int32Scaler_ and stored with fistp while
    fpuControlWord_ is loaded; the caller's FPU control word is restored
    before returning.

    Reference C version (the cast truncates, the assembly rounds to nearest):

    float *src = (float*)sourceBuffer;
    signed long *dest =  (signed long*)destinationBuffer;
    (void)ditherGenerator; // unused parameter

    while( count-- )
    {
        // REVIEW
        double scaled = *src * 0x7FFFFFFF;
        *dest = (signed long) scaled;

        src += sourceStride;
        dest += destinationStride;
    }
*/
static void Float32_To_Int32(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     int32Scaler_             // stack:  (int)0x7FFFFFFF

    Float32_To_Int32_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFFFF, (int)0x7FFFFFFF

        // note: out of range input makes fistp store the x87 "integer
        // indefinite" value (0x80000000). we could store to a temporary
        // qword here which would cause wraparound distortion instead. that
        // would be more work, and given that not enabling clipping is only
        // advisable when you know that your signal isn't going to clip it
        // isn't worth it.
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  (int)0x7FFFFFFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
229

    
230
/* -------------------------------------------------------------------------- */
231

    
232
/*
    Convert float32 samples to int32, clamping out of range input.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in int32 elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written when 0.
    ditherGenerator: unused.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (1.0f) is not passed through the FPU at all: 0x7FFFFFFF is
    stored for positive input and 0x80000000 for negative input. All other
    samples are multiplied by int32Scaler_ and stored with fistp.

    Reference C version (the cast truncates, the assembly rounds to nearest):

    float *src = (float*)sourceBuffer;
    signed long *dest =  (signed long*)destinationBuffer;
    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // REVIEW
        double scaled = *src * 0x7FFFFFFF;
        PA_CLIP_( scaled, -2147483648., 2147483647.  );
        *dest = (signed long) scaled;

        src += sourceStride;
        dest += destinationStride;
    }
*/
static void Float32_To_Int32_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     int32Scaler_             // stack:  (int)0x7FFFFFFF

    Float32_To_Int32_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFFFF, (int)0x7FFFFFFF
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  (int)0x7FFFFFFF
        jmp     Float32_To_Int32_Clip_stored

    Float32_To_Int32_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive input, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_Clip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_Clip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
328

    
329
/* -------------------------------------------------------------------------- */
330

    
331
/*
    Convert float32 samples to int32, adding dither to in-range samples and
    clamping out of range input.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in int32 elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written, and the
        dither state is left untouched, when 0.
    ditherGenerator: its previous, randSeed1 and randSeed2 fields are copied
        into locals, advanced inline by the assembly, and written back at the
        end.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (1.0f) is stored directly as 0x7FFFFFFF or 0x80000000; no
    dither is generated for it, so the dither state does not advance. All
    other samples are multiplied by ditheredInt32Scaler_, the scaled dither
    value is added, and the sum is stored with fistp.

    Reference C version (the cast truncates, the assembly rounds to nearest):

    float *src = (float*)sourceBuffer;
    signed long *dest =  (signed long*)destinationBuffer;

    while( count-- )
    {
        // REVIEW
        double dither  = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        double dithered = ((double)*src * (2147483646.0)) + dither;
        PA_CLIP_( dithered, -2147483648., 2147483647.  );
        *dest = (signed long) dithered;


        src += sourceStride;
        dest += destinationStride;
    }
*/
static void Float32_To_Int32_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    /* spill storage: */
    signed long sourceByteStride;
    signed long highpassedDither;

    /* dither state: */
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32 and int32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     ditheredInt32Scaler_    // stack:  int scaler

    Float32_To_Int32_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int32_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither (inlined instead of calling PaUtil_GenerateFloatTriangularDither)
        mov     sourceByteStride, eax   // save eax, mul needs it
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   dword ptr [edi]         // pop st(0) into dest, stack:  int scaler
        jmp     Float32_To_Int32_DitherClip_stored

    Float32_To_Int32_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFFFF         // 0x7FFFFFFF for positive input, 0x80000000 for negative
        mov     dword ptr [edi], edx

    Float32_To_Int32_DitherClip_stored:

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int32_DitherClip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }

    /* publish the advanced dither state back to the generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
482

    
483
/* -------------------------------------------------------------------------- */
484

    
485
/*
    Convert float32 samples to packed 24 bit integers with no clipping and no
    dithering.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in 3 byte elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written when 0.
    ditherGenerator: unused.

    Each sample is multiplied by int24Scaler_, converted with fistp into a
    32 bit temporary, and the low three bytes of that temporary are written
    to the destination, lowest byte first.

    Reference C version (note that it scales to 32 bits and drops the low
    8 bits, whereas the assembly scales by 0x7FFFFF directly):

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits
        double scaled = *src * 0x7FFFFFFF;
        temp = (signed long) scaled;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/
static void Float32_To_Int24(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     int24Scaler_             // stack:  (int)0x7FFFFF

    Float32_To_Int24_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  (int)0x7FFFFF
        mov     edx, tempInt32

        mov     byte ptr [edi], DL      // bits 0..7
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bits 8..23

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
578

    
579
/* -------------------------------------------------------------------------- */
580

    
581
/*
    Convert float32 samples to packed 24 bit integers, clamping out of range
    input.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in 3 byte elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written when 0.
    ditherGenerator: unused.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (1.0f) becomes 0x7FFFFF for positive input and 0x800000 for
    negative input. All other samples are multiplied by int24Scaler_ and
    converted with fistp. In both cases the low three bytes of the result are
    written to the destination, lowest byte first.

    Reference C version (note that it scales to 32 bits and drops the low
    8 bits, whereas the assembly scales by 0x7FFFFF directly):

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    (void) ditherGenerator; // unused parameter

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits
        double scaled = *src * 0x7FFFFFFF;
        PA_CLIP_( scaled, -2147483648., 2147483647.  );
        temp = (signed long) scaled;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/
static void Float32_To_Int24_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    signed long tempInt32;

    (void) ditherGenerator; /* unused parameter */

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     int24Scaler_             // stack:  (int)0x7FFFFF

    Float32_To_Int24_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFFFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFFFF, (int)0x7FFFFF
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  (int)0x7FFFFF
        mov     edx, tempInt32
        jmp     Float32_To_Int24_Clip_store

    Float32_To_Int24_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive input, 0x800000 for negative

    Float32_To_Int24_Clip_store:

        mov     byte ptr [edi], DL      // bits 0..7
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bits 8..23

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_Clip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }
}
692

    
693
/* -------------------------------------------------------------------------- */
694

    
695
/*
    Convert float32 samples to packed 24 bit integers, adding dither to
    in-range samples and clamping out of range input.

    destinationBuffer, destinationStride: first output sample and the spacing
        between output samples, counted in 3 byte elements.
    sourceBuffer, sourceStride: first input sample and the spacing between
        input samples, counted in float32 elements.
    count: number of samples to convert. Nothing is read or written, and the
        dither state is left untouched, when 0.
    ditherGenerator: its previous, randSeed1 and randSeed2 fields are copied
        into locals, advanced inline by the assembly, and written back at the
        end.

    A sample whose bit pattern, with the sign bit masked off, is greater than
    0x3F800000 (1.0f) becomes 0x7FFFFF or 0x800000; no dither is generated for
    it, so the dither state does not advance. All other samples are multiplied
    by ditheredInt24Scaler_, the scaled dither value is added, and the sum is
    converted with fistp. In both cases the low three bytes of the result are
    written to the destination, lowest byte first.

    Reference C version (note that it scales to 32 bits and drops the low
    8 bits, whereas the assembly scales by 0x7FFFFE directly):

    float *src = (float*)sourceBuffer;
    unsigned char *dest = (unsigned char*)destinationBuffer;
    signed long temp;

    while( count-- )
    {
        // convert to 32 bit and drop the low 8 bits

        // FIXME: the dither amplitude here appears to be too small by 8 bits
        double dither  = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
        // use smaller scaler to prevent overflow when we add the dither
        double dithered = ((double)*src * (2147483646.0)) + dither;
        PA_CLIP_( dithered, -2147483648., 2147483647.  );

        temp = (signed long) dithered;

        dest[0] = (unsigned char)(temp >> 8);
        dest[1] = (unsigned char)(temp >> 16);
        dest[2] = (unsigned char)(temp >> 24);

        src += sourceStride;
        dest += destinationStride * 3;
    }
*/
static void Float32_To_Int24_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    /* spill storage: */
    signed long sourceByteStride;
    signed long highpassedDither;

    /* dither state: */
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    signed long tempInt32;

    /* The loop below tests for the end pointer only after converting a
       sample, so an empty request must be rejected up front; otherwise the
       source pointer steps past the end pointer and never matches it. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 3                  // sizeof int24
        mov     ebx, destinationStride
        imul    ebx, edx

        fwait
        fstcw   savedFpuControlWord     // remember the caller's control word
        fldcw   fpuControlWord_

        fld     ditheredInt24Scaler_    // stack:  int scaler

    Float32_To_Int24_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int24_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither (inlined instead of calling PaUtil_GenerateFloatTriangularDither)
        mov     sourceByteStride, eax   // save eax, mul needs it
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165, only eax is used
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // eax -> current
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   tempInt32               // pop st(0) into tempInt32, stack:  int scaler
        mov     edx, tempInt32
        jmp     Float32_To_Int24_DitherClip_store

    Float32_To_Int24_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     edx, 0x7FFFFF           // 0x7FFFFF for positive input, 0x800000 for negative

    Float32_To_Int24_DitherClip_store:

        mov     byte ptr [edi], DL      // bits 0..7
        shr     edx, 8
        mov     word ptr [edi+1], DX    // bits 8..23

        lea     edi, [edi+ebx]          // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int24_DitherClip_loop

        ffree   st(0)                   // discard the scaler
        fincstp

        fwait
        fnclex
        fldcw   savedFpuControlWord     // restore the caller's control word
    }

    /* publish the advanced dither state back to the generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
862

    
863
/* -------------------------------------------------------------------------- */
864

    
865
/*
 * Float32_To_Int16
 *
 * Converts `count` 32-bit float samples from sourceBuffer into 16-bit signed
 * integers in destinationBuffer using x87 instructions. Each sample is
 * multiplied by int16Scaler_ (0x7FFF according to the inline comments) and
 * stored with FISTP. No clipping and no dithering is applied.
 *
 * destinationStride and sourceStride are expressed in samples; they are
 * converted to byte strides (x2 and x4 respectively) inside the routine.
 * ditherGenerator is unused.
 *
 * The FPU control word is saved, replaced by fpuControlWord_ (defined
 * elsewhere in this file) for the duration of the conversion, and restored
 * before returning.
 *
 * Reference C implementation:
 *
 *     float *src = (float*)sourceBuffer;
 *     signed short *dest =  (signed short*)destinationBuffer;
 *     (void)ditherGenerator; // unused parameter
 *
 *     while( count-- )
 *     {
 *         short samp = (short) (*src * (32767.0f));
 *         *dest = samp;
 *
 *         src += sourceStride;
 *         dest += destinationStride;
 *     }
 */
static void Float32_To_Int16(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The assembly loop below tests for termination only AFTER processing a
       sample (do/while shape). With count == 0 the end pointer equals the
       start pointer, so the loop would convert one sample and then never
       match the end pointer again. Match the reference C, which does nothing. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack:  (int)0x7FFF

    Float32_To_Int16_loop:

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack:  (int)0x7FFF

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty ...
        fincstp                         // ... and step the stack top past it

        fwait
        fnclex                          // clear any pending FPU exceptions
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }
}
943

    
944
/* -------------------------------------------------------------------------- */
945

    
946
/*
 * Float32_To_Int16_Clip
 *
 * Converts `count` 32-bit float samples from sourceBuffer into 16-bit signed
 * integers in destinationBuffer, clamping out-of-range input.
 *
 * For each sample the raw IEEE bit pattern is inspected: if, with the sign
 * bit masked off, it is greater than 0x3F800000 (the bit pattern of 1.0f),
 * the sample is clamped without touching the FPU: 0x7FFF is stored when the
 * sign bit is clear and 0x8000 when it is set. Because the test is on the
 * bit pattern, infinities and NaNs also take the clamp path. All other
 * samples are multiplied by int16Scaler_ (0x7FFF according to the inline
 * comments) and stored with FISTP.
 *
 * destinationStride and sourceStride are expressed in samples; they are
 * converted to byte strides (x2 and x4 respectively) inside the routine.
 * ditherGenerator is unused.
 *
 * The FPU control word is saved, replaced by fpuControlWord_ (defined
 * elsewhere in this file) for the duration of the conversion, and restored
 * before returning.
 *
 * Reference C implementation:
 *
 *     float *src = (float*)sourceBuffer;
 *     signed short *dest =  (signed short*)destinationBuffer;
 *     (void)ditherGenerator; // unused parameter
 *
 *     while( count-- )
 *     {
 *         long samp = (signed long) (*src * (32767.0f));
 *         PA_CLIP_( samp, -0x8000, 0x7FFF );
 *         *dest = (signed short) samp;
 *
 *         src += sourceStride;
 *         dest += destinationStride;
 *     }
 */
static void Float32_To_Int16_Clip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    (void) ditherGenerator; /* unused parameter */

    /* The assembly loop below tests for termination only AFTER processing a
       sample (do/while shape). With count == 0 the end pointer equals the
       start pointer, so the loop would convert one sample and then never
       match the end pointer again. Match the reference C, which does nothing. */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     int16Scaler_            // stack:  (int)0x7FFF

    Float32_To_Int16_Clip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int16_Clip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, (int)0x7FFF
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*0x7FFF, (int)0x7FFF
        fistp   word ptr [edi]          // store scaled int into dest, stack:  (int)0x7FFF
        jmp     Float32_To_Int16_Clip_stored

    Float32_To_Int16_Clip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     dx, 0x7FFF              // convert to maximum range integers (0x7FFF or 0x8000)
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_Clip_stored:

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_Clip_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty ...
        fincstp                         // ... and step the stack top past it

        fwait
        fnclex                          // clear any pending FPU exceptions
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }
}
1042

    
1043
/* -------------------------------------------------------------------------- */
1044

    
1045
/*
 * Float32_To_Int16_DitherClip
 *
 * Converts `count` 32-bit float samples from sourceBuffer into 16-bit signed
 * integers in destinationBuffer, adding high-passed triangular dither and
 * clamping out-of-range input.
 *
 * For each sample the raw IEEE bit pattern is inspected: if, with the sign
 * bit masked off, it is greater than 0x3F800000 (the bit pattern of 1.0f),
 * the sample is clamped without touching the FPU: 0x7FFF is stored when the
 * sign bit is clear and 0x8000 when it is set. Infinities and NaNs also take
 * this path, and the dither generator is NOT advanced for clamped samples.
 * All other samples are multiplied by ditheredInt16Scaler_, have a dither
 * value added, and are stored with FISTP.
 *
 * The dither is generated inline (in place of the call to
 * PaUtil_GenerateFloatTriangularDither used by the reference C): two linear
 * congruential generators (x * 196314165 + 907633515) are stepped, their
 * outputs are shifted right by PA_DITHER_SHIFT_ and summed, the previous sum
 * is subtracted (first-order high-pass), and the result is scaled by
 * const_float_dither_scale_. The generator state is copied out of
 * *ditherGenerator on entry and written back on exit.
 *
 * destinationStride and sourceStride are expressed in samples; they are
 * converted to byte strides (x2 and x4 respectively) inside the routine.
 *
 * The FPU control word is saved, replaced by fpuControlWord_ (defined
 * elsewhere in this file) for the duration of the conversion, and restored
 * before returning.
 *
 * Reference C implementation:
 *
 *     float *src = (float*)sourceBuffer;
 *     signed short *dest =  (signed short*)destinationBuffer;
 *
 *     while( count-- )
 *     {
 *         float dither  = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
 *         // use smaller scaler to prevent overflow when we add the dither
 *         float dithered = (*src * (32766.0f)) + dither;
 *         signed long samp = (signed long) dithered;
 *         PA_CLIP_( samp, -0x8000, 0x7FFF );
 *         *dest = (signed short) samp;
 *
 *         src += sourceStride;
 *         dest += destinationStride;
 *     }
 */
static void Float32_To_Int16_DitherClip(
    void *destinationBuffer, signed int destinationStride,
    void *sourceBuffer, signed int sourceStride,
    unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
{
    short savedFpuControlWord;

    // spill storage:
    signed long sourceByteStride;
    signed long highpassedDither;

    // dither state:
    unsigned long ditherPrevious = ditherGenerator->previous;
    unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
    unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;

    /* The assembly loop below tests for termination only AFTER processing a
       sample (do/while shape). With count == 0 the end pointer equals the
       start pointer, so the loop would convert one sample and then never
       match the end pointer again. Match the reference C, which does nothing
       (and therefore leaves the dither state untouched). */
    if( count == 0 )
        return;

    __asm{
        // esi -> source ptr
        // eax -> source byte stride
        // edi -> destination ptr
        // ebx -> destination byte stride
        // ecx -> source end ptr
        // edx -> temp

        mov     esi, sourceBuffer

        mov     edx, 4                  // sizeof float32
        mov     eax, sourceStride
        imul    eax, edx                // source byte stride

        mov     ecx, count
        imul    ecx, eax
        add     ecx, esi                // source end ptr = count * source byte stride + source ptr

        mov     edi, destinationBuffer

        mov     edx, 2                  // sizeof int16
        mov     ebx, destinationStride
        imul    ebx, edx                // destination byte stride

        fwait
        fstcw   savedFpuControlWord     // remember the caller's FPU control word
        fldcw   fpuControlWord_

        fld     ditheredInt16Scaler_    // stack:  int scaler

    Float32_To_Int16_DitherClip_loop:

        mov     edx, dword ptr [esi]    // load floating point value into integer register

        and     edx, 0x7FFFFFFF         // mask off sign
        cmp     edx, 0x3F800000         // greater than 1.0 or less than -1.0

        jg      Float32_To_Int16_DitherClip_clamp

        // load unscaled value into st(0)
        fld     dword ptr [esi]         // stack:  value, int scaler
        add     esi, eax                // increment source ptr
        fmul    st(0), st(1)            // st(0) *= st(1), stack:  value*(int scaler), int scaler

    // generate dither (eax is needed by MUL, so the byte stride is spilled)
        mov     sourceByteStride, eax   // save eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed1
        mul     edx                     // edx:eax = eax * 196314165 (only eax is used)
        lea     eax, [eax+907633515]
        mov     ditherRandSeed1, eax
        mov     edx, 196314165
        mov     eax, ditherRandSeed2
        mul     edx                     // edx:eax = eax * 196314165 (only eax is used)
        lea     eax, [eax+907633515]
        mov     edx, ditherRandSeed1
        shr     edx, PA_DITHER_SHIFT_
        mov     ditherRandSeed2, eax
        shr     eax, PA_DITHER_SHIFT_
        lea     eax, [eax+edx]          // current = randSeed1>>x + randSeed2>>x
        mov     edx, ditherPrevious
        neg     edx
        lea     edx, [eax+edx]          // highpass = current - previous
        mov     highpassedDither, edx
        mov     ditherPrevious, eax     // previous = current
        mov     eax, sourceByteStride   // restore eax
        fild    highpassedDither
        fmul    const_float_dither_scale_
    // end generate dither, dither signal in st(0)

        faddp   st(1), st(0)            // stack: dither + value*(int scaler), int scaler
        fistp   word ptr [edi]          // store scaled int into dest, stack:  int scaler
        jmp     Float32_To_Int16_DitherClip_stored

    Float32_To_Int16_DitherClip_clamp:
        mov     edx, dword ptr [esi]    // load floating point value into integer register
        shr     edx, 31                 // move sign bit into bit 0
        add     esi, eax                // increment source ptr
        add     dx, 0x7FFF              // convert to maximum range integers (0x7FFF or 0x8000)
        mov     word ptr [edi], dx      // store clamped int into dest

    Float32_To_Int16_DitherClip_stored:

        add     edi, ebx                // increment destination ptr

        cmp     esi, ecx                // has src ptr reached end?
        jne     Float32_To_Int16_DitherClip_loop

        ffree   st(0)                   // discard the scaler: mark st(0) empty ...
        fincstp                         // ... and step the stack top past it

        fwait
        fnclex                          // clear any pending FPU exceptions
        fldcw   savedFpuControlWord     // restore the caller's FPU control word
    }

    /* publish the advanced dither state back to the caller's generator */
    ditherGenerator->previous = ditherPrevious;
    ditherGenerator->randSeed1 = ditherRandSeed1;
    ditherGenerator->randSeed2 = ditherRandSeed2;
}
1198

    
1199
/* -------------------------------------------------------------------------- */
1200

    
1201
void PaUtil_InitializeX86PlainConverters( void )
1202
{
1203
    paConverters.Float32_To_Int32 = Float32_To_Int32;
1204
    paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
1205
    paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
1206

    
1207
    paConverters.Float32_To_Int24 = Float32_To_Int24;
1208
    paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
1209
    paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
1210
    
1211
    paConverters.Float32_To_Int16 = Float32_To_Int16;
1212
    paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
1213
    paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
1214
}
1215

    
1216
#endif
1217

    
1218
/* -------------------------------------------------------------------------- */