annotate src/zlib-1.2.7/contrib/masmx86/inffas32.asm @ 56:af97cad61ff0

Add updated build of PortAudio for OSX
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 03 Jan 2017 15:10:52 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
Chris@4 2 ; *
Chris@4 3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
Chris@4 4 ; *
Chris@4 5 ; * Copyright (C) 1995-2003 Mark Adler
Chris@4 6 ; * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 7 ; *
Chris@4 8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@4 9 ; * Please use the copyright conditions above.
Chris@4 10 ; *
Chris@4 11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@4 12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@4 13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@4 14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
Chris@4 15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@4 16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@4 17 ; * versions of this and inffast.S can be found at
Chris@4 18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@4 19 ; *
Chris@4 20 ; * 2005 : modification by Gilles Vollant
Chris@4 21 ; */
Chris@4 22 ; For Visual C++ 4.x and higher and ML 6.x and higher
Chris@4 23 ; ml.exe is in directory \MASM611C of Win95 DDK
Chris@4 24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
Chris@4 25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
Chris@4 26 ;
Chris@4 27 ;
Chris@4 28 ; compile with command line option
Chris@4 29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
Chris@4 30
Chris@4 31 ; if you define NO_GZIP (see inflate.h), compile with
Chris@4 32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
Chris@4 33
Chris@4 34
Chris@4 35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
Chris@4 36 ; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
Chris@4 37 ; in inflate_state in inflate.h)
Chris@4 38 zlib1222sup equ 8 ; added to every inflate_state offset except mode_state (see the *_state equates below)
Chris@4 39 ; INFLATE_MODE_TYPE / INFLATE_MODE_BAD are the raw numbers this file stores into state->mode.
Chris@4 40 ; NOTE(review): they must equal TYPE and BAD of the inflate_mode enum in the inflate.h being built - verify for this zlib version.
Chris@4 41 IFDEF GUNZIP
Chris@4 42 INFLATE_MODE_TYPE equ 11
Chris@4 43 INFLATE_MODE_BAD equ 26
Chris@4 44 ELSE
Chris@4 45 IFNDEF NO_GUNZIP ; neither symbol defined: same values as the GUNZIP case
Chris@4 46 INFLATE_MODE_TYPE equ 11
Chris@4 47 INFLATE_MODE_BAD equ 26
Chris@4 48 ELSE ; only NO_GUNZIP defined (ml /DNO_GUNZIP)
Chris@4 49 INFLATE_MODE_TYPE equ 3
Chris@4 50 INFLATE_MODE_BAD equ 17
Chris@4 51 ENDIF
Chris@4 52 ENDIF
Chris@4 53
Chris@4 54
Chris@4 55 ; 75 "inffast.S"
Chris@4 56 ;FILE "inffast.S"
Chris@4 57
Chris@4 58 ;;;GLOBAL _inflate_fast
Chris@4 59
Chris@4 60 ;;;SECTION .text
Chris@4 61
Chris@4 62
Chris@4 63
Chris@4 64 .586p
Chris@4 65 .mmx ; MMX instructions are used by the L_*_mmx code paths
Chris@4 66
Chris@4 67 name inflate_fast_x86
Chris@4 68 .MODEL FLAT
Chris@4 69
Chris@4 70 _DATA segment
Chris@4 71 inflate_fast_use_mmx: ; decoder selector: 2 = MMX loop, 3 = integer loop, any lower value = run the CPUID probe first
Chris@4 72 dd 1 ; initial value 1, so the first call probes the CPU (see L_check_mmx)
Chris@4 73
Chris@4 74
Chris@4 75 _TEXT segment ; everything below, including the strings and the mask table, is emitted after this directive
Chris@4 76
Chris@4 77
Chris@4 78
Chris@4 79 ALIGN 4
Chris@4 80 db 'Fast decoding Code from Chris Anderson'
Chris@4 81 db 0
Chris@4 82
Chris@4 83 ALIGN 4
Chris@4 84 invalid_literal_length_code_msg: ; zero-terminated text loaded by L_invalid_literal_length_code
Chris@4 85 db 'invalid literal/length code'
Chris@4 86 db 0
Chris@4 87
Chris@4 88 ALIGN 4
Chris@4 89 invalid_distance_code_msg: ; zero-terminated text loaded by L_invalid_distance_code
Chris@4 90 db 'invalid distance code'
Chris@4 91 db 0
Chris@4 92
Chris@4 93 ALIGN 4
Chris@4 94 invalid_distance_too_far_msg: ; zero-terminated text loaded by L_invalid_distance_too_far
Chris@4 95 db 'invalid distance too far back'
Chris@4 96 db 0
Chris@4 97
Chris@4 98 ; inflate_fast_mask[n] = (1 shl n) - 1 for n = 0..32; indexed with eax*4 by the MMX path to keep the low n bits
Chris@4 99 ALIGN 4
Chris@4 100 inflate_fast_mask:
Chris@4 101 dd 0
Chris@4 102 dd 1
Chris@4 103 dd 3
Chris@4 104 dd 7
Chris@4 105 dd 15
Chris@4 106 dd 31
Chris@4 107 dd 63
Chris@4 108 dd 127
Chris@4 109 dd 255
Chris@4 110 dd 511
Chris@4 111 dd 1023
Chris@4 112 dd 2047
Chris@4 113 dd 4095
Chris@4 114 dd 8191
Chris@4 115 dd 16383
Chris@4 116 dd 32767
Chris@4 117 dd 65535
Chris@4 118 dd 131071
Chris@4 119 dd 262143
Chris@4 120 dd 524287
Chris@4 121 dd 1048575
Chris@4 122 dd 2097151
Chris@4 123 dd 4194303
Chris@4 124 dd 8388607
Chris@4 125 dd 16777215
Chris@4 126 dd 33554431
Chris@4 127 dd 67108863
Chris@4 128 dd 134217727
Chris@4 129 dd 268435455
Chris@4 130 dd 536870911
Chris@4 131 dd 1073741823
Chris@4 132 dd 2147483647
Chris@4 133 dd 4294967295
Chris@4 134
Chris@4 135 ; Byte offsets of the inflate_state fields this file reads or writes; all but mode_state shift by zlib1222sup.
Chris@4 136 mode_state equ 0 ;/* state->mode */
Chris@4 137 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
Chris@4 138 write_state equ (36+4+zlib1222sup) ;/* state->write */
Chris@4 139 window_state equ (40+4+zlib1222sup) ;/* state->window */
Chris@4 140 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
Chris@4 141 bits_state equ (48+4+zlib1222sup) ;/* state->bits */
Chris@4 142 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
Chris@4 143 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
Chris@4 144 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
Chris@4 145 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
Chris@4 146
Chris@4 147
Chris@4 148 ;;SECTION .text
Chris@4 149 ; 205 "inffast.S"
Chris@4 150 ;GLOBAL inflate_fast_use_mmx
Chris@4 151
Chris@4 152 ;SECTION .data
Chris@4 153
Chris@4 154
Chris@4 155 ; GLOBAL inflate_fast_use_mmx:object
Chris@4 156 ;.size inflate_fast_use_mmx, 4
Chris@4 157 ; 226 "inffast.S"
Chris@4 158 ;SECTION .text
Chris@4 159
Chris@4 160 ALIGN 4
Chris@4 161 _inflate_fast proc near ; assembler version of inflate_fast (inffast.c); both arguments are read from the stack
Chris@4 162 .FPO (16, 4, 0, 0, 1, 0) ; NOTE(review): presumably frame-pointer-omission debug data for the frame built below - confirm the fields
Chris@4 163 push edi ; save the registers this routine overwrites
Chris@4 164 push esi
Chris@4 165 push ebp
Chris@4 166 push ebx
Chris@4 167 pushfd ; save EFLAGS; restored by popfd at L_done
Chris@4 168 sub esp,64 ; 64 bytes of locals; 20 bytes were pushed above, so the return address is at [esp+84]
Chris@4 169 cld ; lodsw / stosb / rep movsb below must move forward
Chris@4 170 ; Locals: [esp+0] length-code mask, [esp+4] distance-code mask, [esp+8] lencode table, [esp+12] distcode table,
Chris@4 171 ; [esp+16] output limit, [esp+20] input limit, [esp+24] match length, [esp+28..39] 12-byte copy of a short input tail,
Chris@4 172 ; [esp+40] start of the output written so far, [esp+44] saved input pointer, [esp+48] write, [esp+52] wsize,
Chris@4 173 ; [esp+56] window, [esp+60] output pointer at entry. Registers in the integer loop: esi=input, edi=output, ebp=hold, ebx=bits.
Chris@4 174 mov esi, [esp+88] ; esi = first argument (the stream structure)
Chris@4 175 mov edi, [esi+28] ; edi = state pointer stored in the stream; indexed with the *_state offsets below
Chris@4 176 ; NOTE(review): stream offsets 0/4/12/16/24/28 are presumably next_in/avail_in/next_out/avail_out/msg/state
Chris@4 177 ; of z_stream in zlib.h - confirm against the header in use.
Chris@4 178
Chris@4 179
Chris@4 180
Chris@4 181
Chris@4 182
Chris@4 183 mov edx, [esi+4] ; presumably avail_in
Chris@4 184 mov eax, [esi+0] ; presumably next_in
Chris@4 185
Chris@4 186 add edx,eax
Chris@4 187 sub edx,11 ; edx = input pointer + input length - 11
Chris@4 188
Chris@4 189 mov [esp+44],eax ; input pointer
Chris@4 190 mov [esp+20],edx ; input limit: the loops continue only while this is above esi
Chris@4 191
Chris@4 192 mov ebp, [esp+92] ; ebp = second argument
Chris@4 193 mov ecx, [esi+16] ; presumably avail_out
Chris@4 194 mov ebx, [esi+12] ; presumably next_out
Chris@4 195
Chris@4 196 sub ebp,ecx
Chris@4 197 neg ebp
Chris@4 198 add ebp,ebx ; ebp = output pointer - (second argument - output space)
Chris@4 199
Chris@4 200 sub ecx,257
Chris@4 201 add ecx,ebx ; ecx = output pointer + output space - 257
Chris@4 202
Chris@4 203 mov [esp+60],ebx ; output pointer at entry
Chris@4 204 mov [esp+40],ebp ; lowest output address a back-reference can use without the window
Chris@4 205 mov [esp+16],ecx ; output limit: the loops continue only while this is above edi
Chris@4 206 ; 285 "inffast.S"
Chris@4 207 mov eax, [edi+lencode_state]
Chris@4 208 mov ecx, [edi+distcode_state]
Chris@4 209
Chris@4 210 mov [esp+8],eax ; literal/length table
Chris@4 211 mov [esp+12],ecx ; distance table
Chris@4 212
Chris@4 213 mov eax,1
Chris@4 214 mov ecx, [edi+lenbits_state]
Chris@4 215 shl eax,cl
Chris@4 216 dec eax
Chris@4 217 mov [esp+0],eax ; (1 shl lenbits) - 1: index mask for the first-level length lookup
Chris@4 218
Chris@4 219 mov eax,1
Chris@4 220 mov ecx, [edi+distbits_state]
Chris@4 221 shl eax,cl
Chris@4 222 dec eax
Chris@4 223 mov [esp+4],eax ; (1 shl distbits) - 1: index mask for the first-level distance lookup
Chris@4 224
Chris@4 225 mov eax, [edi+wsize_state]
Chris@4 226 mov ecx, [edi+write_state]
Chris@4 227 mov edx, [edi+window_state]
Chris@4 228
Chris@4 229 mov [esp+52],eax ; wsize
Chris@4 230 mov [esp+48],ecx ; write
Chris@4 231 mov [esp+56],edx ; window
Chris@4 232
Chris@4 233 mov ebp, [edi+hold_state] ; ebp = bit accumulator
Chris@4 234 mov ebx, [edi+bits_state] ; ebx = number of bits held in ebp
Chris@4 235 ; 321 "inffast.S"
Chris@4 236 mov esi, [esp+44]
Chris@4 237 mov ecx, [esp+20]
Chris@4 238 cmp ecx,esi
Chris@4 239 ja L_align_long ; limit above the input pointer (more than 11 input bytes): read the caller's buffer directly
Chris@4 240 ; Otherwise copy the short input into the 12-byte local buffer and zero-pad it, so 16/32-bit reads stay inside the frame.
Chris@4 241 add ecx,11
Chris@4 242 sub ecx,esi ; ecx = number of input bytes available
Chris@4 243 mov eax,12
Chris@4 244 sub eax,ecx ; eax = padding bytes needed to fill the 12-byte buffer
Chris@4 245 lea edi, [esp+28]
Chris@4 246 rep movsb ; copy the available input into the local buffer
Chris@4 247 mov ecx,eax
Chris@4 248 xor eax,eax
Chris@4 249 rep stosb ; zero the rest of the buffer
Chris@4 250 lea esi, [esp+28] ; decode from the local buffer
Chris@4 251 mov [esp+20],esi ; limit = buffer start; L_update_next_in recognises this value as "local buffer in use"
Chris@4 252 jmp L_is_aligned
Chris@4 253
Chris@4 254
Chris@4 255 L_align_long: ; consume single bytes until esi is 4-byte aligned
Chris@4 256 test esi,3
Chris@4 257 jz L_is_aligned
Chris@4 258 xor eax,eax
Chris@4 259 mov al, [esi] ; next input byte, zero-extended
Chris@4 260 inc esi
Chris@4 261 mov ecx,ebx ; shift count = bits already held
Chris@4 262 add ebx,8 ; bits += 8
Chris@4 263 shl eax,cl
Chris@4 264 or ebp,eax ; hold |= byte shl (old bits)
Chris@4 265 jmp L_align_long
Chris@4 266
Chris@4 267 L_is_aligned:
Chris@4 268 mov edi, [esp+60] ; edi = output pointer (edi was used as scratch above)
Chris@4 269 ; 366 "inffast.S"
Chris@4 270 L_check_mmx: ; choose the decoder from inflate_fast_use_mmx, probing the CPU if not decided yet
Chris@4 271 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 272 je L_init_mmx ; 2: MMX decoder
Chris@4 273 ja L_do_loop ; above 2: integer decoder
Chris@4 274 ; below 2: not decided yet - probe, store 2 or 3, then re-enter L_check_mmx
Chris@4 275 push eax
Chris@4 276 push ebx
Chris@4 277 push ecx
Chris@4 278 push edx
Chris@4 279 pushfd
Chris@4 280 mov eax, [esp] ; eax = current EFLAGS
Chris@4 281 xor dword ptr [esp],0200000h ; flip bit 21 (the ID flag) in the stacked copy
Chris@4 282
Chris@4 283
Chris@4 284
Chris@4 285
Chris@4 286 popfd ; try to load the modified flags
Chris@4 287 pushfd
Chris@4 288 pop edx ; edx = flags as the CPU actually kept them
Chris@4 289 xor edx,eax
Chris@4 290 jz L_dont_use_mmx ; nothing changed: the bit cannot be toggled, so CPUID is not used
Chris@4 291 xor eax,eax
Chris@4 292 cpuid ; leaf 0: vendor string in ebx, edx, ecx
Chris@4 293 cmp ebx,0756e6547h ; bytes 'Genu'
Chris@4 294 jne L_dont_use_mmx
Chris@4 295 cmp ecx,06c65746eh ; bytes 'ntel'
Chris@4 296 jne L_dont_use_mmx
Chris@4 297 cmp edx,049656e69h ; bytes 'ineI'
Chris@4 298 jne L_dont_use_mmx
Chris@4 299 mov eax,1
Chris@4 300 cpuid ; leaf 1: version information in eax, feature flags in edx
Chris@4 301 shr eax,8
Chris@4 302 and eax,15 ; bits 8..11 of eax = family
Chris@4 303 cmp eax,6
Chris@4 304 jne L_dont_use_mmx ; MMX path is enabled only for family 6
Chris@4 305 test edx,0800000h ; feature bit 23 (MMX)
Chris@4 306 jnz L_use_mmx
Chris@4 307 jmp L_dont_use_mmx
Chris@4 308 L_use_mmx:
Chris@4 309 mov dword ptr [inflate_fast_use_mmx],2
Chris@4 310 jmp L_check_mmx_pop
Chris@4 311 L_dont_use_mmx:
Chris@4 312 mov dword ptr [inflate_fast_use_mmx],3
Chris@4 313 L_check_mmx_pop: ; restore the registers saved before the probe and dispatch again
Chris@4 314 pop edx
Chris@4 315 pop ecx
Chris@4 316 pop ebx
Chris@4 317 pop eax
Chris@4 318 jmp L_check_mmx
Chris@4 319 ; 426 "inffast.S"
Chris@4 320 ALIGN 4
Chris@4 321 L_do_loop: ; integer decode loop: one literal or one length/distance pair per iteration
Chris@4 322 ; 437 "inffast.S"
Chris@4 323 cmp bl,15
Chris@4 324 ja L_get_length_code ; more than 15 bits held: no refill needed
Chris@4 325
Chris@4 326 xor eax,eax
Chris@4 327 lodsw ; read 16 input bits, esi += 2
Chris@4 328 mov cl,bl
Chris@4 329 add bl,16 ; bits += 16
Chris@4 330 shl eax,cl
Chris@4 331 or ebp,eax ; append them above the bits already held
Chris@4 332
Chris@4 333 L_get_length_code:
Chris@4 334 mov edx, [esp+0] ; length-code mask
Chris@4 335 mov ecx, [esp+8] ; length table
Chris@4 336 and edx,ebp
Chris@4 337 mov eax, [ecx+edx*4] ; 4-byte table entry: al = op, ah = code length in bits, upper 16 bits = value
Chris@4 338
Chris@4 339 L_dolen:
Chris@4 340
Chris@4 341
Chris@4 342
Chris@4 343
Chris@4 344
Chris@4 345
Chris@4 346 mov cl,ah
Chris@4 347 sub bl,ah ; bits -= code length
Chris@4 348 shr ebp,cl ; drop the code from hold
Chris@4 349
Chris@4 350
Chris@4 351
Chris@4 352
Chris@4 353
Chris@4 354
Chris@4 355 test al,al
Chris@4 356 jnz L_test_for_length_base ; op != 0: not a literal
Chris@4 357
Chris@4 358 shr eax,16
Chris@4 359 stosb ; op == 0: the low byte of the value is a literal; store it and advance edi
Chris@4 360
Chris@4 361 L_while_test: ; loop while both the output and the input limits are still ahead
Chris@4 362
Chris@4 363
Chris@4 364 cmp [esp+16],edi
Chris@4 365 jbe L_break_loop ; output limit reached
Chris@4 366
Chris@4 367 cmp [esp+20],esi
Chris@4 368 ja L_do_loop ; input limit still above esi: decode another symbol
Chris@4 369 jmp L_break_loop
Chris@4 370
Chris@4 371 L_test_for_length_base: ; entry is not a literal: length base, second-level link, end of block or invalid
Chris@4 372 ; 502 "inffast.S"
Chris@4 373 mov edx,eax
Chris@4 374 shr edx,16 ; edx = value field (length base)
Chris@4 375 mov cl,al ; cl = op
Chris@4 376
Chris@4 377 test al,16
Chris@4 378 jz L_test_for_second_level_length ; bit 16 clear: not a length base
Chris@4 379 and cl,15 ; low 4 bits of op = number of extra bits
Chris@4 380 jz L_save_len ; no extra bits: the base is the length
Chris@4 381 cmp bl,cl
Chris@4 382 jae L_add_bits_to_len ; enough bits already held
Chris@4 383
Chris@4 384 mov ch,cl ; keep the extra-bit count while cl is used as the shift count
Chris@4 385 xor eax,eax
Chris@4 386 lodsw ; refill with 16 input bits
Chris@4 387 mov cl,bl
Chris@4 388 add bl,16
Chris@4 389 shl eax,cl
Chris@4 390 or ebp,eax
Chris@4 391 mov cl,ch
Chris@4 392
Chris@4 393 L_add_bits_to_len:
Chris@4 394 mov eax,1
Chris@4 395 shl eax,cl
Chris@4 396 dec eax ; eax = (1 shl extra) - 1
Chris@4 397 sub bl,cl ; bits -= extra
Chris@4 398 and eax,ebp ; eax = extra bits taken from hold
Chris@4 399 shr ebp,cl
Chris@4 400 add edx,eax ; length = base + extra
Chris@4 401
Chris@4 402 L_save_len:
Chris@4 403 mov [esp+24],edx ; keep the match length; edx is reused for the distance
Chris@4 404
Chris@4 405
Chris@4 406 L_decode_distance: ; decode the distance that follows a length
Chris@4 407 ; 549 "inffast.S"
Chris@4 408 cmp bl,15
Chris@4 409 ja L_get_distance_code ; more than 15 bits held: no refill needed
Chris@4 410
Chris@4 411 xor eax,eax
Chris@4 412 lodsw ; refill with 16 input bits
Chris@4 413 mov cl,bl
Chris@4 414 add bl,16
Chris@4 415 shl eax,cl
Chris@4 416 or ebp,eax
Chris@4 417
Chris@4 418 L_get_distance_code:
Chris@4 419 mov edx, [esp+4] ; distance-code mask
Chris@4 420 mov ecx, [esp+12] ; distance table
Chris@4 421 and edx,ebp
Chris@4 422 mov eax, [ecx+edx*4] ; entry: al = op, ah = code length in bits, upper 16 bits = value
Chris@4 423
Chris@4 424
Chris@4 425 L_dodist:
Chris@4 426 mov edx,eax
Chris@4 427 shr edx,16 ; edx = value field (distance base)
Chris@4 428 mov cl,ah
Chris@4 429 sub bl,ah ; bits -= code length
Chris@4 430 shr ebp,cl ; drop the code from hold
Chris@4 431 ; 584 "inffast.S"
Chris@4 432 mov cl,al ; cl = op
Chris@4 433
Chris@4 434 test al,16
Chris@4 435 jz L_test_for_second_level_dist ; bit 16 clear: not a distance base
Chris@4 436 and cl,15 ; low 4 bits of op = number of extra bits
Chris@4 437 jz L_check_dist_one ; no extra bits: try the distance-1 shortcut
Chris@4 438 cmp bl,cl
Chris@4 439 jae L_add_bits_to_dist ; enough bits already held
Chris@4 440
Chris@4 441 mov ch,cl ; keep the extra-bit count while cl is used as the shift count
Chris@4 442 xor eax,eax
Chris@4 443 lodsw ; refill with 16 input bits
Chris@4 444 mov cl,bl
Chris@4 445 add bl,16
Chris@4 446 shl eax,cl
Chris@4 447 or ebp,eax
Chris@4 448 mov cl,ch
Chris@4 449
Chris@4 450 L_add_bits_to_dist:
Chris@4 451 mov eax,1
Chris@4 452 shl eax,cl
Chris@4 453 dec eax ; eax = (1 shl extra) - 1
Chris@4 454 sub bl,cl ; bits -= extra
Chris@4 455 and eax,ebp ; eax = extra bits taken from hold
Chris@4 456 shr ebp,cl
Chris@4 457 add edx,eax ; distance = base + extra
Chris@4 458 jmp L_check_window ; jumps to the label that immediately follows
Chris@4 459
Chris@4 460 L_check_window: ; copy a match of [esp+24] bytes from edx bytes back; edi = output
Chris@4 461 ; 625 "inffast.S"
Chris@4 462 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
Chris@4 463 mov eax,edi
Chris@4 464 sub eax, [esp+40] ; eax = bytes of output available behind edi
Chris@4 465
Chris@4 466 cmp eax,edx
Chris@4 467 jb L_clip_window ; distance reaches behind the output start: part of the match comes from the window
Chris@4 468
Chris@4 469 mov ecx, [esp+24] ; ecx = match length
Chris@4 470 mov esi,edi
Chris@4 471 sub esi,edx ; esi = output - distance
Chris@4 472 ; First three bytes are copied by hand. NOTE(review): this assumes the length is at least 3 - confirm the tables guarantee it.
Chris@4 473 sub ecx,3
Chris@4 474 mov al, [esi]
Chris@4 475 mov [edi],al
Chris@4 476 mov al, [esi+1]
Chris@4 477 mov dl, [esi+2]
Chris@4 478 add esi,3
Chris@4 479 mov [edi+1],al
Chris@4 480 mov [edi+2],dl
Chris@4 481 add edi,3
Chris@4 482 rep movsb ; copy the remaining length - 3 bytes
Chris@4 483
Chris@4 484 mov esi, [esp+44] ; restore the input pointer
Chris@4 485 jmp L_while_test
Chris@4 486
Chris@4 487 ALIGN 4
Chris@4 488 L_check_dist_one: ; distance code without extra bits: a distance of 1 becomes a byte fill
Chris@4 489 cmp edx,1
Chris@4 490 jne L_check_window ; distance is not 1: ordinary copy
Chris@4 491 cmp [esp+40],edi
Chris@4 492 je L_check_window ; nothing written behind edi yet: take the general path
Chris@4 493
Chris@4 494 dec edi
Chris@4 495 mov ecx, [esp+24] ; ecx = match length
Chris@4 496 mov al, [edi] ; al = previous output byte
Chris@4 497 sub ecx,3
Chris@4 498
Chris@4 499 mov [edi+1],al ; first three copies written directly
Chris@4 500 mov [edi+2],al
Chris@4 501 mov [edi+3],al
Chris@4 502 add edi,4 ; net effect: output pointer advanced by 3
Chris@4 503 rep stosb ; fill the remaining length - 3 bytes with al
Chris@4 504
Chris@4 505 jmp L_while_test
Chris@4 506
Chris@4 507 ALIGN 4
Chris@4 508 L_test_for_second_level_length: ; length entry without bit 16: second-level link unless bit 64 is set
Chris@4 509
Chris@4 510
Chris@4 511
Chris@4 512
Chris@4 513 test al,64
Chris@4 514 jnz L_test_for_end_of_block ; bit 64 set: end of block or invalid code
Chris@4 515
Chris@4 516 mov eax,1
Chris@4 517 shl eax,cl ; cl still holds op (set before the jump here)
Chris@4 518 dec eax ; eax = (1 shl op) - 1
Chris@4 519 and eax,ebp ; low op bits of hold
Chris@4 520 add eax,edx ; plus the value field = index of the second-level entry
Chris@4 521 mov edx, [esp+8]
Chris@4 522 mov eax, [edx+eax*4] ; fetch the second-level length entry
Chris@4 523 jmp L_dolen ; process it like a first-level entry
Chris@4 524
Chris@4 525 ALIGN 4
Chris@4 526 L_test_for_second_level_dist: ; distance entry without bit 16: second-level link unless bit 64 is set
Chris@4 527
Chris@4 528
Chris@4 529
Chris@4 530
Chris@4 531 test al,64
Chris@4 532 jnz L_invalid_distance_code ; bit 64 set: invalid distance code
Chris@4 533
Chris@4 534 mov eax,1
Chris@4 535 shl eax,cl ; cl still holds op (set in L_dodist)
Chris@4 536 dec eax ; eax = (1 shl op) - 1
Chris@4 537 and eax,ebp ; low op bits of hold
Chris@4 538 add eax,edx ; plus the value field = index of the second-level entry
Chris@4 539 mov edx, [esp+12]
Chris@4 540 mov eax, [edx+eax*4] ; fetch the second-level distance entry
Chris@4 541 jmp L_dodist ; process it like a first-level entry
Chris@4 542
Chris@4 543 ALIGN 4
Chris@4 544 L_clip_window: ; match starts in the window; entry: eax = output bytes behind edi, edx = distance, edi = output
Chris@4 545 ; 721 "inffast.S"
Chris@4 546 mov ecx,eax
Chris@4 547 mov eax, [esp+52] ; eax = wsize
Chris@4 548 neg ecx ; ecx = -(output bytes behind edi)
Chris@4 549 mov esi, [esp+56] ; esi = window
Chris@4 550 ; NOTE(review): the distance is validated against wsize only - confirm this matches the check in the C inflate_fast of the same zlib version.
Chris@4 551 cmp eax,edx
Chris@4 552 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@4 553
Chris@4 554 add ecx,edx ; ecx = number of bytes that must come from the window
Chris@4 555 cmp dword ptr [esp+48],0
Chris@4 556 jne L_wrap_around_window ; write != 0: the source may wrap inside the window
Chris@4 557 ; write == 0: the needed bytes are the last ecx bytes of the window
Chris@4 558 sub eax,ecx
Chris@4 559 add esi,eax ; esi = window + wsize - ecx
Chris@4 560 ; 749 "inffast.S"
Chris@4 561 mov eax, [esp+24] ; eax = match length
Chris@4 562 cmp eax,ecx
Chris@4 563 jbe L_do_copy1 ; whole match lies in the window: copy eax bytes from esi
Chris@4 564
Chris@4 565 sub eax,ecx ; eax = bytes left after the window part
Chris@4 566 rep movsb ; copy the window part
Chris@4 567 mov esi,edi
Chris@4 568 sub esi,edx ; continue from output - distance
Chris@4 569 jmp L_do_copy1 ; the unreachable duplicate of the seven lines above that used to follow this jump was removed
Chris@4 579
Chris@4 580 L_wrap_around_window: ; write != 0; ecx = bytes needed from the window, esi = window, edx = distance
Chris@4 581 ; 793 "inffast.S"
Chris@4 582 mov eax, [esp+48] ; eax = write
Chris@4 583 cmp ecx,eax
Chris@4 584 jbe L_contiguous_in_window ; needed bytes all lie below write: single contiguous source
Chris@4 585
Chris@4 586 add esi, [esp+52]
Chris@4 587 add esi,eax
Chris@4 588 sub esi,ecx ; esi = window + wsize + write - ecx (source near the end of the window)
Chris@4 589 sub ecx,eax ; ecx = bytes to take from the end of the window
Chris@4 590
Chris@4 591
Chris@4 592 mov eax, [esp+24] ; eax = match length
Chris@4 593 cmp eax,ecx
Chris@4 594 jbe L_do_copy1 ; match ends inside that part
Chris@4 595
Chris@4 596 sub eax,ecx
Chris@4 597 rep movsb ; copy up to the end of the window
Chris@4 598 mov esi, [esp+56] ; continue at the start of the window
Chris@4 599 mov ecx, [esp+48] ; at most write bytes are taken from there
Chris@4 600 cmp eax,ecx
Chris@4 601 jbe L_do_copy1 ; match ends inside that part
Chris@4 602
Chris@4 603 sub eax,ecx
Chris@4 604 rep movsb ; copy write bytes from the start of the window
Chris@4 605 mov esi,edi
Chris@4 606 sub esi,edx ; remainder comes from output - distance
Chris@4 607 jmp L_do_copy1
Chris@4 608
Chris@4 609 L_contiguous_in_window: ; eax = write, ecx = bytes needed from the window, esi = window
Chris@4 610 ; 836 "inffast.S"
Chris@4 611 add esi,eax
Chris@4 612 sub esi,ecx ; esi = window + write - ecx
Chris@4 613
Chris@4 614
Chris@4 615 mov eax, [esp+24] ; eax = match length
Chris@4 616 cmp eax,ecx
Chris@4 617 jbe L_do_copy1 ; whole match lies in the window
Chris@4 618
Chris@4 619 sub eax,ecx
Chris@4 620 rep movsb ; copy the window part
Chris@4 621 mov esi,edi
Chris@4 622 sub esi,edx ; remainder comes from output - distance
Chris@4 623
Chris@4 624 L_do_copy1: ; copy the final eax bytes from esi to edi, then resume decoding
Chris@4 625 ; 862 "inffast.S"
Chris@4 626 mov ecx,eax
Chris@4 627 rep movsb
Chris@4 628
Chris@4 629 mov esi, [esp+44] ; restore the input pointer saved in L_check_window
Chris@4 630 jmp L_while_test
Chris@4 631 ; 878 "inffast.S"
Chris@4 632 ALIGN 4
Chris@4 633 L_init_mmx: ; set up the MMX decoder: mm0 = hold, ebp = bits, mm1 = shift still to be applied to mm0
Chris@4 634 emms
Chris@4 635
Chris@4 636
Chris@4 637
Chris@4 638
Chris@4 639
Chris@4 640 movd mm0,ebp ; mm0 = hold (64-bit accumulator from here on)
Chris@4 641 mov ebp,ebx ; ebp = bits
Chris@4 642 ; 896 "inffast.S"
Chris@4 643 movd mm4,dword ptr [esp+0] ; mm4 = length-code mask (working copy)
Chris@4 644 movq mm3,mm4 ; mm3 = length-code mask (kept to restore mm4)
Chris@4 645 movd mm5,dword ptr [esp+4] ; mm5 = distance-code mask (working copy)
Chris@4 646 movq mm2,mm5 ; mm2 = distance-code mask (kept to restore mm5)
Chris@4 647 pxor mm1,mm1 ; no shift pending
Chris@4 648 mov ebx, [esp+8] ; ebx = length table
Chris@4 649 jmp L_do_loop_mmx ; the target is the next label
Chris@4 650
Chris@4 651 ALIGN 4
Chris@4 652 L_do_loop_mmx:
Chris@4 653 psrlq mm0,mm1 ; apply the pending shift: drop the bits of the previous code
Chris@4 654
Chris@4 655 cmp ebp,32
Chris@4 656 ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
Chris@4 657
Chris@4 658 movd mm6,ebp
Chris@4 659 movd mm7,dword ptr [esi] ; read 32 input bits
Chris@4 660 add esi,4
Chris@4 661 psllq mm7,mm6 ; place them above the bits already held
Chris@4 662 add ebp,32 ; bits += 32
Chris@4 663 por mm0,mm7
Chris@4 664
Chris@4 665 L_get_length_code_mmx:
Chris@4 666 pand mm4,mm0
Chris@4 667 movd eax,mm4 ; eax = hold and length-code mask
Chris@4 668 movq mm4,mm3 ; restore the mask for the next lookup
Chris@4 669 mov eax, [ebx+eax*4] ; entry: al = op, ah = code length in bits, upper 16 bits = value
Chris@4 670
Chris@4 671 L_dolen_mmx:
Chris@4 672 movzx ecx,ah
Chris@4 673 movd mm1,ecx ; code length becomes the pending shift
Chris@4 674 sub ebp,ecx ; bits -= code length
Chris@4 675
Chris@4 676 test al,al
Chris@4 677 jnz L_test_for_length_base_mmx ; op != 0: not a literal
Chris@4 678
Chris@4 679 shr eax,16
Chris@4 680 stosb ; literal: store the low byte of the value and advance edi
Chris@4 681
Chris@4 682 L_while_test_mmx: ; same limits as L_while_test
Chris@4 683
Chris@4 684
Chris@4 685 cmp [esp+16],edi
Chris@4 686 jbe L_break_loop ; output limit reached
Chris@4 687
Chris@4 688 cmp [esp+20],esi
Chris@4 689 ja L_do_loop_mmx ; input limit still above esi
Chris@4 690 jmp L_break_loop
Chris@4 691
Chris@4 692 L_test_for_length_base_mmx: ; MMX path: entry is not a literal
Chris@4 693
Chris@4 694 mov edx,eax
Chris@4 695 shr edx,16 ; edx = value field (length base); edx holds the match length in the MMX path
Chris@4 696
Chris@4 697 test al,16
Chris@4 698 jz L_test_for_second_level_length_mmx ; bit 16 clear: not a length base
Chris@4 699 and eax,15 ; eax = number of extra bits
Chris@4 700 jz L_decode_distance_mmx ; none: the code-length shift in mm1 is still pending
Chris@4 701
Chris@4 702 psrlq mm0,mm1 ; drop the length code
Chris@4 703 movd mm1,eax ; extra-bit count becomes the pending shift
Chris@4 704 movd ecx,mm0
Chris@4 705 sub ebp,eax ; bits -= extra
Chris@4 706 and ecx, [inflate_fast_mask+eax*4] ; ecx = extra bits
Chris@4 707 add edx,ecx ; length = base + extra
Chris@4 708
Chris@4 709 L_decode_distance_mmx:
Chris@4 710 psrlq mm0,mm1 ; apply the pending shift
Chris@4 711
Chris@4 712 cmp ebp,32
Chris@4 713 ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
Chris@4 714
Chris@4 715 movd mm6,ebp
Chris@4 716 movd mm7,dword ptr [esi] ; read 32 input bits
Chris@4 717 add esi,4
Chris@4 718 psllq mm7,mm6
Chris@4 719 add ebp,32
Chris@4 720 por mm0,mm7
Chris@4 721
Chris@4 722 L_get_dist_code_mmx:
Chris@4 723 mov ebx, [esp+12] ; ebx = distance table (the length table pointer is reloaded after the copy)
Chris@4 724 pand mm5,mm0
Chris@4 725 movd eax,mm5 ; eax = hold and distance-code mask
Chris@4 726 movq mm5,mm2 ; restore the mask for the next lookup
Chris@4 727 mov eax, [ebx+eax*4] ; entry: al = op, ah = code length in bits, upper 16 bits = value
Chris@4 728
Chris@4 729 L_dodist_mmx:
Chris@4 730
Chris@4 731 movzx ecx,ah
Chris@4 732 mov ebx,eax
Chris@4 733 shr ebx,16 ; ebx = value field (distance base)
Chris@4 734 sub ebp,ecx ; bits -= code length
Chris@4 735 movd mm1,ecx ; code length becomes the pending shift
Chris@4 736
Chris@4 737 test al,16
Chris@4 738 jz L_test_for_second_level_dist_mmx ; bit 16 clear: not a distance base
Chris@4 739 and eax,15 ; eax = number of extra bits
Chris@4 740 jz L_check_dist_one_mmx ; none: try the distance-1 shortcut
Chris@4 741
Chris@4 742 L_add_bits_to_dist_mmx:
Chris@4 743 psrlq mm0,mm1 ; drop the distance code
Chris@4 744 movd mm1,eax ; extra-bit count becomes the pending shift
Chris@4 745 movd ecx,mm0
Chris@4 746 sub ebp,eax ; bits -= extra
Chris@4 747 and ecx, [inflate_fast_mask+eax*4] ; ecx = extra bits
Chris@4 748 add ebx,ecx ; distance = base + extra
Chris@4 749
Chris@4 750 L_check_window_mmx: ; copy a match of edx bytes from ebx bytes back; edi = output
Chris@4 751 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
Chris@4 752 mov eax,edi
Chris@4 753 sub eax, [esp+40] ; eax = bytes of output available behind edi
Chris@4 754
Chris@4 755 cmp eax,ebx
Chris@4 756 jb L_clip_window_mmx ; distance reaches behind the output start: part of the match comes from the window
Chris@4 757
Chris@4 758 mov ecx,edx ; ecx = match length (dl is reused as scratch below)
Chris@4 759 mov esi,edi
Chris@4 760 sub esi,ebx ; esi = output - distance
Chris@4 761 ; First three bytes are copied by hand. NOTE(review): this assumes the length is at least 3 - confirm the tables guarantee it.
Chris@4 762 sub ecx,3
Chris@4 763 mov al, [esi]
Chris@4 764 mov [edi],al
Chris@4 765 mov al, [esi+1]
Chris@4 766 mov dl, [esi+2]
Chris@4 767 add esi,3
Chris@4 768 mov [edi+1],al
Chris@4 769 mov [edi+2],dl
Chris@4 770 add edi,3
Chris@4 771 rep movsb ; copy the remaining length - 3 bytes
Chris@4 772
Chris@4 773 mov esi, [esp+44] ; restore the input pointer
Chris@4 774 mov ebx, [esp+8] ; restore ebx = length table
Chris@4 775 jmp L_while_test_mmx
Chris@4 776
Chris@4 777 ALIGN 4
Chris@4 778 L_check_dist_one_mmx: ; distance code without extra bits: a distance of 1 becomes a byte fill
Chris@4 779 cmp ebx,1
Chris@4 780 jne L_check_window_mmx ; distance is not 1: ordinary copy
Chris@4 781 cmp [esp+40],edi
Chris@4 782 je L_check_window_mmx ; nothing written behind edi yet: take the general path
Chris@4 783
Chris@4 784 dec edi
Chris@4 785 mov ecx,edx ; ecx = match length
Chris@4 786 mov al, [edi] ; al = previous output byte
Chris@4 787 sub ecx,3
Chris@4 788
Chris@4 789 mov [edi+1],al ; first three copies written directly
Chris@4 790 mov [edi+2],al
Chris@4 791 mov [edi+3],al
Chris@4 792 add edi,4 ; net effect: output pointer advanced by 3
Chris@4 793 rep stosb ; fill the remaining length - 3 bytes with al
Chris@4 794
Chris@4 795 mov ebx, [esp+8] ; restore ebx = length table
Chris@4 796 jmp L_while_test_mmx
Chris@4 797
Chris@4 798 ALIGN 4
Chris@4 799 L_test_for_second_level_length_mmx: ; length entry without bit 16: second-level link unless bit 64 is set
Chris@4 800 test al,64
Chris@4 801 jnz L_test_for_end_of_block ; bit 64 set: end of block or invalid code
Chris@4 802
Chris@4 803 and eax,15 ; eax = number of index bits for the second-level table
Chris@4 804 psrlq mm0,mm1 ; drop the first-level code
Chris@4 805 movd ecx,mm0
Chris@4 806 and ecx, [inflate_fast_mask+eax*4] ; ecx = low index bits of hold
Chris@4 807 add ecx,edx ; plus the value field = index of the second-level entry
Chris@4 808 mov eax, [ebx+ecx*4] ; ebx = length table
Chris@4 809 jmp L_dolen_mmx ; process it like a first-level entry
Chris@4 810
Chris@4 811 ALIGN 4
Chris@4 812 L_test_for_second_level_dist_mmx: ; distance entry without bit 16: second-level link unless bit 64 is set
Chris@4 813 test al,64
Chris@4 814 jnz L_invalid_distance_code ; bit 64 set: invalid distance code
Chris@4 815
Chris@4 816 and eax,15 ; eax = number of index bits for the second-level table
Chris@4 817 psrlq mm0,mm1 ; drop the first-level code
Chris@4 818 movd ecx,mm0
Chris@4 819 and ecx, [inflate_fast_mask+eax*4] ; ecx = low index bits of hold
Chris@4 820 mov eax, [esp+12] ; eax = distance table
Chris@4 821 add ecx,ebx ; plus the value field (held in ebx here) = index of the second-level entry
Chris@4 822 mov eax, [eax+ecx*4]
Chris@4 823 jmp L_dodist_mmx ; process it like a first-level entry
Chris@4 824
Chris@4 825 ALIGN 4
Chris@4 826 L_clip_window_mmx: ; match starts in the window; entry: eax = output bytes behind edi, ebx = distance, edx = match length
Chris@4 827
Chris@4 828 mov ecx,eax
Chris@4 829 mov eax, [esp+52] ; eax = wsize
Chris@4 830 neg ecx ; ecx = -(output bytes behind edi)
Chris@4 831 mov esi, [esp+56] ; esi = window
Chris@4 832 ; NOTE(review): the distance is validated against wsize only - confirm this matches the check in the C inflate_fast of the same zlib version.
Chris@4 833 cmp eax,ebx
Chris@4 834 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@4 835
Chris@4 836 add ecx,ebx ; ecx = number of bytes that must come from the window
Chris@4 837 cmp dword ptr [esp+48],0
Chris@4 838 jne L_wrap_around_window_mmx ; write != 0: the source may wrap inside the window
Chris@4 839 ; write == 0: the needed bytes are the last ecx bytes of the window
Chris@4 840 sub eax,ecx
Chris@4 841 add esi,eax ; esi = window + wsize - ecx
Chris@4 842
Chris@4 843 cmp edx,ecx
Chris@4 844 jbe L_do_copy1_mmx ; whole match lies in the window: copy edx bytes from esi
Chris@4 845
Chris@4 846 sub edx,ecx ; edx = bytes left after the window part
Chris@4 847 rep movsb ; copy the window part
Chris@4 848 mov esi,edi
Chris@4 849 sub esi,ebx ; continue from output - distance
Chris@4 850 jmp L_do_copy1_mmx ; the unreachable duplicate of the seven lines above that used to follow this jump was removed
Chris@4 860
Chris@4 861 L_wrap_around_window_mmx: ; write != 0; ecx = bytes needed from the window, esi = window, ebx = distance, edx = length
Chris@4 862
Chris@4 863 mov eax, [esp+48] ; eax = write
Chris@4 864 cmp ecx,eax
Chris@4 865 jbe L_contiguous_in_window_mmx ; needed bytes all lie below write: single contiguous source
Chris@4 866
Chris@4 867 add esi, [esp+52]
Chris@4 868 add esi,eax
Chris@4 869 sub esi,ecx ; esi = window + wsize + write - ecx (source near the end of the window)
Chris@4 870 sub ecx,eax ; ecx = bytes to take from the end of the window
Chris@4 871
Chris@4 872
Chris@4 873 cmp edx,ecx
Chris@4 874 jbe L_do_copy1_mmx ; match ends inside that part
Chris@4 875
Chris@4 876 sub edx,ecx
Chris@4 877 rep movsb ; copy up to the end of the window
Chris@4 878 mov esi, [esp+56] ; continue at the start of the window
Chris@4 879 mov ecx, [esp+48] ; at most write bytes are taken from there
Chris@4 880 cmp edx,ecx
Chris@4 881 jbe L_do_copy1_mmx ; match ends inside that part
Chris@4 882
Chris@4 883 sub edx,ecx
Chris@4 884 rep movsb ; copy write bytes from the start of the window
Chris@4 885 mov esi,edi
Chris@4 886 sub esi,ebx ; remainder comes from output - distance
Chris@4 887 jmp L_do_copy1_mmx
Chris@4 888
Chris@4 889 L_contiguous_in_window_mmx: ; eax = write, ecx = bytes needed from the window, esi = window
Chris@4 890
Chris@4 891 add esi,eax
Chris@4 892 sub esi,ecx ; esi = window + write - ecx
Chris@4 893
Chris@4 894
Chris@4 895 cmp edx,ecx
Chris@4 896 jbe L_do_copy1_mmx ; whole match lies in the window
Chris@4 897
Chris@4 898 sub edx,ecx
Chris@4 899 rep movsb ; copy the window part
Chris@4 900 mov esi,edi
Chris@4 901 sub esi,ebx ; remainder comes from output - distance
Chris@4 902
Chris@4 903 L_do_copy1_mmx: ; copy the final edx bytes from esi to edi, then resume decoding
Chris@4 904
Chris@4 905
Chris@4 906 mov ecx,edx
Chris@4 907 rep movsb
Chris@4 908
Chris@4 909 mov esi, [esp+44] ; restore the input pointer saved in L_check_window_mmx
Chris@4 910 mov ebx, [esp+8] ; restore ebx = length table
Chris@4 911 jmp L_while_test_mmx
Chris@4 912 ; 1174 "inffast.S"
Chris@4 913 L_invalid_distance_code: ; exits below set ecx = message (0 for none) and edx = new state->mode
Chris@4 914
Chris@4 915
Chris@4 916
Chris@4 917
Chris@4 918
Chris@4 919 mov ecx, invalid_distance_code_msg
Chris@4 920 mov edx,INFLATE_MODE_BAD
Chris@4 921 jmp L_update_stream_state
Chris@4 922
Chris@4 923 L_test_for_end_of_block: ; length entry with bit 64 set
Chris@4 924
Chris@4 925
Chris@4 926
Chris@4 927
Chris@4 928
Chris@4 929 test al,32
Chris@4 930 jz L_invalid_literal_length_code ; bit 32 clear: invalid code
Chris@4 931
Chris@4 932 mov ecx,0 ; bit 32 set: end of block, no message
Chris@4 933 mov edx,INFLATE_MODE_TYPE
Chris@4 934 jmp L_update_stream_state
Chris@4 935
Chris@4 936 L_invalid_literal_length_code:
Chris@4 937
Chris@4 938
Chris@4 939
Chris@4 940
Chris@4 941
Chris@4 942 mov ecx, invalid_literal_length_code_msg
Chris@4 943 mov edx,INFLATE_MODE_BAD
Chris@4 944 jmp L_update_stream_state
Chris@4 945
Chris@4 946 L_invalid_distance_too_far: ; reached from L_clip_window / L_clip_window_mmx, where esi was loaded with the window pointer
Chris@4 947
Chris@4 948
Chris@4 949
Chris@4 950 mov esi, [esp+44] ; restore the input pointer
Chris@4 951 mov ecx, invalid_distance_too_far_msg
Chris@4 952 mov edx,INFLATE_MODE_BAD
Chris@4 953 jmp L_update_stream_state
Chris@4 954
Chris@4 955 L_update_stream_state: ; store the message (if any) in the stream and the new mode in the state
Chris@4 956
Chris@4 957 mov eax, [esp+88] ; eax = stream (first argument)
Chris@4 958 test ecx,ecx
Chris@4 959 jz L_skip_msg ; ecx == 0: leave the stream's message field untouched
Chris@4 960 mov [eax+24],ecx ; stream offset 24 = message (presumably msg - confirm)
Chris@4 961 L_skip_msg:
Chris@4 962 mov eax, [eax+28] ; eax = state
Chris@4 963 mov [eax+mode_state],edx ; state->mode = edx
Chris@4 964 jmp L_break_loop
Chris@4 965
Chris@4 966 ALIGN 4
Chris@4 967 L_break_loop: ; common exit: write pointers, counts, hold and bits back to the stream and state
Chris@4 968 ; 1243 "inffast.S"
Chris@4 969 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 970 jne L_update_next_in
Chris@4 971
Chris@4 972
Chris@4 973
Chris@4 974 mov ebx,ebp ; MMX path kept the bit count in ebp; move it to ebx as in the integer path
Chris@4 975
Chris@4 976 L_update_next_in:
Chris@4 977 ; 1266 "inffast.S"
Chris@4 978 mov eax, [esp+88] ; eax = stream
Chris@4 979 mov ecx,ebx
Chris@4 980 mov edx, [eax+28] ; edx = state
Chris@4 981 shr ecx,3 ; ecx = whole unused bytes still in hold
Chris@4 982 sub esi,ecx ; give those bytes back to the input
Chris@4 983 shl ecx,3
Chris@4 984 sub ebx,ecx ; bits = bits mod 8
Chris@4 985 mov [eax+12],edi ; store the output pointer in the stream
Chris@4 986 mov [edx+bits_state],ebx ; state->bits
Chris@4 987 mov ecx,ebx ; keep bits in cl for the mask built below
Chris@4 988
Chris@4 989 lea ebx, [esp+28]
Chris@4 990 cmp [esp+20],ebx
Chris@4 991 jne L_buf_not_used ; input limit is not the local buffer: esi already points into the caller's input
Chris@4 992 ; The local 12-byte buffer was used: translate esi and the limit back to the caller's buffer.
Chris@4 993 sub esi,ebx ; esi = bytes consumed from the local buffer
Chris@4 994 mov ebx, [eax+0]
Chris@4 995 mov [esp+20],ebx
Chris@4 996 add esi,ebx ; esi = stream input pointer + bytes consumed
Chris@4 997 mov ebx, [eax+4]
Chris@4 998 sub ebx,11
Chris@4 999 add [esp+20],ebx ; limit = stream input pointer + input length - 11, as computed at entry
Chris@4 1000
Chris@4 1001 L_buf_not_used:
Chris@4 1002 mov [eax+0],esi ; store the input pointer in the stream
Chris@4 1003
Chris@4 1004 mov ebx,1
Chris@4 1005 shl ebx,cl
Chris@4 1006 dec ebx ; ebx = (1 shl bits) - 1
Chris@4 1007
Chris@4 1008
Chris@4 1009
Chris@4 1010
Chris@4 1011
Chris@4 1012 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 1013 jne L_update_hold
Chris@4 1014
Chris@4 1015
Chris@4 1016
Chris@4 1017 psrlq mm0,mm1 ; MMX path: apply the pending shift
Chris@4 1018 movd ebp,mm0 ; ebp = low 32 bits of hold
Chris@4 1019
Chris@4 1020 emms ; leave MMX state
Chris@4 1021
Chris@4 1022 L_update_hold:
Chris@4 1023
Chris@4 1024
Chris@4 1025
Chris@4 1026 and ebp,ebx ; keep only the remaining bits
Chris@4 1027 mov [edx+hold_state],ebp ; state->hold
Chris@4 1028
Chris@4 1029 ; Remaining input count = 11 + (limit - esi), computed without going negative; stored at stream offset 4.
Chris@4 1030
Chris@4 1031
Chris@4 1032 mov ebx, [esp+20]
Chris@4 1033 cmp ebx,esi
Chris@4 1034 jbe L_last_is_smaller
Chris@4 1035
Chris@4 1036 sub ebx,esi
Chris@4 1037 add ebx,11
Chris@4 1038 mov [eax+4],ebx
Chris@4 1039 jmp L_fixup_out
Chris@4 1040 L_last_is_smaller:
Chris@4 1041 sub esi,ebx
Chris@4 1042 neg esi
Chris@4 1043 add esi,11
Chris@4 1044 mov [eax+4],esi
Chris@4 1045
Chris@4 1046
Chris@4 1047
Chris@4 1048
Chris@4 1049 L_fixup_out: ; remaining output space = 257 + (limit - edi); stored at stream offset 16
Chris@4 1050
Chris@4 1051 mov ebx, [esp+16]
Chris@4 1052 cmp ebx,edi
Chris@4 1053 jbe L_end_is_smaller
Chris@4 1054
Chris@4 1055 sub ebx,edi
Chris@4 1056 add ebx,257
Chris@4 1057 mov [eax+16],ebx
Chris@4 1058 jmp L_done
Chris@4 1059 L_end_is_smaller:
Chris@4 1060 sub edi,ebx
Chris@4 1061 neg edi
Chris@4 1062 add edi,257
Chris@4 1063 mov [eax+16],edi
Chris@4 1064
Chris@4 1065
Chris@4 1066
Chris@4 1067
Chris@4 1068
Chris@4 1069 L_done: ; release the locals and restore everything saved in the prologue
Chris@4 1070 add esp,64
Chris@4 1071 popfd
Chris@4 1072 pop ebx
Chris@4 1073 pop ebp
Chris@4 1074 pop esi
Chris@4 1075 pop edi
Chris@4 1076 ret
Chris@4 1077 _inflate_fast endp
Chris@4 1078
Chris@4 1079 _TEXT ends
Chris@4 1080 end