annotate src/zlib-1.2.8/contrib/masmx86/inffas32.asm @ 56:af97cad61ff0

Add updated build of PortAudio for OSX
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 03 Jan 2017 15:10:52 +0000
parents 5ea0608b923f
children
rev   line source
Chris@43 1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
Chris@43 2 ; *
Chris@43 3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
Chris@43 4 ; *
Chris@43 5 ; * Copyright (C) 1995-2003 Mark Adler
Chris@43 6 ; * For conditions of distribution and use, see copyright notice in zlib.h
Chris@43 7 ; *
Chris@43 8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@43 9 ; * Please use the copyright conditions above.
Chris@43 10 ; *
Chris@43 11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@43 12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@43 13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@43 14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
Chris@43 15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@43 16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@43 17 ; * versions of this and inffast.S can be found at
Chris@43 18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@43 19 ; *
Chris@43 20 ; * 2005 : modification by Gilles Vollant
Chris@43 21 ; */
Chris@43 22 ; For Visual C++ 4.x and higher and ML 6.x and higher
Chris@43 23 ; ml.exe is in directory \MASM611C of Win95 DDK
Chris@43 24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
Chris@43 25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
Chris@43 26 ;
Chris@43 27 ;
Chris@43 28 ; compile with command line option
Chris@43 29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
Chris@43 30
Chris@43 31 ; if you define NO_GZIP (see inflate.h), compile with
Chris@43 32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
Chris@43 33
Chris@43 34
Chris@43 35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
Chris@43 36 ; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
Chris@43 37 ; in inflate_state in inflate.h)
Chris@43 38 zlib1222sup equ 8 ; added to every inflate_state offset defined below
Chris@43 39 ; INFLATE_MODE_* are stored to state->mode and must equal the inflate_mode enum of the inflate.h this is linked with.
Chris@43 40 ; zlib 1.2.8: enum is unconditional, TYPE = 11 and BAD = 29 (old 26 is CHECK there; 3/17 matched the 1.2.0 non-gzip enum).
Chris@43 41 IFDEF GUNZIP
Chris@43 42 INFLATE_MODE_TYPE equ 11
Chris@43 43 INFLATE_MODE_BAD equ 29
Chris@43 44 ELSE
Chris@43 45 IFNDEF NO_GUNZIP
Chris@43 46 INFLATE_MODE_TYPE equ 11
Chris@43 47 INFLATE_MODE_BAD equ 29
Chris@43 48 ELSE ; NO_GUNZIP kept for command-line compatibility; the values no longer differ
Chris@43 49 INFLATE_MODE_TYPE equ 11
Chris@43 50 INFLATE_MODE_BAD equ 29
Chris@43 51 ENDIF
Chris@43 52 ENDIF
Chris@43 53
Chris@43 54
Chris@43 55 ; 75 "inffast.S"
Chris@43 56 ;FILE "inffast.S"
Chris@43 57
Chris@43 58 ;;;GLOBAL _inflate_fast
Chris@43 59
Chris@43 60 ;;;SECTION .text
Chris@43 61
Chris@43 62
Chris@43 63
Chris@43 64 .586p ; Pentium-level instruction set (cpuid is used by the MMX probe)
Chris@43 65 .mmx ; accept MMX mnemonics (movd, psrlq, emms, ...)
Chris@43 66
Chris@43 67 name inflate_fast_x86
Chris@43 68 .MODEL FLAT
Chris@43 69
Chris@43 70 _DATA segment
Chris@43 71 inflate_fast_use_mmx: ; path selector: 2 = MMX loop, above 2 = scalar loop, below 2 = probe the CPU on first use
Chris@43 72 dd 1 ; starts below 2, so the first call runs the probe, which stores 2 or 3
Chris@43 73
Chris@43 74
Chris@43 75 _TEXT segment
Chris@43 76
Chris@43 77
Chris@43 78
Chris@43 79 ALIGN 4
Chris@43 80 db 'Fast decoding Code from Chris Anderson' ; unlabelled NUL-terminated credit string; no label refers to it
Chris@43 81 db 0
Chris@43 82
Chris@43 83 ALIGN 4
Chris@43 84 invalid_literal_length_code_msg: ; NUL-terminated text; its address is loaded by L_invalid_literal_length_code
Chris@43 85 db 'invalid literal/length code'
Chris@43 86 db 0
Chris@43 87
Chris@43 88 ALIGN 4
Chris@43 89 invalid_distance_code_msg: ; NUL-terminated text; its address is loaded by L_invalid_distance_code
Chris@43 90 db 'invalid distance code'
Chris@43 91 db 0
Chris@43 92
Chris@43 93 ALIGN 4
Chris@43 94 invalid_distance_too_far_msg: ; NUL-terminated text; its address is loaded by L_invalid_distance_too_far
Chris@43 95 db 'invalid distance too far back'
Chris@43 96 db 0
Chris@43 97
Chris@43 98 ; bit-mask table: 33 dwords, entry n = 2^n - 1 for n = 0..32
Chris@43 99 ALIGN 4
Chris@43 100 inflate_fast_mask: ; read by the MMX path as [inflate_fast_mask+eax*4] with eax = number of bits to keep
Chris@43 101 dd 0
Chris@43 102 dd 1
Chris@43 103 dd 3
Chris@43 104 dd 7
Chris@43 105 dd 15
Chris@43 106 dd 31
Chris@43 107 dd 63
Chris@43 108 dd 127
Chris@43 109 dd 255
Chris@43 110 dd 511
Chris@43 111 dd 1023
Chris@43 112 dd 2047
Chris@43 113 dd 4095
Chris@43 114 dd 8191
Chris@43 115 dd 16383
Chris@43 116 dd 32767
Chris@43 117 dd 65535
Chris@43 118 dd 131071
Chris@43 119 dd 262143
Chris@43 120 dd 524287
Chris@43 121 dd 1048575
Chris@43 122 dd 2097151
Chris@43 123 dd 4194303
Chris@43 124 dd 8388607
Chris@43 125 dd 16777215
Chris@43 126 dd 33554431
Chris@43 127 dd 67108863
Chris@43 128 dd 134217727
Chris@43 129 dd 268435455
Chris@43 130 dd 536870911
Chris@43 131 dd 1073741823
Chris@43 132 dd 2147483647
Chris@43 133 dd 4294967295
Chris@43 134
Chris@43 135
Chris@43 136 mode_state equ 0 ;/* state->mode */ byte offset from the state pointer, as are all entries below
Chris@43 137 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
Chris@43 138 write_state equ (36+4+zlib1222sup) ;/* state->write */ NOTE(review): the extra +4 presumably covers a field added before it - confirm against inflate.h
Chris@43 139 window_state equ (40+4+zlib1222sup) ;/* state->window */
Chris@43 140 hold_state equ (44+4+zlib1222sup) ;/* state->hold */ bit accumulator, read at entry and written back at exit
Chris@43 141 bits_state equ (48+4+zlib1222sup) ;/* state->bits */ number of valid bits in hold
Chris@43 142 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ pointer to the table of 4-byte length/literal entries
Chris@43 143 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ pointer to the table of 4-byte distance entries
Chris@43 144 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ index width of the first-level lencode lookup
Chris@43 145 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ index width of the first-level distcode lookup
Chris@43 146
Chris@43 147
Chris@43 148 ;;SECTION .text
Chris@43 149 ; 205 "inffast.S"
Chris@43 150 ;GLOBAL inflate_fast_use_mmx
Chris@43 151
Chris@43 152 ;SECTION .data
Chris@43 153
Chris@43 154
Chris@43 155 ; GLOBAL inflate_fast_use_mmx:object
Chris@43 156 ;.size inflate_fast_use_mmx, 4
Chris@43 157 ; 226 "inffast.S"
Chris@43 158 ;SECTION .text
Chris@43 159
Chris@43 160 ALIGN 4
Chris@43 161 _inflate_fast proc near ; two stack arguments, read below as [esp+88] (stream) and [esp+92] (start); returns with plain ret
Chris@43 162 .FPO (16, 4, 0, 0, 1, 0)
Chris@43 163 push edi ; save the caller's edi, esi, ebp, ebx and EFLAGS (20 bytes)
Chris@43 164 push esi
Chris@43 165 push ebp
Chris@43 166 push ebx
Chris@43 167 pushfd
Chris@43 168 sub esp,64 ; 64-byte local frame
Chris@43 169 cld ; rep movsb / stosb / lodsw below must step upward
Chris@43 170 ; frame: +0 length mask, +4 distance mask, +8 lencode, +12 distcode,
Chris@43 171 ; +16 output limit, +20 input limit, +24 match length (scalar path), +28..+39 scratch input buffer,
Chris@43 172 ; +40 output start, +44 saved input pointer, +48 write, +52 wsize, +56 window, +60 initial output pointer
Chris@43 173
Chris@43 174 mov esi, [esp+88] ; esi = stream argument
Chris@43 175 mov edi, [esi+28] ; edi = state pointer held at stream+28
Chris@43 176
Chris@43 177
Chris@43 178
Chris@43 179
Chris@43 180
Chris@43 181
Chris@43 182
Chris@43 183 mov edx, [esi+4] ; edx = stream+4, input bytes available (presumably avail_in)
Chris@43 184 mov eax, [esi+0] ; eax = stream+0, input pointer (presumably next_in)
Chris@43 185
Chris@43 186 add edx,eax
Chris@43 187 sub edx,11 ; input limit = input pointer + available - 11
Chris@43 188
Chris@43 189 mov [esp+44],eax
Chris@43 190 mov [esp+20],edx
Chris@43 191
Chris@43 192 mov ebp, [esp+92] ; ebp = second argument (start)
Chris@43 193 mov ecx, [esi+16] ; ecx = stream+16, output space available (presumably avail_out)
Chris@43 194 mov ebx, [esi+12] ; ebx = stream+12, output pointer (presumably next_out)
Chris@43 195
Chris@43 196 sub ebp,ecx
Chris@43 197 neg ebp
Chris@43 198 add ebp,ebx ; output start = output pointer - (start - available output)
Chris@43 199
Chris@43 200 sub ecx,257
Chris@43 201 add ecx,ebx ; output limit = output pointer + available output - 257
Chris@43 202
Chris@43 203 mov [esp+60],ebx
Chris@43 204 mov [esp+40],ebp
Chris@43 205 mov [esp+16],ecx
Chris@43 206 ; 285 "inffast.S"
Chris@43 207 mov eax, [edi+lencode_state]
Chris@43 208 mov ecx, [edi+distcode_state]
Chris@43 209
Chris@43 210 mov [esp+8],eax
Chris@43 211 mov [esp+12],ecx
Chris@43 212
Chris@43 213 mov eax,1
Chris@43 214 mov ecx, [edi+lenbits_state]
Chris@43 215 shl eax,cl
Chris@43 216 dec eax ; eax = (1 << lenbits) - 1
Chris@43 217 mov [esp+0],eax
Chris@43 218
Chris@43 219 mov eax,1
Chris@43 220 mov ecx, [edi+distbits_state]
Chris@43 221 shl eax,cl
Chris@43 222 dec eax ; eax = (1 << distbits) - 1
Chris@43 223 mov [esp+4],eax
Chris@43 224
Chris@43 225 mov eax, [edi+wsize_state]
Chris@43 226 mov ecx, [edi+write_state]
Chris@43 227 mov edx, [edi+window_state]
Chris@43 228
Chris@43 229 mov [esp+52],eax
Chris@43 230 mov [esp+48],ecx
Chris@43 231 mov [esp+56],edx
Chris@43 232
Chris@43 233 mov ebp, [edi+hold_state] ; ebp = bit accumulator (hold)
Chris@43 234 mov ebx, [edi+bits_state] ; ebx = number of bits in hold; edi is free for reuse after this load
Chris@43 235 ; 321 "inffast.S"
Chris@43 236 mov esi, [esp+44]
Chris@43 237 mov ecx, [esp+20]
Chris@43 238 cmp ecx,esi
Chris@43 239 ja L_align_long ; input limit above input pointer: read directly from the caller's buffer
Chris@43 240 ; otherwise stage the remaining input in the 12-byte scratch buffer at [esp+28]
Chris@43 241 add ecx,11
Chris@43 242 sub ecx,esi ; ecx = input limit + 11 - input pointer = bytes available
Chris@43 243 mov eax,12
Chris@43 244 sub eax,ecx ; eax = 12 - bytes available = padding length
Chris@43 245 lea edi, [esp+28]
Chris@43 246 rep movsb ; copy the available bytes into the scratch buffer
Chris@43 247 mov ecx,eax
Chris@43 248 xor eax,eax
Chris@43 249 rep stosb ; zero-fill the rest of the 12 bytes
Chris@43 250 lea esi, [esp+28]
Chris@43 251 mov [esp+20],esi ; input limit = scratch start; the exit code recognises the buffer by this value
Chris@43 252 jmp L_is_aligned
Chris@43 253
Chris@43 254
Chris@43 255 L_align_long: ; feed single bytes into hold until esi is a multiple of 4
Chris@43 256 test esi,3
Chris@43 257 jz L_is_aligned
Chris@43 258 xor eax,eax
Chris@43 259 mov al, [esi]
Chris@43 260 inc esi
Chris@43 261 mov ecx,ebx ; cl = current bit count = shift for the new byte
Chris@43 262 add ebx,8
Chris@43 263 shl eax,cl
Chris@43 264 or ebp,eax ; hold |= byte << old bit count
Chris@43 265 jmp L_align_long
Chris@43 266
Chris@43 267 L_is_aligned:
Chris@43 268 mov edi, [esp+60] ; edi = output pointer for the decode loops
Chris@43 269 ; 366 "inffast.S"
Chris@43 270 L_check_mmx: ; dispatch on inflate_fast_use_mmx; probing happens only while the value is below 2
Chris@43 271 cmp dword ptr [inflate_fast_use_mmx],2
Chris@43 272 je L_init_mmx ; 2: MMX loop
Chris@43 273 ja L_do_loop ; above 2: scalar loop
Chris@43 274 ; below 2: probe the CPU, store 2 or 3, then dispatch again
Chris@43 275 push eax ; cpuid overwrites eax, ebx, ecx, edx
Chris@43 276 push ebx
Chris@43 277 push ecx
Chris@43 278 push edx
Chris@43 279 pushfd
Chris@43 280 mov eax, [esp] ; eax = current EFLAGS
Chris@43 281 xor dword ptr [esp],0200000h ; flip bit 21 (the EFLAGS ID bit) in the stacked copy
Chris@43 282
Chris@43 283
Chris@43 284
Chris@43 285
Chris@43 286 popfd ; try to load the modified flags
Chris@43 287 pushfd
Chris@43 288 pop edx ; edx = EFLAGS as actually accepted
Chris@43 289 xor edx,eax
Chris@43 290 jz L_dont_use_mmx ; nothing changed: the bit cannot be toggled, so cpuid is not executed
Chris@43 291 xor eax,eax
Chris@43 292 cpuid ; leaf 0: vendor string in ebx, edx, ecx
Chris@43 293 cmp ebx,0756e6547h ; "Genu"
Chris@43 294 jne L_dont_use_mmx
Chris@43 295 cmp ecx,06c65746eh ; "ntel"
Chris@43 296 jne L_dont_use_mmx
Chris@43 297 cmp edx,049656e69h ; "ineI"
Chris@43 298 jne L_dont_use_mmx
Chris@43 299 mov eax,1
Chris@43 300 cpuid ; leaf 1: signature in eax, feature flags in edx
Chris@43 301 shr eax,8
Chris@43 302 and eax,15 ; eax = bits 8..11 of the signature (family)
Chris@43 303 cmp eax,6
Chris@43 304 jne L_dont_use_mmx ; only family 6 is allowed onto the MMX path
Chris@43 305 test edx,0800000h ; feature bit 23 (MMX)
Chris@43 306 jnz L_use_mmx
Chris@43 307 jmp L_dont_use_mmx
Chris@43 308 L_use_mmx:
Chris@43 309 mov dword ptr [inflate_fast_use_mmx],2
Chris@43 310 jmp L_check_mmx_pop
Chris@43 311 L_dont_use_mmx:
Chris@43 312 mov dword ptr [inflate_fast_use_mmx],3
Chris@43 313 L_check_mmx_pop:
Chris@43 314 pop edx ; restore the registers saved before the probe
Chris@43 315 pop ecx
Chris@43 316 pop ebx
Chris@43 317 pop eax
Chris@43 318 jmp L_check_mmx ; dispatch again with the stored result
Chris@43 319 ; 426 "inffast.S"
Chris@43 320 ALIGN 4
Chris@43 321 L_do_loop: ; scalar loop: esi = input, edi = output, ebp = hold, bl = bit count
Chris@43 322 ; 437 "inffast.S"
Chris@43 323 cmp bl,15
Chris@43 324 ja L_get_length_code ; more than 15 bits buffered: no refill
Chris@43 325
Chris@43 326 xor eax,eax
Chris@43 327 lodsw ; ax = next 16 input bits, esi += 2
Chris@43 328 mov cl,bl
Chris@43 329 add bl,16
Chris@43 330 shl eax,cl
Chris@43 331 or ebp,eax ; hold |= new bits << old bit count
Chris@43 332
Chris@43 333 L_get_length_code:
Chris@43 334 mov edx, [esp+0] ; length mask
Chris@43 335 mov ecx, [esp+8] ; lencode table
Chris@43 336 and edx,ebp
Chris@43 337 mov eax, [ecx+edx*4] ; eax = entry: al = op, ah = code bits, upper 16 bits = value
Chris@43 338
Chris@43 339 L_dolen:
Chris@43 340
Chris@43 341
Chris@43 342
Chris@43 343
Chris@43 344
Chris@43 345
Chris@43 346 mov cl,ah
Chris@43 347 sub bl,ah
Chris@43 348 shr ebp,cl ; consume the code's bits
Chris@43 349
Chris@43 350
Chris@43 351
Chris@43 352
Chris@43 353
Chris@43 354
Chris@43 355 test al,al
Chris@43 356 jnz L_test_for_length_base ; op != 0: not a literal
Chris@43 357
Chris@43 358 shr eax,16
Chris@43 359 stosb ; op == 0: store the low byte of the value as a literal
Chris@43 360
Chris@43 361 L_while_test: ; continue only while output limit > edi and input limit > esi
Chris@43 362
Chris@43 363
Chris@43 364 cmp [esp+16],edi
Chris@43 365 jbe L_break_loop
Chris@43 366
Chris@43 367 cmp [esp+20],esi
Chris@43 368 ja L_do_loop
Chris@43 369 jmp L_break_loop
Chris@43 370
Chris@43 371 L_test_for_length_base:
Chris@43 372 ; 502 "inffast.S"
Chris@43 373 mov edx,eax
Chris@43 374 shr edx,16 ; edx = entry value (length base)
Chris@43 375 mov cl,al ; cl = op
Chris@43 376
Chris@43 377 test al,16
Chris@43 378 jz L_test_for_second_level_length ; op bit 4 clear: not a length base
Chris@43 379 and cl,15 ; cl = number of extra bits
Chris@43 380 jz L_save_len
Chris@43 381 cmp bl,cl
Chris@43 382 jae L_add_bits_to_len ; enough bits already buffered
Chris@43 383
Chris@43 384 mov ch,cl ; keep the extra-bit count while cl is used as shift count
Chris@43 385 xor eax,eax
Chris@43 386 lodsw
Chris@43 387 mov cl,bl
Chris@43 388 add bl,16
Chris@43 389 shl eax,cl
Chris@43 390 or ebp,eax
Chris@43 391 mov cl,ch
Chris@43 392
Chris@43 393 L_add_bits_to_len:
Chris@43 394 mov eax,1
Chris@43 395 shl eax,cl
Chris@43 396 dec eax ; eax = (1 << extra) - 1
Chris@43 397 sub bl,cl
Chris@43 398 and eax,ebp ; eax = extra bits taken from hold
Chris@43 399 shr ebp,cl
Chris@43 400 add edx,eax ; length = base + extra
Chris@43 401
Chris@43 402 L_save_len:
Chris@43 403 mov [esp+24],edx ; park the length; edx is reused for the distance
Chris@43 404
Chris@43 405
Chris@43 406 L_decode_distance:
Chris@43 407 ; 549 "inffast.S"
Chris@43 408 cmp bl,15
Chris@43 409 ja L_get_distance_code
Chris@43 410
Chris@43 411 xor eax,eax
Chris@43 412 lodsw
Chris@43 413 mov cl,bl
Chris@43 414 add bl,16
Chris@43 415 shl eax,cl
Chris@43 416 or ebp,eax
Chris@43 417
Chris@43 418 L_get_distance_code:
Chris@43 419 mov edx, [esp+4] ; distance mask
Chris@43 420 mov ecx, [esp+12] ; distcode table
Chris@43 421 and edx,ebp
Chris@43 422 mov eax, [ecx+edx*4]
Chris@43 423
Chris@43 424
Chris@43 425 L_dodist:
Chris@43 426 mov edx,eax
Chris@43 427 shr edx,16 ; edx = entry value (distance base)
Chris@43 428 mov cl,ah
Chris@43 429 sub bl,ah
Chris@43 430 shr ebp,cl ; consume the code's bits
Chris@43 431 ; 584 "inffast.S"
Chris@43 432 mov cl,al
Chris@43 433
Chris@43 434 test al,16
Chris@43 435 jz L_test_for_second_level_dist
Chris@43 436 and cl,15 ; cl = number of extra bits
Chris@43 437 jz L_check_dist_one ; no extra bits: distance may be 1
Chris@43 438 cmp bl,cl
Chris@43 439 jae L_add_bits_to_dist
Chris@43 440
Chris@43 441 mov ch,cl
Chris@43 442 xor eax,eax
Chris@43 443 lodsw
Chris@43 444 mov cl,bl
Chris@43 445 add bl,16
Chris@43 446 shl eax,cl
Chris@43 447 or ebp,eax
Chris@43 448 mov cl,ch
Chris@43 449
Chris@43 450 L_add_bits_to_dist:
Chris@43 451 mov eax,1
Chris@43 452 shl eax,cl
Chris@43 453 dec eax
Chris@43 454 sub bl,cl
Chris@43 455 and eax,ebp
Chris@43 456 shr ebp,cl
Chris@43 457 add edx,eax ; distance = base + extra
Chris@43 458 jmp L_check_window ; target is the next label
Chris@43 459
Chris@43 460 L_check_window: ; edx = distance, [esp+24] = length
Chris@43 461 ; 625 "inffast.S"
Chris@43 462 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
Chris@43 463 mov eax,edi
Chris@43 464 sub eax, [esp+40] ; eax = bytes between output start and edi
Chris@43 465
Chris@43 466 cmp eax,edx
Chris@43 467 jb L_clip_window ; distance reaches before output start: source is in the window
Chris@43 468
Chris@43 469 mov ecx, [esp+24]
Chris@43 470 mov esi,edi
Chris@43 471 sub esi,edx ; esi = edi - distance
Chris@43 472
Chris@43 473 sub ecx,3 ; three bytes are copied inline, the rest by rep movsb
Chris@43 474 mov al, [esi]
Chris@43 475 mov [edi],al
Chris@43 476 mov al, [esi+1]
Chris@43 477 mov dl, [esi+2]
Chris@43 478 add esi,3
Chris@43 479 mov [edi+1],al
Chris@43 480 mov [edi+2],dl
Chris@43 481 add edi,3
Chris@43 482 rep movsb
Chris@43 483
Chris@43 484 mov esi, [esp+44] ; restore the input pointer
Chris@43 485 jmp L_while_test
Chris@43 486
Chris@43 487 ALIGN 4
Chris@43 488 L_check_dist_one: ; fill path for distance == 1
Chris@43 489 cmp edx,1
Chris@43 490 jne L_check_window
Chris@43 491 cmp [esp+40],edi
Chris@43 492 je L_check_window ; edi is at output start: no previous output byte to repeat
Chris@43 493
Chris@43 494 dec edi
Chris@43 495 mov ecx, [esp+24]
Chris@43 496 mov al, [edi] ; al = previous output byte
Chris@43 497 sub ecx,3
Chris@43 498
Chris@43 499 mov [edi+1],al
Chris@43 500 mov [edi+2],al
Chris@43 501 mov [edi+3],al
Chris@43 502 add edi,4
Chris@43 503 rep stosb ; remaining length - 3 copies of the same byte
Chris@43 504
Chris@43 505 jmp L_while_test
Chris@43 506
Chris@43 507 ALIGN 4
Chris@43 508 L_test_for_second_level_length: ; al = cl = op with bit 4 clear, edx = entry value
Chris@43 509
Chris@43 510
Chris@43 511
Chris@43 512
Chris@43 513 test al,64
Chris@43 514 jnz L_test_for_end_of_block
Chris@43 515
Chris@43 516 mov eax,1
Chris@43 517 shl eax,cl
Chris@43 518 dec eax ; eax = (1 << op) - 1
Chris@43 519 and eax,ebp
Chris@43 520 add eax,edx ; index = value + (hold & mask)
Chris@43 521 mov edx, [esp+8]
Chris@43 522 mov eax, [edx+eax*4] ; second-level lencode entry
Chris@43 523 jmp L_dolen
Chris@43 524
Chris@43 525 ALIGN 4
Chris@43 526 L_test_for_second_level_dist: ; al = cl = op with bit 4 clear, edx = entry value
Chris@43 527
Chris@43 528
Chris@43 529
Chris@43 530
Chris@43 531 test al,64
Chris@43 532 jnz L_invalid_distance_code
Chris@43 533
Chris@43 534 mov eax,1
Chris@43 535 shl eax,cl
Chris@43 536 dec eax
Chris@43 537 and eax,ebp
Chris@43 538 add eax,edx ; index = value + (hold & mask)
Chris@43 539 mov edx, [esp+12]
Chris@43 540 mov eax, [edx+eax*4] ; second-level distcode entry
Chris@43 541 jmp L_dodist
Chris@43 542
Chris@43 543 ALIGN 4
Chris@43 544 L_clip_window: ; entered with eax = edi - output start, edx = distance, eax < edx
Chris@43 545 ; 721 "inffast.S"
Chris@43 546 mov ecx,eax
Chris@43 547 mov eax, [esp+52] ; eax = wsize
Chris@43 548 neg ecx
Chris@43 549 mov esi, [esp+56] ; esi = window
Chris@43 550
Chris@43 551 cmp eax,edx
Chris@43 552 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@43 553
Chris@43 554 add ecx,edx ; ecx = distance - (edi - output start) = bytes to take from the window
Chris@43 555 cmp dword ptr [esp+48],0
Chris@43 556 jne L_wrap_around_window ; write != 0
Chris@43 557
Chris@43 558 sub eax,ecx
Chris@43 559 add esi,eax ; write == 0: source = window + wsize - ecx
Chris@43 560 ; 749 "inffast.S"
Chris@43 561 mov eax, [esp+24] ; eax = match length
Chris@43 562 cmp eax,ecx
Chris@43 563 jbe L_do_copy1 ; whole match comes from the window
Chris@43 564
Chris@43 565 sub eax,ecx ; eax = bytes left after the window part
Chris@43 566 rep movsb ; copy the window part
Chris@43 567 mov esi,edi
Chris@43 568 sub esi,edx ; continue from already written output at edi - distance
Chris@43 569 jmp L_do_copy1 ; the unlabelled duplicate that followed this jump was unreachable and is removed
Chris@43 579
Chris@43 580 L_wrap_around_window: ; write != 0; esi = window, ecx = bytes to take from the window, edx = distance
Chris@43 581 ; 793 "inffast.S"
Chris@43 582 mov eax, [esp+48] ; eax = write
Chris@43 583 cmp ecx,eax
Chris@43 584 jbe L_contiguous_in_window ; needed bytes all lie below the write position
Chris@43 585
Chris@43 586 add esi, [esp+52]
Chris@43 587 add esi,eax
Chris@43 588 sub esi,ecx ; source = window + wsize + write - ecx
Chris@43 589 sub ecx,eax ; ecx = bytes before the window end
Chris@43 590
Chris@43 591
Chris@43 592 mov eax, [esp+24] ; eax = match length
Chris@43 593 cmp eax,ecx
Chris@43 594 jbe L_do_copy1
Chris@43 595
Chris@43 596 sub eax,ecx
Chris@43 597 rep movsb ; copy up to the window end
Chris@43 598 mov esi, [esp+56] ; wrap to the window start
Chris@43 599 mov ecx, [esp+48] ; next part is at most write bytes
Chris@43 600 cmp eax,ecx
Chris@43 601 jbe L_do_copy1
Chris@43 602
Chris@43 603 sub eax,ecx
Chris@43 604 rep movsb
Chris@43 605 mov esi,edi
Chris@43 606 sub esi,edx ; rest comes from output at edi - distance
Chris@43 607 jmp L_do_copy1
Chris@43 608
Chris@43 609 L_contiguous_in_window: ; eax = write, ecx = bytes to take from the window
Chris@43 610 ; 836 "inffast.S"
Chris@43 611 add esi,eax
Chris@43 612 sub esi,ecx ; source = window + write - ecx
Chris@43 613
Chris@43 614
Chris@43 615 mov eax, [esp+24] ; eax = match length
Chris@43 616 cmp eax,ecx
Chris@43 617 jbe L_do_copy1
Chris@43 618
Chris@43 619 sub eax,ecx
Chris@43 620 rep movsb
Chris@43 621 mov esi,edi
Chris@43 622 sub esi,edx ; rest comes from output at edi - distance
Chris@43 623
Chris@43 624 L_do_copy1: ; copy the final eax bytes from esi to edi
Chris@43 625 ; 862 "inffast.S"
Chris@43 626 mov ecx,eax
Chris@43 627 rep movsb
Chris@43 628
Chris@43 629 mov esi, [esp+44] ; restore the input pointer
Chris@43 630 jmp L_while_test
Chris@43 631 ; 878 "inffast.S"
Chris@43 632 ALIGN 4
Chris@43 633 L_init_mmx: ; MMX path setup: mm0 = hold, ebp = bit count, mm1 = shift still owed to mm0
Chris@43 634 emms
Chris@43 635
Chris@43 636
Chris@43 637
Chris@43 638
Chris@43 639
Chris@43 640 movd mm0,ebp ; hold moves into mm0
Chris@43 641 mov ebp,ebx ; bit count moves into ebp
Chris@43 642 ; 896 "inffast.S"
Chris@43 643 movd mm4,dword ptr [esp+0] ; mm4 = working length mask
Chris@43 644 movq mm3,mm4 ; mm3 = pristine copy of the length mask
Chris@43 645 movd mm5,dword ptr [esp+4] ; mm5 = working distance mask
Chris@43 646 movq mm2,mm5 ; mm2 = pristine copy of the distance mask
Chris@43 647 pxor mm1,mm1 ; nothing owed yet
Chris@43 648 mov ebx, [esp+8] ; ebx = lencode table
Chris@43 649 jmp L_do_loop_mmx
Chris@43 650
Chris@43 651 ALIGN 4
Chris@43 652 L_do_loop_mmx: ; esi = input, edi = output, ebx = lencode table
Chris@43 653 psrlq mm0,mm1 ; drop the bits of the previous code
Chris@43 654
Chris@43 655 cmp ebp,32
Chris@43 656 ja L_get_length_code_mmx ; more than 32 bits buffered: no refill
Chris@43 657
Chris@43 658 movd mm6,ebp
Chris@43 659 movd mm7,dword ptr [esi] ; next 32 input bits
Chris@43 660 add esi,4
Chris@43 661 psllq mm7,mm6
Chris@43 662 add ebp,32
Chris@43 663 por mm0,mm7 ; hold |= new bits << old bit count
Chris@43 664
Chris@43 665 L_get_length_code_mmx:
Chris@43 666 pand mm4,mm0
Chris@43 667 movd eax,mm4 ; eax = hold & length mask
Chris@43 668 movq mm4,mm3 ; reload the working mask
Chris@43 669 mov eax, [ebx+eax*4] ; eax = entry: al = op, ah = code bits, upper 16 bits = value
Chris@43 670
Chris@43 671 L_dolen_mmx:
Chris@43 672 movzx ecx,ah
Chris@43 673 movd mm1,ecx ; shift is deferred to the next psrlq mm0,mm1
Chris@43 674 sub ebp,ecx
Chris@43 675
Chris@43 676 test al,al
Chris@43 677 jnz L_test_for_length_base_mmx
Chris@43 678
Chris@43 679 shr eax,16
Chris@43 680 stosb ; op == 0: store the low byte of the value as a literal
Chris@43 681
Chris@43 682 L_while_test_mmx: ; continue only while output limit > edi and input limit > esi
Chris@43 683
Chris@43 684
Chris@43 685 cmp [esp+16],edi
Chris@43 686 jbe L_break_loop
Chris@43 687
Chris@43 688 cmp [esp+20],esi
Chris@43 689 ja L_do_loop_mmx
Chris@43 690 jmp L_break_loop
Chris@43 691
Chris@43 692 L_test_for_length_base_mmx:
Chris@43 693
Chris@43 694 mov edx,eax
Chris@43 695 shr edx,16 ; edx = entry value (length base)
Chris@43 696
Chris@43 697 test al,16
Chris@43 698 jz L_test_for_second_level_length_mmx
Chris@43 699 and eax,15 ; eax = number of extra bits
Chris@43 700 jz L_decode_distance_mmx
Chris@43 701
Chris@43 702 psrlq mm0,mm1 ; drop the code bits
Chris@43 703 movd mm1,eax ; extra bits are now owed
Chris@43 704 movd ecx,mm0
Chris@43 705 sub ebp,eax
Chris@43 706 and ecx, [inflate_fast_mask+eax*4] ; ecx = low eax bits of hold
Chris@43 707 add edx,ecx ; length = base + extra
Chris@43 708
Chris@43 709 L_decode_distance_mmx: ; edx = length, kept in the register on this path
Chris@43 710 psrlq mm0,mm1
Chris@43 711
Chris@43 712 cmp ebp,32
Chris@43 713 ja L_get_dist_code_mmx
Chris@43 714
Chris@43 715 movd mm6,ebp
Chris@43 716 movd mm7,dword ptr [esi]
Chris@43 717 add esi,4
Chris@43 718 psllq mm7,mm6
Chris@43 719 add ebp,32
Chris@43 720 por mm0,mm7
Chris@43 721
Chris@43 722 L_get_dist_code_mmx:
Chris@43 723 mov ebx, [esp+12] ; ebx = distcode table
Chris@43 724 pand mm5,mm0
Chris@43 725 movd eax,mm5 ; eax = hold & distance mask
Chris@43 726 movq mm5,mm2 ; reload the working mask
Chris@43 727 mov eax, [ebx+eax*4]
Chris@43 728
Chris@43 729 L_dodist_mmx:
Chris@43 730
Chris@43 731 movzx ecx,ah
Chris@43 732 mov ebx,eax
Chris@43 733 shr ebx,16 ; ebx = entry value (distance base)
Chris@43 734 sub ebp,ecx
Chris@43 735 movd mm1,ecx ; code bits are owed
Chris@43 736
Chris@43 737 test al,16
Chris@43 738 jz L_test_for_second_level_dist_mmx
Chris@43 739 and eax,15 ; eax = number of extra bits
Chris@43 740 jz L_check_dist_one_mmx
Chris@43 741
Chris@43 742 L_add_bits_to_dist_mmx:
Chris@43 743 psrlq mm0,mm1
Chris@43 744 movd mm1,eax
Chris@43 745 movd ecx,mm0
Chris@43 746 sub ebp,eax
Chris@43 747 and ecx, [inflate_fast_mask+eax*4]
Chris@43 748 add ebx,ecx ; distance = base + extra
Chris@43 749
Chris@43 750 L_check_window_mmx: ; ebx = distance, edx = length
Chris@43 751 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
Chris@43 752 mov eax,edi
Chris@43 753 sub eax, [esp+40] ; eax = bytes between output start and edi
Chris@43 754
Chris@43 755 cmp eax,ebx
Chris@43 756 jb L_clip_window_mmx
Chris@43 757
Chris@43 758 mov ecx,edx
Chris@43 759 mov esi,edi
Chris@43 760 sub esi,ebx ; esi = edi - distance
Chris@43 761
Chris@43 762 sub ecx,3 ; three bytes are copied inline, the rest by rep movsb
Chris@43 763 mov al, [esi]
Chris@43 764 mov [edi],al
Chris@43 765 mov al, [esi+1]
Chris@43 766 mov dl, [esi+2] ; edx was already copied to ecx, so dl is free
Chris@43 767 add esi,3
Chris@43 768 mov [edi+1],al
Chris@43 769 mov [edi+2],dl
Chris@43 770 add edi,3
Chris@43 771 rep movsb
Chris@43 772
Chris@43 773 mov esi, [esp+44] ; restore the input pointer
Chris@43 774 mov ebx, [esp+8] ; restore ebx = lencode table
Chris@43 775 jmp L_while_test_mmx
Chris@43 776
Chris@43 777 ALIGN 4
Chris@43 778 L_check_dist_one_mmx: ; fill path for distance == 1
Chris@43 779 cmp ebx,1
Chris@43 780 jne L_check_window_mmx
Chris@43 781 cmp [esp+40],edi
Chris@43 782 je L_check_window_mmx ; edi is at output start: no previous output byte to repeat
Chris@43 783
Chris@43 784 dec edi
Chris@43 785 mov ecx,edx
Chris@43 786 mov al, [edi] ; al = previous output byte
Chris@43 787 sub ecx,3
Chris@43 788
Chris@43 789 mov [edi+1],al
Chris@43 790 mov [edi+2],al
Chris@43 791 mov [edi+3],al
Chris@43 792 add edi,4
Chris@43 793 rep stosb ; remaining length - 3 copies of the same byte
Chris@43 794
Chris@43 795 mov ebx, [esp+8] ; restore ebx = lencode table
Chris@43 796 jmp L_while_test_mmx
Chris@43 797
Chris@43 798 ALIGN 4
Chris@43 799 L_test_for_second_level_length_mmx: ; al = op with bit 4 clear, edx = entry value, ebx = lencode table
Chris@43 800 test al,64
Chris@43 801 jnz L_test_for_end_of_block
Chris@43 802
Chris@43 803 and eax,15
Chris@43 804 psrlq mm0,mm1 ; drop the first-level code bits
Chris@43 805 movd ecx,mm0
Chris@43 806 and ecx, [inflate_fast_mask+eax*4]
Chris@43 807 add ecx,edx ; index = value + masked hold bits
Chris@43 808 mov eax, [ebx+ecx*4]
Chris@43 809 jmp L_dolen_mmx
Chris@43 810
Chris@43 811 ALIGN 4
Chris@43 812 L_test_for_second_level_dist_mmx: ; al = op with bit 4 clear, ebx = entry value
Chris@43 813 test al,64
Chris@43 814 jnz L_invalid_distance_code
Chris@43 815
Chris@43 816 and eax,15
Chris@43 817 psrlq mm0,mm1
Chris@43 818 movd ecx,mm0
Chris@43 819 and ecx, [inflate_fast_mask+eax*4]
Chris@43 820 mov eax, [esp+12] ; eax = distcode table
Chris@43 821 add ecx,ebx ; index = value + masked hold bits
Chris@43 822 mov eax, [eax+ecx*4]
Chris@43 823 jmp L_dodist_mmx
Chris@43 824
Chris@43 825 ALIGN 4
Chris@43 826 L_clip_window_mmx: ; entered with eax = edi - output start, ebx = distance, edx = length, eax < ebx
Chris@43 827
Chris@43 828 mov ecx,eax
Chris@43 829 mov eax, [esp+52] ; eax = wsize
Chris@43 830 neg ecx
Chris@43 831 mov esi, [esp+56] ; esi = window
Chris@43 832
Chris@43 833 cmp eax,ebx
Chris@43 834 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@43 835
Chris@43 836 add ecx,ebx ; ecx = distance - (edi - output start) = bytes to take from the window
Chris@43 837 cmp dword ptr [esp+48],0
Chris@43 838 jne L_wrap_around_window_mmx ; write != 0
Chris@43 839
Chris@43 840 sub eax,ecx
Chris@43 841 add esi,eax ; write == 0: source = window + wsize - ecx
Chris@43 842
Chris@43 843 cmp edx,ecx
Chris@43 844 jbe L_do_copy1_mmx ; whole match comes from the window
Chris@43 845
Chris@43 846 sub edx,ecx ; edx = bytes left after the window part
Chris@43 847 rep movsb ; copy the window part
Chris@43 848 mov esi,edi
Chris@43 849 sub esi,ebx ; continue from already written output at edi - distance
Chris@43 850 jmp L_do_copy1_mmx ; the unlabelled duplicate that followed this jump was unreachable and is removed
Chris@43 860
Chris@43 861 L_wrap_around_window_mmx: ; write != 0; esi = window, ecx = bytes to take from the window, ebx = distance, edx = length
Chris@43 862
Chris@43 863 mov eax, [esp+48] ; eax = write
Chris@43 864 cmp ecx,eax
Chris@43 865 jbe L_contiguous_in_window_mmx ; needed bytes all lie below the write position
Chris@43 866
Chris@43 867 add esi, [esp+52]
Chris@43 868 add esi,eax
Chris@43 869 sub esi,ecx ; source = window + wsize + write - ecx
Chris@43 870 sub ecx,eax ; ecx = bytes before the window end
Chris@43 871
Chris@43 872
Chris@43 873 cmp edx,ecx
Chris@43 874 jbe L_do_copy1_mmx
Chris@43 875
Chris@43 876 sub edx,ecx
Chris@43 877 rep movsb ; copy up to the window end
Chris@43 878 mov esi, [esp+56] ; wrap to the window start
Chris@43 879 mov ecx, [esp+48] ; next part is at most write bytes
Chris@43 880 cmp edx,ecx
Chris@43 881 jbe L_do_copy1_mmx
Chris@43 882
Chris@43 883 sub edx,ecx
Chris@43 884 rep movsb
Chris@43 885 mov esi,edi
Chris@43 886 sub esi,ebx ; rest comes from output at edi - distance
Chris@43 887 jmp L_do_copy1_mmx
Chris@43 888
Chris@43 889 L_contiguous_in_window_mmx: ; eax = write, ecx = bytes to take from the window
Chris@43 890
Chris@43 891 add esi,eax
Chris@43 892 sub esi,ecx ; source = window + write - ecx
Chris@43 893
Chris@43 894
Chris@43 895 cmp edx,ecx
Chris@43 896 jbe L_do_copy1_mmx
Chris@43 897
Chris@43 898 sub edx,ecx
Chris@43 899 rep movsb
Chris@43 900 mov esi,edi
Chris@43 901 sub esi,ebx ; rest comes from output at edi - distance
Chris@43 902
Chris@43 903 L_do_copy1_mmx: ; copy the final edx bytes from esi to edi
Chris@43 904
Chris@43 905
Chris@43 906 mov ecx,edx
Chris@43 907 rep movsb
Chris@43 908
Chris@43 909 mov esi, [esp+44] ; restore the input pointer
Chris@43 910 mov ebx, [esp+8] ; restore ebx = lencode table
Chris@43 911 jmp L_while_test_mmx
Chris@43 912 ; 1174 "inffast.S"
Chris@43 913 L_invalid_distance_code: ; exits below set ecx = message address (0 for none) and edx = new mode
Chris@43 914
Chris@43 915
Chris@43 916
Chris@43 917
Chris@43 918
Chris@43 919 mov ecx, invalid_distance_code_msg
Chris@43 920 mov edx,INFLATE_MODE_BAD
Chris@43 921 jmp L_update_stream_state
Chris@43 922
Chris@43 923 L_test_for_end_of_block: ; reached from the length paths when op has bit 6 set
Chris@43 924
Chris@43 925
Chris@43 926
Chris@43 927
Chris@43 928
Chris@43 929 test al,32
Chris@43 930 jz L_invalid_literal_length_code ; bit 5 clear: invalid code rather than end of block
Chris@43 931
Chris@43 932 mov ecx,0 ; end of block: no message
Chris@43 933 mov edx,INFLATE_MODE_TYPE
Chris@43 934 jmp L_update_stream_state
Chris@43 935
Chris@43 936 L_invalid_literal_length_code:
Chris@43 937
Chris@43 938
Chris@43 939
Chris@43 940
Chris@43 941
Chris@43 942 mov ecx, invalid_literal_length_code_msg
Chris@43 943 mov edx,INFLATE_MODE_BAD
Chris@43 944 jmp L_update_stream_state
Chris@43 945
Chris@43 946 L_invalid_distance_too_far:
Chris@43 947
Chris@43 948
Chris@43 949
Chris@43 950 mov esi, [esp+44] ; esi was loaded with the window pointer; restore the input pointer
Chris@43 951 mov ecx, invalid_distance_too_far_msg
Chris@43 952 mov edx,INFLATE_MODE_BAD
Chris@43 953 jmp L_update_stream_state
Chris@43 954
Chris@43 955 L_update_stream_state:
Chris@43 956
Chris@43 957 mov eax, [esp+88] ; eax = stream argument
Chris@43 958 test ecx,ecx
Chris@43 959 jz L_skip_msg
Chris@43 960 mov [eax+24],ecx ; store the message address at stream+24
Chris@43 961 L_skip_msg:
Chris@43 962 mov eax, [eax+28] ; eax = state pointer
Chris@43 963 mov [eax+mode_state],edx ; state->mode = edx
Chris@43 964 jmp L_break_loop
Chris@43 965
Chris@43 966 ALIGN 4
Chris@43 967 L_break_loop: ; common exit: write pointers, counts and bit buffer back to the stream and state
Chris@43 968 ; 1243 "inffast.S"
Chris@43 969 cmp dword ptr [inflate_fast_use_mmx],2
Chris@43 970 jne L_update_next_in
Chris@43 971
Chris@43 972
Chris@43 973
Chris@43 974 mov ebx,ebp ; MMX path kept the bit count in ebp
Chris@43 975
Chris@43 976 L_update_next_in: ; ebx = bit count, esi = input pointer, edi = output pointer
Chris@43 977 ; 1266 "inffast.S"
Chris@43 978 mov eax, [esp+88] ; eax = stream argument
Chris@43 979 mov ecx,ebx
Chris@43 980 mov edx, [eax+28] ; edx = state pointer
Chris@43 981 shr ecx,3 ; ecx = whole unused bytes still in hold
Chris@43 982 sub esi,ecx ; return them to the input
Chris@43 983 shl ecx,3
Chris@43 984 sub ebx,ecx ; ebx = bit count modulo 8
Chris@43 985 mov [eax+12],edi ; stream+12 = output pointer
Chris@43 986 mov [edx+bits_state],ebx
Chris@43 987 mov ecx,ebx
Chris@43 988
Chris@43 989 lea ebx, [esp+28]
Chris@43 990 cmp [esp+20],ebx
Chris@43 991 jne L_buf_not_used ; input limit is not the scratch buffer address: input was read in place
Chris@43 992 ; scratch buffer was used: map esi back into the caller's input buffer
Chris@43 993 sub esi,ebx ; esi = bytes consumed from the scratch buffer
Chris@43 994 mov ebx, [eax+0]
Chris@43 995 mov [esp+20],ebx
Chris@43 996 add esi,ebx ; esi = original input pointer + bytes consumed
Chris@43 997 mov ebx, [eax+4]
Chris@43 998 sub ebx,11
Chris@43 999 add [esp+20],ebx ; input limit = original input pointer + available - 11
Chris@43 1000
Chris@43 1001 L_buf_not_used:
Chris@43 1002 mov [eax+0],esi ; stream+0 = input pointer
Chris@43 1003
Chris@43 1004 mov ebx,1
Chris@43 1005 shl ebx,cl
Chris@43 1006 dec ebx ; ebx = (1 << bit count) - 1
Chris@43 1007
Chris@43 1008
Chris@43 1009
Chris@43 1010
Chris@43 1011
Chris@43 1012 cmp dword ptr [inflate_fast_use_mmx],2
Chris@43 1013 jne L_update_hold
Chris@43 1014
Chris@43 1015
Chris@43 1016
Chris@43 1017 psrlq mm0,mm1 ; apply the shift still owed
Chris@43 1018 movd ebp,mm0 ; ebp = low 32 bits of hold
Chris@43 1019
Chris@43 1020 emms ; leave MMX state before returning
Chris@43 1021
Chris@43 1022 L_update_hold:
Chris@43 1023
Chris@43 1024
Chris@43 1025
Chris@43 1026 and ebp,ebx ; keep only the remaining valid bits
Chris@43 1027 mov [edx+hold_state],ebp
Chris@43 1028
Chris@43 1029
Chris@43 1030
Chris@43 1031
Chris@43 1032 mov ebx, [esp+20] ; ebx = input limit
Chris@43 1033 cmp ebx,esi
Chris@43 1034 jbe L_last_is_smaller
Chris@43 1035
Chris@43 1036 sub ebx,esi
Chris@43 1037 add ebx,11
Chris@43 1038 mov [eax+4],ebx ; stream+4 = input limit - input pointer + 11
Chris@43 1039 jmp L_fixup_out
Chris@43 1040 L_last_is_smaller: ; same value, computed without an unsigned borrow
Chris@43 1041 sub esi,ebx
Chris@43 1042 neg esi
Chris@43 1043 add esi,11
Chris@43 1044 mov [eax+4],esi
Chris@43 1045
Chris@43 1046
Chris@43 1047
Chris@43 1048
Chris@43 1049 L_fixup_out:
Chris@43 1050
Chris@43 1051 mov ebx, [esp+16] ; ebx = output limit
Chris@43 1052 cmp ebx,edi
Chris@43 1053 jbe L_end_is_smaller
Chris@43 1054
Chris@43 1055 sub ebx,edi
Chris@43 1056 add ebx,257
Chris@43 1057 mov [eax+16],ebx ; stream+16 = output limit - output pointer + 257
Chris@43 1058 jmp L_done
Chris@43 1059 L_end_is_smaller: ; same value, computed without an unsigned borrow
Chris@43 1060 sub edi,ebx
Chris@43 1061 neg edi
Chris@43 1062 add edi,257
Chris@43 1063 mov [eax+16],edi
Chris@43 1064
Chris@43 1065
Chris@43 1066
Chris@43 1067
Chris@43 1068
Chris@43 1069 L_done:
Chris@43 1070 add esp,64 ; release the local frame
Chris@43 1071 popfd ; restore the caller's flags and registers in reverse push order
Chris@43 1072 pop ebx
Chris@43 1073 pop ebp
Chris@43 1074 pop esi
Chris@43 1075 pop edi
Chris@43 1076 ret
Chris@43 1077 _inflate_fast endp
Chris@43 1078
Chris@43 1079 _TEXT ends
Chris@43 1080 end