annotate src/zlib-1.2.7/contrib/masmx86/inffas32.asm @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
Chris@4 2 ; *
Chris@4 3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
Chris@4 4 ; *
Chris@4 5 ; * Copyright (C) 1995-2003 Mark Adler
Chris@4 6 ; * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 7 ; *
Chris@4 8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@4 9 ; * Please use the copyright conditions above.
Chris@4 10 ; *
Chris@4 11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@4 12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@4 13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@4 14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
Chris@4 15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@4 16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@4 17 ; * versions of this and inffast.S can be found at
Chris@4 18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@4 19 ; *
Chris@4 20 ; * 2005 : modification by Gilles Vollant
Chris@4 21 ; */
Chris@4 22 ; For Visual C++ 4.x and higher and ML 6.x and higher
Chris@4 23 ; ml.exe is in directory \MASM611C of Win95 DDK
Chris@4 24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
Chris@4 25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
Chris@4 26 ;
Chris@4 27 ;
Chris@4 28 ; compile with command line option
Chris@4 29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
Chris@4 30
Chris@4 31 ; if you define NO_GZIP (see inflate.h), compile with
Chris@4 32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
Chris@4 33
Chris@4 34
Chris@4 35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
Chris@4 36 ; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
Chris@4 37 ; in inflate_state in inflate.h)
Chris@4 38 zlib1222sup equ 8 ; added to the inflate_state field offsets defined further down (wsize_state .. distbits_state)
Chris@4 39 ; NOTE(review): INFLATE_MODE_TYPE / INFLATE_MODE_BAD are raw numbers that this file stores into state->mode;
Chris@4 40 ; they presumably mirror the inflate_mode enum in inflate.h - confirm they match the zlib version being linked.
Chris@4 41 IFDEF GUNZIP
Chris@4 42 INFLATE_MODE_TYPE equ 11 ; written to state->mode when an end-of-block code is decoded
Chris@4 43 INFLATE_MODE_BAD equ 26 ; written to state->mode when an invalid code or distance is found
Chris@4 44 ELSE
Chris@4 45 IFNDEF NO_GUNZIP ; neither GUNZIP nor NO_GUNZIP defined: same values as the GUNZIP case
Chris@4 46 INFLATE_MODE_TYPE equ 11
Chris@4 47 INFLATE_MODE_BAD equ 26
Chris@4 48 ELSE ; NO_GUNZIP defined on the command line (/DNO_GUNZIP)
Chris@4 49 INFLATE_MODE_TYPE equ 3
Chris@4 50 INFLATE_MODE_BAD equ 17
Chris@4 51 ENDIF
Chris@4 52 ENDIF
Chris@4 53
Chris@4 54
Chris@4 55 ; 75 "inffast.S"
Chris@4 56 ;FILE "inffast.S"
Chris@4 57
Chris@4 58 ;;;GLOBAL _inflate_fast
Chris@4 59
Chris@4 60 ;;;SECTION .text
Chris@4 61
Chris@4 62
Chris@4 63
Chris@4 64 .586p ; Pentium instruction set (cpuid is used by the MMX probe)
Chris@4 65 .mmx ; allow MMX instructions (movd/movq/psrlq/... in the MMX decode loop)
Chris@4 66
Chris@4 67 name inflate_fast_x86
Chris@4 68 .MODEL FLAT
Chris@4 69
Chris@4 70 _DATA segment ; NOTE(review): no `_DATA ends` appears before `_TEXT segment` is opened below - confirm the assembler accepts this
Chris@4 71 inflate_fast_use_mmx: ; decode-loop selector: starts at 1 (not probed yet); L_check_mmx stores 2 (MMX loop) or 3 (integer loop)
Chris@4 72 dd 1
Chris@4 73
Chris@4 74
Chris@4 75 _TEXT segment ; everything from here to `_TEXT ends` (strings, mask table, code) is emitted in this segment
Chris@4 76
Chris@4 77
Chris@4 78 ; Banner string; nothing in this file references it.
Chris@4 79 ALIGN 4
Chris@4 80 db 'Fast decoding Code from Chris Anderson'
Chris@4 81 db 0
Chris@4 82 ; NUL-terminated error messages. The error exits load one of these addresses into ecx and
Chris@4 83 ALIGN 4
Chris@4 84 invalid_literal_length_code_msg: ; L_update_stream_state stores it at offset 24 of the structure passed as the first argument.
Chris@4 85 db 'invalid literal/length code'
Chris@4 86 db 0
Chris@4 87
Chris@4 88 ALIGN 4
Chris@4 89 invalid_distance_code_msg:
Chris@4 90 db 'invalid distance code'
Chris@4 91 db 0
Chris@4 92
Chris@4 93 ALIGN 4
Chris@4 94 invalid_distance_too_far_msg:
Chris@4 95 db 'invalid distance too far back'
Chris@4 96 db 0
Chris@4 97
Chris@4 98
Chris@4 99 ALIGN 4
Chris@4 100 inflate_fast_mask: ; 33 dwords: entry n is (1 << n) - 1 for n = 0..32; read as [inflate_fast_mask+eax*4] by the MMX loop
Chris@4 101 dd 000000000h
Chris@4 102 dd 000000001h
Chris@4 103 dd 000000003h
Chris@4 104 dd 000000007h
Chris@4 105 dd 00000000fh
Chris@4 106 dd 00000001fh
Chris@4 107 dd 00000003fh
Chris@4 108 dd 00000007fh
Chris@4 109 dd 0000000ffh
Chris@4 110 dd 0000001ffh
Chris@4 111 dd 0000003ffh
Chris@4 112 dd 0000007ffh
Chris@4 113 dd 000000fffh
Chris@4 114 dd 000001fffh
Chris@4 115 dd 000003fffh
Chris@4 116 dd 000007fffh
Chris@4 117 dd 00000ffffh
Chris@4 118 dd 00001ffffh
Chris@4 119 dd 00003ffffh
Chris@4 120 dd 00007ffffh
Chris@4 121 dd 0000fffffh
Chris@4 122 dd 0001fffffh
Chris@4 123 dd 0003fffffh
Chris@4 124 dd 0007fffffh
Chris@4 125 dd 000ffffffh
Chris@4 126 dd 001ffffffh
Chris@4 127 dd 003ffffffh
Chris@4 128 dd 007ffffffh
Chris@4 129 dd 00fffffffh
Chris@4 130 dd 01fffffffh
Chris@4 131 dd 03fffffffh
Chris@4 132 dd 07fffffffh
Chris@4 133 dd 0ffffffffh
Chris@4 134
Chris@4 135 ; Byte offsets of the inflate_state fields this file reads or writes. NOTE(review): hard-coded layout - verify against inflate.h.
Chris@4 136 mode_state equ 0 ;/* state->mode */
Chris@4 137 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
Chris@4 138 write_state equ (36+4+zlib1222sup) ;/* state->write */
Chris@4 139 window_state equ (40+4+zlib1222sup) ;/* state->window */
Chris@4 140 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
Chris@4 141 bits_state equ (48+4+zlib1222sup) ;/* state->bits */
Chris@4 142 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
Chris@4 143 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
Chris@4 144 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
Chris@4 145 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
Chris@4 146
Chris@4 147
Chris@4 148 ;;SECTION .text
Chris@4 149 ; 205 "inffast.S"
Chris@4 150 ;GLOBAL inflate_fast_use_mmx
Chris@4 151
Chris@4 152 ;SECTION .data
Chris@4 153
Chris@4 154
Chris@4 155 ; GLOBAL inflate_fast_use_mmx:object
Chris@4 156 ;.size inflate_fast_use_mmx, 4
Chris@4 157 ; 226 "inffast.S"
Chris@4 158 ;SECTION .text
Chris@4 159
Chris@4 160 ALIGN 4
Chris@4 161 _inflate_fast proc near ; entry: saves registers, builds a 64-byte local frame from the two stack arguments, then falls into the decode loop
Chris@4 162 .FPO (16, 4, 0, 0, 1, 0) ; frame-pointer-omission debug record; NOTE(review): first field presumably = 16 dwords of locals (sub esp,64) - confirm
Chris@4 163 push edi ; save edi, esi, ebp, ebx and EFLAGS; L_done restores them in reverse order
Chris@4 164 push esi
Chris@4 165 push ebp
Chris@4 166 push ebx
Chris@4 167 pushfd
Chris@4 168 sub esp,64 ; locals; 64 + 5 pushes (20) + return address (4) puts the first argument at [esp+88]
Chris@4 169 cld ; make lodsw/stosb/movsb advance upward
Chris@4 170 ; Locals: [esp+0]=length mask, [esp+4]=distance mask, [esp+8]=state->lencode, [esp+12]=state->distcode,
Chris@4 171 ; [esp+16]=output limit, [esp+20]=input limit, [esp+24]=match length, [esp+28..39]=12-byte scratch input buffer,
Chris@4 172 ; [esp+40]=out-(arg2-[strm+16]), [esp+44]=input pointer, [esp+48]=state->write, [esp+52]=state->wsize,
Chris@4 173 ; [esp+56]=state->window, [esp+60]=output pointer.
Chris@4 174 mov esi, [esp+88] ; esi = first argument ("strm" in the comments below)
Chris@4 175 mov edi, [esi+28] ; edi = pointer at strm+28, used with the *_state offsets (the inflate_state)
Chris@4 176 ; NOTE(review): the strm offsets used here (+0, +4, +12, +16, +24, +28) presumably correspond to
Chris@4 177 ; next_in, avail_in, next_out, avail_out, msg and state of zlib's z_stream in a 32-bit build -
Chris@4 178 ; confirm against zlib.h; the second argument ([esp+92]) is presumably inflate_fast()'s `start`.
Chris@4 179
Chris@4 180
Chris@4 181
Chris@4 182
Chris@4 183 mov edx, [esi+4]
Chris@4 184 mov eax, [esi+0]
Chris@4 185
Chris@4 186 add edx,eax
Chris@4 187 sub edx,11 ; edx = [strm+0] + [strm+4] - 11 : input limit
Chris@4 188
Chris@4 189 mov [esp+44],eax ; input pointer
Chris@4 190 mov [esp+20],edx ; input limit
Chris@4 191
Chris@4 192 mov ebp, [esp+92] ; ebp = second argument
Chris@4 193 mov ecx, [esi+16]
Chris@4 194 mov ebx, [esi+12] ; ebx = output pointer
Chris@4 195
Chris@4 196 sub ebp,ecx
Chris@4 197 neg ebp
Chris@4 198 add ebp,ebx ; ebp = out - (arg2 - [strm+16])
Chris@4 199
Chris@4 200 sub ecx,257
Chris@4 201 add ecx,ebx ; ecx = out + [strm+16] - 257 : output limit
Chris@4 202
Chris@4 203 mov [esp+60],ebx
Chris@4 204 mov [esp+40],ebp
Chris@4 205 mov [esp+16],ecx
Chris@4 206 ; 285 "inffast.S"
Chris@4 207 mov eax, [edi+lencode_state]
Chris@4 208 mov ecx, [edi+distcode_state]
Chris@4 209
Chris@4 210 mov [esp+8],eax ; table indexed by the length loop
Chris@4 211 mov [esp+12],ecx ; table indexed by the distance loop
Chris@4 212
Chris@4 213 mov eax,1
Chris@4 214 mov ecx, [edi+lenbits_state]
Chris@4 215 shl eax,cl
Chris@4 216 dec eax
Chris@4 217 mov [esp+0],eax ; (1 << lenbits) - 1
Chris@4 218
Chris@4 219 mov eax,1
Chris@4 220 mov ecx, [edi+distbits_state]
Chris@4 221 shl eax,cl
Chris@4 222 dec eax
Chris@4 223 mov [esp+4],eax ; (1 << distbits) - 1
Chris@4 224
Chris@4 225 mov eax, [edi+wsize_state]
Chris@4 226 mov ecx, [edi+write_state]
Chris@4 227 mov edx, [edi+window_state]
Chris@4 228
Chris@4 229 mov [esp+52],eax
Chris@4 230 mov [esp+48],ecx
Chris@4 231 mov [esp+56],edx
Chris@4 232
Chris@4 233 mov ebp, [edi+hold_state] ; ebp = bit accumulator
Chris@4 234 mov ebx, [edi+bits_state] ; ebx = number of bits held in ebp
Chris@4 235 ; 321 "inffast.S"
Chris@4 236 mov esi, [esp+44] ; esi = input pointer
Chris@4 237 mov ecx, [esp+20] ; ecx = input limit (end of input minus 11, as computed by the prologue)
Chris@4 238 cmp ecx,esi
Chris@4 239 ja L_align_long ; limit above pointer: read straight from the caller's buffer
Chris@4 240 ; Otherwise copy what is left of the input into the 12-byte scratch buffer at [esp+28] and zero-pad it.
Chris@4 241 add ecx,11 ; undo the -11 to get the real end of input
Chris@4 242 sub ecx,esi ; ecx = bytes of input remaining
Chris@4 243 mov eax,12
Chris@4 244 sub eax,ecx ; eax = padding bytes needed to reach 12
Chris@4 245 lea edi, [esp+28]
Chris@4 246 rep movsb ; copy the remaining input
Chris@4 247 mov ecx,eax
Chris@4 248 xor eax,eax
Chris@4 249 rep stosb ; zero the rest of the scratch buffer
Chris@4 250 lea esi, [esp+28] ; input is now read from the scratch buffer
Chris@4 251 mov [esp+20],esi ; input limit = scratch buffer address; L_update_next_in tests for this value
Chris@4 252 jmp L_is_aligned
Chris@4 253
Chris@4 254
Chris@4 255 L_align_long: ; consume input one byte at a time until esi is a multiple of 4
Chris@4 256 test esi,3
Chris@4 257 jz L_is_aligned
Chris@4 258 xor eax,eax
Chris@4 259 mov al, [esi]
Chris@4 260 inc esi
Chris@4 261 mov ecx,ebx ; shift count = bits currently held
Chris@4 262 add ebx,8
Chris@4 263 shl eax,cl
Chris@4 264 or ebp,eax ; append the byte above the bits already in the accumulator
Chris@4 265 jmp L_align_long
Chris@4 266
Chris@4 267 L_is_aligned:
Chris@4 268 mov edi, [esp+60] ; edi = output pointer (edi was used as a scratch pointer above)
Chris@4 269 ; 366 "inffast.S"
Chris@4 270 L_check_mmx: ; dispatch on inflate_fast_use_mmx; probe the CPU once if it is still below 2
Chris@4 271 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 272 je L_init_mmx ; 2: MMX decode loop
Chris@4 273 ja L_do_loop ; above 2: integer decode loop
Chris@4 274 ; Not decided yet: preserve the live registers, because cpuid overwrites eax, ebx, ecx and edx.
Chris@4 275 push eax
Chris@4 276 push ebx
Chris@4 277 push ecx
Chris@4 278 push edx
Chris@4 279 pushfd
Chris@4 280 mov eax, [esp] ; eax = original EFLAGS
Chris@4 281 xor dword ptr [esp],0200000h ; flip bit 21 (the EFLAGS ID flag) in the stacked copy
Chris@4 282
Chris@4 283
Chris@4 284
Chris@4 285
Chris@4 286 popfd ; try to load the modified EFLAGS
Chris@4 287 pushfd
Chris@4 288 pop edx ; edx = EFLAGS as actually held by the CPU
Chris@4 289 xor edx,eax
Chris@4 290 jz L_dont_use_mmx ; bit did not change: cpuid is not available
Chris@4 291 xor eax,eax
Chris@4 292 cpuid ; leaf 0: vendor string in ebx, edx, ecx
Chris@4 293 cmp ebx,0756e6547h ; "Genu"
Chris@4 294 jne L_dont_use_mmx
Chris@4 295 cmp ecx,06c65746eh ; "ntel"
Chris@4 296 jne L_dont_use_mmx
Chris@4 297 cmp edx,049656e69h ; "ineI"
Chris@4 298 jne L_dont_use_mmx
Chris@4 299 mov eax,1
Chris@4 300 cpuid ; leaf 1: signature in eax, feature flags in edx
Chris@4 301 shr eax,8
Chris@4 302 and eax,15 ; eax = bits 8..11 of the signature (family)
Chris@4 303 cmp eax,6
Chris@4 304 jne L_dont_use_mmx ; the MMX loop is only selected for family 6
Chris@4 305 test edx,0800000h ; feature bit 23 (MMX)
Chris@4 306 jnz L_use_mmx
Chris@4 307 jmp L_dont_use_mmx
Chris@4 308 L_use_mmx:
Chris@4 309 mov dword ptr [inflate_fast_use_mmx],2
Chris@4 310 jmp L_check_mmx_pop
Chris@4 311 L_dont_use_mmx:
Chris@4 312 mov dword ptr [inflate_fast_use_mmx],3
Chris@4 313 L_check_mmx_pop:
Chris@4 314 pop edx
Chris@4 315 pop ecx
Chris@4 316 pop ebx
Chris@4 317 pop eax
Chris@4 318 jmp L_check_mmx ; re-dispatch with the stored decision
Chris@4 319 ; 426 "inffast.S"
Chris@4 320 ALIGN 4
Chris@4 321 L_do_loop: ; integer decode loop. esi = input, edi = output, ebp = bit accumulator, bl = bits held
Chris@4 322 ; 437 "inffast.S"
Chris@4 323 cmp bl,15
Chris@4 324 ja L_get_length_code ; more than 15 bits held: no refill needed
Chris@4 325
Chris@4 326 xor eax,eax
Chris@4 327 lodsw ; read 16 more input bits
Chris@4 328 mov cl,bl
Chris@4 329 add bl,16
Chris@4 330 shl eax,cl
Chris@4 331 or ebp,eax ; append them above the bits already held
Chris@4 332
Chris@4 333 L_get_length_code:
Chris@4 334 mov edx, [esp+0] ; length mask
Chris@4 335 mov ecx, [esp+8] ; length table
Chris@4 336 and edx,ebp
Chris@4 337 mov eax, [ecx+edx*4] ; eax = 4-byte table entry: al = op, ah = bit count, upper 16 bits = value
Chris@4 338
Chris@4 339 L_dolen:
Chris@4 340
Chris@4 341
Chris@4 342
Chris@4 343
Chris@4 344
Chris@4 345
Chris@4 346 mov cl,ah
Chris@4 347 sub bl,ah
Chris@4 348 shr ebp,cl ; drop the ah bits used by this entry
Chris@4 349
Chris@4 350
Chris@4 351
Chris@4 352
Chris@4 353
Chris@4 354
Chris@4 355 test al,al
Chris@4 356 jnz L_test_for_length_base ; op != 0: not a literal
Chris@4 357
Chris@4 358 shr eax,16
Chris@4 359 stosb ; op == 0: emit the low byte of the value as one output byte
Chris@4 360
Chris@4 361 L_while_test: ; loop while output pointer < output limit and input pointer < input limit
Chris@4 362
Chris@4 363
Chris@4 364 cmp [esp+16],edi
Chris@4 365 jbe L_break_loop
Chris@4 366
Chris@4 367 cmp [esp+20],esi
Chris@4 368 ja L_do_loop
Chris@4 369 jmp L_break_loop
Chris@4 370
Chris@4 371 L_test_for_length_base: ; entry in eax has op != 0; build the match length in edx
Chris@4 372 ; 502 "inffast.S"
Chris@4 373 mov edx,eax
Chris@4 374 shr edx,16 ; edx = value field of the entry (length base)
Chris@4 375 mov cl,al ; cl = op
Chris@4 376
Chris@4 377 test al,16
Chris@4 378 jz L_test_for_second_level_length ; op bit 4 clear: not a length base
Chris@4 379 and cl,15 ; cl = number of extra bits
Chris@4 380 jz L_save_len ; none: the base is the length
Chris@4 381 cmp bl,cl
Chris@4 382 jae L_add_bits_to_len ; enough bits already held
Chris@4 383
Chris@4 384 mov ch,cl ; keep the extra-bit count while cl is used as a shift count
Chris@4 385 xor eax,eax
Chris@4 386 lodsw ; refill with 16 input bits
Chris@4 387 mov cl,bl
Chris@4 388 add bl,16
Chris@4 389 shl eax,cl
Chris@4 390 or ebp,eax
Chris@4 391 mov cl,ch
Chris@4 392
Chris@4 393 L_add_bits_to_len:
Chris@4 394 mov eax,1
Chris@4 395 shl eax,cl
Chris@4 396 dec eax ; eax = (1 << cl) - 1
Chris@4 397 sub bl,cl
Chris@4 398 and eax,ebp ; extra bits taken from the accumulator
Chris@4 399 shr ebp,cl
Chris@4 400 add edx,eax ; length = base + extra
Chris@4 401
Chris@4 402 L_save_len:
Chris@4 403 mov [esp+24],edx ; saved for the copy code; execution continues at L_decode_distance
Chris@4 404
Chris@4 405
Chris@4 406 L_decode_distance: ; look up the distance entry; ebp = bit accumulator, bl = bits held, esi = input
Chris@4 407 ; 549 "inffast.S"
Chris@4 408 cmp bl,15
Chris@4 409 ja L_get_distance_code ; more than 15 bits held: no refill needed
Chris@4 410
Chris@4 411 xor eax,eax
Chris@4 412 lodsw ; refill with 16 input bits
Chris@4 413 mov cl,bl
Chris@4 414 add bl,16
Chris@4 415 shl eax,cl
Chris@4 416 or ebp,eax
Chris@4 417
Chris@4 418 L_get_distance_code:
Chris@4 419 mov edx, [esp+4] ; distance mask
Chris@4 420 mov ecx, [esp+12] ; distance table
Chris@4 421 and edx,ebp
Chris@4 422 mov eax, [ecx+edx*4] ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
Chris@4 423
Chris@4 424
Chris@4 425 L_dodist:
Chris@4 426 mov edx,eax
Chris@4 427 shr edx,16 ; edx = value field (distance base)
Chris@4 428 mov cl,ah
Chris@4 429 sub bl,ah
Chris@4 430 shr ebp,cl ; drop the ah bits used by this entry
Chris@4 431 ; 584 "inffast.S"
Chris@4 432 mov cl,al ; cl = op
Chris@4 433
Chris@4 434 test al,16
Chris@4 435 jz L_test_for_second_level_dist ; op bit 4 clear: not a distance base
Chris@4 436 and cl,15 ; cl = number of extra bits
Chris@4 437 jz L_check_dist_one ; none: edx is the distance
Chris@4 438 cmp bl,cl
Chris@4 439 jae L_add_bits_to_dist ; enough bits already held
Chris@4 440
Chris@4 441 mov ch,cl ; keep the extra-bit count while cl is used as a shift count
Chris@4 442 xor eax,eax
Chris@4 443 lodsw ; refill with 16 input bits
Chris@4 444 mov cl,bl
Chris@4 445 add bl,16
Chris@4 446 shl eax,cl
Chris@4 447 or ebp,eax
Chris@4 448 mov cl,ch
Chris@4 449
Chris@4 450 L_add_bits_to_dist: ; add cl extra bits from the accumulator (ebp) to the distance base in edx
Chris@4 451 mov eax,1
Chris@4 452 shl eax,cl
Chris@4 453 dec eax ; eax = (1 << cl) - 1
Chris@4 454 sub bl,cl ; bl = bits still held
Chris@4 455 and eax,ebp ; extra bits
Chris@4 456 shr ebp,cl
Chris@4 457 add edx,eax ; edx = distance; execution falls through into L_check_window, like the MMX variant does
Chris@4 459
Chris@4 460 L_check_window: ; edx = distance, [esp+24] = length, edi = output pointer
Chris@4 461 ; 625 "inffast.S"
Chris@4 462 mov [esp+44],esi ; park the input pointer; esi becomes the copy source
Chris@4 463 mov eax,edi
Chris@4 464 sub eax, [esp+40] ; eax = edi - [esp+40], compared with the distance below
Chris@4 465
Chris@4 466 cmp eax,edx
Chris@4 467 jb L_clip_window ; distance reaches back further than that: part of the source is in the window
Chris@4 468
Chris@4 469 mov ecx, [esp+24]
Chris@4 470 mov esi,edi
Chris@4 471 sub esi,edx ; source = output pointer - distance
Chris@4 472 ; The first three bytes are copied with plain moves, the remaining length-3 with rep movsb.
Chris@4 473 sub ecx,3 ; NOTE(review): assumes length >= 3; no check is made here
Chris@4 474 mov al, [esi]
Chris@4 475 mov [edi],al
Chris@4 476 mov al, [esi+1]
Chris@4 477 mov dl, [esi+2]
Chris@4 478 add esi,3
Chris@4 479 mov [edi+1],al
Chris@4 480 mov [edi+2],dl
Chris@4 481 add edi,3
Chris@4 482 rep movsb
Chris@4 483
Chris@4 484 mov esi, [esp+44] ; restore the input pointer
Chris@4 485 jmp L_while_test
Chris@4 486
Chris@4 487 ALIGN 4
Chris@4 488 L_check_dist_one: ; reached when the distance entry has no extra bits; special-cases distance 1 as a byte fill
Chris@4 489 cmp edx,1
Chris@4 490 jne L_check_window ; distance != 1: general copy
Chris@4 491 cmp [esp+40],edi
Chris@4 492 je L_check_window ; output pointer equals [esp+40]: take the general path (which checks the window)
Chris@4 493
Chris@4 494 dec edi
Chris@4 495 mov ecx, [esp+24] ; ecx = length
Chris@4 496 mov al, [edi] ; al = byte just before the output pointer
Chris@4 497 sub ecx,3 ; NOTE(review): assumes length >= 3; no check is made here
Chris@4 498
Chris@4 499 mov [edi+1],al ; write three copies directly ...
Chris@4 500 mov [edi+2],al
Chris@4 501 mov [edi+3],al
Chris@4 502 add edi,4
Chris@4 503 rep stosb ; ... and the remaining length-3 copies with rep stosb
Chris@4 504
Chris@4 505 jmp L_while_test
Chris@4 506
Chris@4 507 ALIGN 4
Chris@4 508 L_test_for_second_level_length: ; length entry with op bit 4 clear; al = cl = op, edx = value field, ebp = bit accumulator
Chris@4 509
Chris@4 510
Chris@4 511
Chris@4 512
Chris@4 513 test al,64
Chris@4 514 jnz L_test_for_end_of_block ; op bit 6 set: end of block or invalid code
Chris@4 515
Chris@4 516 mov eax,1
Chris@4 517 shl eax,cl
Chris@4 518 dec eax ; eax = (1 << op) - 1
Chris@4 519 and eax,ebp ; next op bits of the accumulator (not consumed here)
Chris@4 520 add eax,edx ; index = value + those bits
Chris@4 521 mov edx, [esp+8]
Chris@4 522 mov eax, [edx+eax*4] ; fetch the second-level length entry
Chris@4 523 jmp L_dolen ; and process it like a first-level entry
Chris@4 524
Chris@4 525 ALIGN 4
Chris@4 526 L_test_for_second_level_dist: ; distance entry with op bit 4 clear; al = cl = op, edx = value field
Chris@4 527
Chris@4 528
Chris@4 529
Chris@4 530
Chris@4 531 test al,64
Chris@4 532 jnz L_invalid_distance_code ; op bit 6 set: invalid distance code
Chris@4 533
Chris@4 534 mov eax,1
Chris@4 535 shl eax,cl
Chris@4 536 dec eax ; eax = (1 << op) - 1
Chris@4 537 and eax,ebp
Chris@4 538 add eax,edx ; index = value + next op bits
Chris@4 539 mov edx, [esp+12]
Chris@4 540 mov eax, [edx+eax*4] ; fetch the second-level distance entry
Chris@4 541 jmp L_dodist
Chris@4 542
Chris@4 543 ALIGN 4
Chris@4 544 L_clip_window: ; entry: eax = edi - [esp+40] (smaller than the distance), edx = distance, edi = output pointer
Chris@4 545 ; 721 "inffast.S"
Chris@4 546 mov ecx,eax
Chris@4 547 mov eax, [esp+52] ; eax = wsize
Chris@4 548 neg ecx
Chris@4 549 mov esi, [esp+56] ; esi = window
Chris@4 550
Chris@4 551 cmp eax,edx
Chris@4 552 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@4 553
Chris@4 554 add ecx,edx ; ecx = distance - (edi - [esp+40]) : bytes that must come from the window
Chris@4 555 cmp dword ptr [esp+48],0
Chris@4 556 jne L_wrap_around_window ; write index non-zero
Chris@4 557
Chris@4 558 sub eax,ecx
Chris@4 559 add esi,eax ; source = window + wsize - ecx
Chris@4 560 ; 749 "inffast.S"
Chris@4 561 mov eax, [esp+24] ; eax = length
Chris@4 562 cmp eax,ecx
Chris@4 563 jbe L_do_copy1 ; whole match lies in the window
Chris@4 564
Chris@4 565 sub eax,ecx ; eax = bytes left after the window part
Chris@4 566 rep movsb ; copy the window part
Chris@4 567 mov esi,edi
Chris@4 568 sub esi,edx ; continue from output pointer - distance
Chris@4 569 jmp L_do_copy1 ; L_do_copy1 copies the remaining eax bytes
Chris@4 570 ; (an unlabeled, unreachable duplicate of the compare/copy sequence that used to follow this jump was removed)
Chris@4 579
Chris@4 580 L_wrap_around_window: ; entry: esi = window, ecx = bytes needed from the window, edx = distance, [esp+48] != 0
Chris@4 581 ; 793 "inffast.S"
Chris@4 582 mov eax, [esp+48] ; eax = write index
Chris@4 583 cmp ecx,eax
Chris@4 584 jbe L_contiguous_in_window ; needed bytes all lie below the write index
Chris@4 585
Chris@4 586 add esi, [esp+52]
Chris@4 587 add esi,eax
Chris@4 588 sub esi,ecx ; source = window + wsize + write - ecx
Chris@4 589 sub ecx,eax ; ecx = bytes available from there up to window + wsize
Chris@4 590
Chris@4 591
Chris@4 592 mov eax, [esp+24] ; eax = length
Chris@4 593 cmp eax,ecx
Chris@4 594 jbe L_do_copy1
Chris@4 595
Chris@4 596 sub eax,ecx
Chris@4 597 rep movsb ; copy up to the end of the window
Chris@4 598 mov esi, [esp+56] ; then continue from the start of the window
Chris@4 599 mov ecx, [esp+48] ; for at most `write` bytes
Chris@4 600 cmp eax,ecx
Chris@4 601 jbe L_do_copy1
Chris@4 602
Chris@4 603 sub eax,ecx
Chris@4 604 rep movsb
Chris@4 605 mov esi,edi
Chris@4 606 sub esi,edx ; remainder comes from output pointer - distance
Chris@4 607 jmp L_do_copy1
Chris@4 608
Chris@4 609 L_contiguous_in_window: ; entry: esi = window, eax = write index, ecx = bytes needed from the window (<= write)
Chris@4 610 ; 836 "inffast.S"
Chris@4 611 add esi,eax
Chris@4 612 sub esi,ecx ; source = window + write - ecx
Chris@4 613
Chris@4 614
Chris@4 615 mov eax, [esp+24] ; eax = length
Chris@4 616 cmp eax,ecx
Chris@4 617 jbe L_do_copy1
Chris@4 618
Chris@4 619 sub eax,ecx
Chris@4 620 rep movsb ; copy the window part
Chris@4 621 mov esi,edi
Chris@4 622 sub esi,edx ; remainder comes from output pointer - distance
Chris@4 623
Chris@4 624 L_do_copy1: ; copy the final eax bytes from esi to edi, then resume decoding
Chris@4 625 ; 862 "inffast.S"
Chris@4 626 mov ecx,eax
Chris@4 627 rep movsb
Chris@4 628
Chris@4 629 mov esi, [esp+44] ; restore the input pointer saved by L_check_window
Chris@4 630 jmp L_while_test
Chris@4 631 ; 878 "inffast.S"
Chris@4 632 ALIGN 4
Chris@4 633 L_init_mmx: ; move the decoder state into the registers used by the MMX loop
Chris@4 634 emms
Chris@4 635 ; MMX loop register use: mm0 = bit accumulator, mm1 = shift still to be applied to mm0,
Chris@4 636 ; mm4/mm3 = length mask (working copy / master), mm5/mm2 = distance mask (working copy / master),
Chris@4 637 ; ebp = bits held, ebx = length table.
Chris@4 638
Chris@4 639
Chris@4 640 movd mm0,ebp ; accumulator moves from ebp to mm0
Chris@4 641 mov ebp,ebx ; ebp now holds the bit count
Chris@4 642 ; 896 "inffast.S"
Chris@4 643 movd mm4,dword ptr [esp+0]
Chris@4 644 movq mm3,mm4
Chris@4 645 movd mm5,dword ptr [esp+4]
Chris@4 646 movq mm2,mm5
Chris@4 647 pxor mm1,mm1 ; no shift pending
Chris@4 648 mov ebx, [esp+8]
Chris@4 649 jmp L_do_loop_mmx
Chris@4 650
Chris@4 651 ALIGN 4
Chris@4 652 L_do_loop_mmx: ; MMX decode loop. esi = input, edi = output, mm0 = bit accumulator, ebp = bits held, ebx = length table
Chris@4 653 psrlq mm0,mm1 ; apply the shift left pending in mm1 by the previous step
Chris@4 654
Chris@4 655 cmp ebp,32
Chris@4 656 ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
Chris@4 657
Chris@4 658 movd mm6,ebp
Chris@4 659 movd mm7,dword ptr [esi] ; read 32 more input bits
Chris@4 660 add esi,4
Chris@4 661 psllq mm7,mm6
Chris@4 662 add ebp,32
Chris@4 663 por mm0,mm7 ; append them above the bits already held
Chris@4 664
Chris@4 665 L_get_length_code_mmx:
Chris@4 666 pand mm4,mm0
Chris@4 667 movd eax,mm4 ; eax = accumulator & length mask
Chris@4 668 movq mm4,mm3 ; reload the working mask from its master copy
Chris@4 669 mov eax, [ebx+eax*4] ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
Chris@4 670
Chris@4 671 L_dolen_mmx:
Chris@4 672 movzx ecx,ah
Chris@4 673 movd mm1,ecx ; bits used by this entry become the pending shift
Chris@4 674 sub ebp,ecx
Chris@4 675
Chris@4 676 test al,al
Chris@4 677 jnz L_test_for_length_base_mmx ; op != 0: not a literal
Chris@4 678
Chris@4 679 shr eax,16
Chris@4 680 stosb ; op == 0: emit the low byte of the value as one output byte
Chris@4 681
Chris@4 682 L_while_test_mmx: ; loop while output pointer < output limit and input pointer < input limit
Chris@4 683
Chris@4 684
Chris@4 685 cmp [esp+16],edi
Chris@4 686 jbe L_break_loop
Chris@4 687
Chris@4 688 cmp [esp+20],esi
Chris@4 689 ja L_do_loop_mmx
Chris@4 690 jmp L_break_loop
Chris@4 691
Chris@4 692 L_test_for_length_base_mmx: ; entry in eax has op != 0; build the match length in edx (it stays in edx in the MMX path)
Chris@4 693
Chris@4 694 mov edx,eax
Chris@4 695 shr edx,16 ; edx = value field (length base)
Chris@4 696
Chris@4 697 test al,16
Chris@4 698 jz L_test_for_second_level_length_mmx ; op bit 4 clear: not a length base
Chris@4 699 and eax,15 ; eax = number of extra bits
Chris@4 700 jz L_decode_distance_mmx ; none: the base is the length
Chris@4 701
Chris@4 702 psrlq mm0,mm1 ; apply the pending shift so the extra bits are at the bottom of mm0
Chris@4 703 movd mm1,eax ; the extra bits become the new pending shift
Chris@4 704 movd ecx,mm0
Chris@4 705 sub ebp,eax
Chris@4 706 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
Chris@4 707 add edx,ecx ; length = base + extra
Chris@4 708
Chris@4 709 L_decode_distance_mmx:
Chris@4 710 psrlq mm0,mm1 ; apply the pending shift
Chris@4 711
Chris@4 712 cmp ebp,32
Chris@4 713 ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
Chris@4 714
Chris@4 715 movd mm6,ebp
Chris@4 716 movd mm7,dword ptr [esi] ; read 32 more input bits
Chris@4 717 add esi,4
Chris@4 718 psllq mm7,mm6
Chris@4 719 add ebp,32
Chris@4 720 por mm0,mm7
Chris@4 721
Chris@4 722 L_get_dist_code_mmx:
Chris@4 723 mov ebx, [esp+12] ; ebx = distance table (the length table is reloaded into ebx after the copy)
Chris@4 724 pand mm5,mm0
Chris@4 725 movd eax,mm5 ; eax = accumulator & distance mask
Chris@4 726 movq mm5,mm2 ; reload the working mask from its master copy
Chris@4 727 mov eax, [ebx+eax*4] ; eax = distance entry; execution continues at L_dodist_mmx
Chris@4 728
Chris@4 729 L_dodist_mmx: ; eax = distance entry (al = op, ah = bit count, upper 16 bits = value); edx = length
Chris@4 730
Chris@4 731 movzx ecx,ah
Chris@4 732 mov ebx,eax
Chris@4 733 shr ebx,16 ; ebx = value field (distance base)
Chris@4 734 sub ebp,ecx
Chris@4 735 movd mm1,ecx ; bits used by this entry become the pending shift
Chris@4 736
Chris@4 737 test al,16
Chris@4 738 jz L_test_for_second_level_dist_mmx ; op bit 4 clear: not a distance base
Chris@4 739 and eax,15 ; eax = number of extra bits
Chris@4 740 jz L_check_dist_one_mmx ; none: ebx is the distance
Chris@4 741
Chris@4 742 L_add_bits_to_dist_mmx:
Chris@4 743 psrlq mm0,mm1 ; apply the pending shift
Chris@4 744 movd mm1,eax ; the extra bits become the new pending shift
Chris@4 745 movd ecx,mm0
Chris@4 746 sub ebp,eax
Chris@4 747 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
Chris@4 748 add ebx,ecx ; distance = base + extra
Chris@4 749
Chris@4 750 L_check_window_mmx: ; ebx = distance, edx = length, edi = output pointer
Chris@4 751 mov [esp+44],esi ; park the input pointer; esi becomes the copy source
Chris@4 752 mov eax,edi
Chris@4 753 sub eax, [esp+40] ; eax = edi - [esp+40], compared with the distance below
Chris@4 754
Chris@4 755 cmp eax,ebx
Chris@4 756 jb L_clip_window_mmx ; distance reaches back further than that: part of the source is in the window
Chris@4 757
Chris@4 758 mov ecx,edx
Chris@4 759 mov esi,edi
Chris@4 760 sub esi,ebx ; source = output pointer - distance
Chris@4 761
Chris@4 762 sub ecx,3 ; first three bytes by plain moves, the remaining length-3 by rep movsb; NOTE(review): assumes length >= 3
Chris@4 763 mov al, [esi]
Chris@4 764 mov [edi],al
Chris@4 765 mov al, [esi+1]
Chris@4 766 mov dl, [esi+2]
Chris@4 767 add esi,3
Chris@4 768 mov [edi+1],al
Chris@4 769 mov [edi+2],dl
Chris@4 770 add edi,3
Chris@4 771 rep movsb
Chris@4 772
Chris@4 773 mov esi, [esp+44] ; restore the input pointer
Chris@4 774 mov ebx, [esp+8] ; restore the length table pointer
Chris@4 775 jmp L_while_test_mmx
Chris@4 776
Chris@4 777 ALIGN 4
Chris@4 778 L_check_dist_one_mmx: ; reached when the distance entry has no extra bits; special-cases distance 1 as a byte fill
Chris@4 779 cmp ebx,1
Chris@4 780 jne L_check_window_mmx ; distance != 1: general copy
Chris@4 781 cmp [esp+40],edi
Chris@4 782 je L_check_window_mmx ; output pointer equals [esp+40]: take the general path (which checks the window)
Chris@4 783
Chris@4 784 dec edi
Chris@4 785 mov ecx,edx ; ecx = length
Chris@4 786 mov al, [edi] ; al = byte just before the output pointer
Chris@4 787 sub ecx,3 ; NOTE(review): assumes length >= 3; no check is made here
Chris@4 788
Chris@4 789 mov [edi+1],al ; write three copies directly ...
Chris@4 790 mov [edi+2],al
Chris@4 791 mov [edi+3],al
Chris@4 792 add edi,4
Chris@4 793 rep stosb ; ... and the remaining length-3 copies with rep stosb
Chris@4 794
Chris@4 795 mov ebx, [esp+8] ; restore the length table pointer
Chris@4 796 jmp L_while_test_mmx
Chris@4 797
Chris@4 798 ALIGN 4
Chris@4 799 L_test_for_second_level_length_mmx: ; length entry with op bit 4 clear; al = op, edx = value field, ebx = length table
Chris@4 800 test al,64
Chris@4 801 jnz L_test_for_end_of_block ; op bit 6 set: end of block or invalid code
Chris@4 802
Chris@4 803 and eax,15
Chris@4 804 psrlq mm0,mm1 ; apply the pending shift; L_dolen_mmx overwrites mm1 afterwards
Chris@4 805 movd ecx,mm0
Chris@4 806 and ecx, [inflate_fast_mask+eax*4] ; next (op & 15) bits of the accumulator
Chris@4 807 add ecx,edx ; index = value + those bits
Chris@4 808 mov eax, [ebx+ecx*4] ; fetch the second-level length entry
Chris@4 809 jmp L_dolen_mmx
Chris@4 810
Chris@4 811 ALIGN 4
Chris@4 812 L_test_for_second_level_dist_mmx: ; distance entry with op bit 4 clear; al = op, ebx = value field
Chris@4 813 test al,64
Chris@4 814 jnz L_invalid_distance_code ; op bit 6 set: invalid distance code
Chris@4 815
Chris@4 816 and eax,15
Chris@4 817 psrlq mm0,mm1 ; apply the pending shift; L_dodist_mmx overwrites mm1 afterwards
Chris@4 818 movd ecx,mm0
Chris@4 819 and ecx, [inflate_fast_mask+eax*4] ; next (op & 15) bits of the accumulator
Chris@4 820 mov eax, [esp+12] ; distance table
Chris@4 821 add ecx,ebx ; index = value + those bits
Chris@4 822 mov eax, [eax+ecx*4] ; fetch the second-level distance entry
Chris@4 823 jmp L_dodist_mmx
Chris@4 824
Chris@4 825 ALIGN 4
Chris@4 826 L_clip_window_mmx: ; entry: eax = edi - [esp+40] (smaller than the distance), ebx = distance, edx = length, edi = output pointer
Chris@4 827
Chris@4 828 mov ecx,eax
Chris@4 829 mov eax, [esp+52] ; eax = wsize
Chris@4 830 neg ecx
Chris@4 831 mov esi, [esp+56] ; esi = window
Chris@4 832
Chris@4 833 cmp eax,ebx
Chris@4 834 jb L_invalid_distance_too_far ; distance larger than wsize
Chris@4 835
Chris@4 836 add ecx,ebx ; ecx = distance - (edi - [esp+40]) : bytes that must come from the window
Chris@4 837 cmp dword ptr [esp+48],0
Chris@4 838 jne L_wrap_around_window_mmx ; write index non-zero
Chris@4 839
Chris@4 840 sub eax,ecx
Chris@4 841 add esi,eax ; source = window + wsize - ecx
Chris@4 842
Chris@4 843 cmp edx,ecx
Chris@4 844 jbe L_do_copy1_mmx ; whole match lies in the window
Chris@4 845
Chris@4 846 sub edx,ecx ; edx = bytes left after the window part
Chris@4 847 rep movsb ; copy the window part
Chris@4 848 mov esi,edi
Chris@4 849 sub esi,ebx ; continue from output pointer - distance
Chris@4 850 jmp L_do_copy1_mmx ; L_do_copy1_mmx copies the remaining edx bytes
Chris@4 851 ; (an unlabeled, unreachable duplicate of the compare/copy sequence that used to follow this jump was removed)
Chris@4 860
Chris@4 861 L_wrap_around_window_mmx: ; entry: esi = window, ecx = bytes needed from the window, ebx = distance, edx = length, [esp+48] != 0
Chris@4 862
Chris@4 863 mov eax, [esp+48] ; eax = write index
Chris@4 864 cmp ecx,eax
Chris@4 865 jbe L_contiguous_in_window_mmx ; needed bytes all lie below the write index
Chris@4 866
Chris@4 867 add esi, [esp+52]
Chris@4 868 add esi,eax
Chris@4 869 sub esi,ecx ; source = window + wsize + write - ecx
Chris@4 870 sub ecx,eax ; ecx = bytes available from there up to window + wsize
Chris@4 871
Chris@4 872
Chris@4 873 cmp edx,ecx
Chris@4 874 jbe L_do_copy1_mmx
Chris@4 875
Chris@4 876 sub edx,ecx
Chris@4 877 rep movsb ; copy up to the end of the window
Chris@4 878 mov esi, [esp+56] ; then continue from the start of the window
Chris@4 879 mov ecx, [esp+48] ; for at most `write` bytes
Chris@4 880 cmp edx,ecx
Chris@4 881 jbe L_do_copy1_mmx
Chris@4 882
Chris@4 883 sub edx,ecx
Chris@4 884 rep movsb
Chris@4 885 mov esi,edi
Chris@4 886 sub esi,ebx ; remainder comes from output pointer - distance
Chris@4 887 jmp L_do_copy1_mmx
Chris@4 888
Chris@4 889 L_contiguous_in_window_mmx: ; entry: esi = window, eax = write index, ecx = bytes needed from the window (<= write)
Chris@4 890
Chris@4 891 add esi,eax
Chris@4 892 sub esi,ecx ; source = window + write - ecx
Chris@4 893
Chris@4 894
Chris@4 895 cmp edx,ecx
Chris@4 896 jbe L_do_copy1_mmx
Chris@4 897
Chris@4 898 sub edx,ecx
Chris@4 899 rep movsb ; copy the window part
Chris@4 900 mov esi,edi
Chris@4 901 sub esi,ebx ; remainder comes from output pointer - distance
Chris@4 902
Chris@4 903 L_do_copy1_mmx: ; copy the final edx bytes from esi to edi, then resume decoding
Chris@4 904
Chris@4 905
Chris@4 906 mov ecx,edx
Chris@4 907 rep movsb
Chris@4 908
Chris@4 909 mov esi, [esp+44] ; restore the input pointer saved by L_check_window_mmx
Chris@4 910 mov ebx, [esp+8] ; restore the length table pointer
Chris@4 911 jmp L_while_test_mmx
Chris@4 912 ; 1174 "inffast.S"
Chris@4 913 L_invalid_distance_code: ; exits shared by the integer and MMX loops: each sets ecx = message (or 0) and edx = new mode
Chris@4 914
Chris@4 915
Chris@4 916
Chris@4 917
Chris@4 918
Chris@4 919 mov ecx, invalid_distance_code_msg
Chris@4 920 mov edx,INFLATE_MODE_BAD
Chris@4 921 jmp L_update_stream_state
Chris@4 922
Chris@4 923 L_test_for_end_of_block: ; reached with a length entry whose op (al) has bit 6 set
Chris@4 924
Chris@4 925
Chris@4 926
Chris@4 927
Chris@4 928
Chris@4 929 test al,32
Chris@4 930 jz L_invalid_literal_length_code ; bit 5 clear: invalid code
Chris@4 931
Chris@4 932 mov ecx,0 ; end of block: no message
Chris@4 933 mov edx,INFLATE_MODE_TYPE
Chris@4 934 jmp L_update_stream_state
Chris@4 935
Chris@4 936 L_invalid_literal_length_code:
Chris@4 937
Chris@4 938
Chris@4 939
Chris@4 940
Chris@4 941
Chris@4 942 mov ecx, invalid_literal_length_code_msg
Chris@4 943 mov edx,INFLATE_MODE_BAD
Chris@4 944 jmp L_update_stream_state
Chris@4 945
Chris@4 946 L_invalid_distance_too_far: ; reached from the clip-window code, where esi was pointing at the window
Chris@4 947
Chris@4 948
Chris@4 949
Chris@4 950 mov esi, [esp+44] ; restore the input pointer saved before the copy
Chris@4 951 mov ecx, invalid_distance_too_far_msg
Chris@4 952 mov edx,INFLATE_MODE_BAD
Chris@4 953 jmp L_update_stream_state
Chris@4 954
Chris@4 955 L_update_stream_state: ; ecx = message address or 0, edx = mode value to store
Chris@4 956
Chris@4 957 mov eax, [esp+88] ; eax = first argument of the procedure
Chris@4 958 test ecx,ecx
Chris@4 959 jz L_skip_msg
Chris@4 960 mov [eax+24],ecx ; store the message pointer at offset 24 (presumably z_stream.msg - confirm against zlib.h)
Chris@4 961 L_skip_msg:
Chris@4 962 mov eax, [eax+28] ; eax = pointer at offset 28 (the inflate_state)
Chris@4 963 mov [eax+mode_state],edx ; state->mode = edx
Chris@4 964 jmp L_break_loop
Chris@4 965
Chris@4 966 ALIGN 4
Chris@4 967 L_break_loop: ; common exit: write the decoder state back. esi = input pointer, edi = output pointer
Chris@4 968 ; 1243 "inffast.S"
Chris@4 969 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 970 jne L_update_next_in
Chris@4 971
Chris@4 972
Chris@4 973
Chris@4 974 mov ebx,ebp ; MMX loop keeps the bit count in ebp; move it to ebx as in the integer loop
Chris@4 975
Chris@4 976 L_update_next_in:
Chris@4 977 ; 1266 "inffast.S"
Chris@4 978 mov eax, [esp+88] ; eax = first argument ("strm")
Chris@4 979 mov ecx,ebx
Chris@4 980 mov edx, [eax+28] ; edx = inflate_state pointer
Chris@4 981 shr ecx,3 ; ecx = whole bytes still held in the accumulator
Chris@4 982 sub esi,ecx ; give those bytes back to the input
Chris@4 983 shl ecx,3
Chris@4 984 sub ebx,ecx ; ebx = bit count with the returned bytes removed
Chris@4 985 mov [eax+12],edi ; store the output pointer at strm+12
Chris@4 986 mov [edx+bits_state],ebx
Chris@4 987 mov ecx,ebx ; cl = remaining bit count, used for the mask below
Chris@4 988
Chris@4 989 lea ebx, [esp+28]
Chris@4 990 cmp [esp+20],ebx
Chris@4 991 jne L_buf_not_used ; input limit is not the scratch buffer: esi already points into the caller's input
Chris@4 992 ; The scratch buffer was in use: translate esi back into the caller's buffer and rebuild the limit.
Chris@4 993 sub esi,ebx ; esi = bytes consumed from the scratch buffer
Chris@4 994 mov ebx, [eax+0]
Chris@4 995 mov [esp+20],ebx
Chris@4 996 add esi,ebx ; esi = [strm+0] + bytes consumed
Chris@4 997 mov ebx, [eax+4]
Chris@4 998 sub ebx,11
Chris@4 999 add [esp+20],ebx ; limit = [strm+0] + [strm+4] - 11
Chris@4 1000
Chris@4 1001 L_buf_not_used:
Chris@4 1002 mov [eax+0],esi ; store the input pointer at strm+0
Chris@4 1003
Chris@4 1004 mov ebx,1
Chris@4 1005 shl ebx,cl
Chris@4 1006 dec ebx ; ebx = (1 << remaining bits) - 1
Chris@4 1007
Chris@4 1008
Chris@4 1009
Chris@4 1010
Chris@4 1011
Chris@4 1012 cmp dword ptr [inflate_fast_use_mmx],2
Chris@4 1013 jne L_update_hold
Chris@4 1014
Chris@4 1015
Chris@4 1016
Chris@4 1017 psrlq mm0,mm1 ; apply the shift still pending in mm1
Chris@4 1018 movd ebp,mm0 ; accumulator back into ebp
Chris@4 1019
Chris@4 1020 emms ; leave MMX state
Chris@4 1021
Chris@4 1022 L_update_hold:
Chris@4 1023
Chris@4 1024
Chris@4 1025
Chris@4 1026 and ebp,ebx ; keep only the remaining bits
Chris@4 1027 mov [edx+hold_state],ebp
Chris@4 1028
Chris@4 1029
Chris@4 1030
Chris@4 1031 ; Recompute the byte counts stored at strm+4 and strm+16. eax = first argument, esi = input pointer, edi = output pointer.
Chris@4 1032 mov ebx, [esp+20] ; ebx = input limit
Chris@4 1033 cmp ebx,esi
Chris@4 1034 jbe L_last_is_smaller
Chris@4 1035
Chris@4 1036 sub ebx,esi
Chris@4 1037 add ebx,11
Chris@4 1038 mov [eax+4],ebx ; [strm+4] = 11 + (limit - input pointer)
Chris@4 1039 jmp L_fixup_out
Chris@4 1040 L_last_is_smaller:
Chris@4 1041 sub esi,ebx
Chris@4 1042 neg esi
Chris@4 1043 add esi,11
Chris@4 1044 mov [eax+4],esi ; [strm+4] = 11 - (input pointer - limit)
Chris@4 1045
Chris@4 1046
Chris@4 1047
Chris@4 1048
Chris@4 1049 L_fixup_out:
Chris@4 1050
Chris@4 1051 mov ebx, [esp+16] ; ebx = output limit
Chris@4 1052 cmp ebx,edi
Chris@4 1053 jbe L_end_is_smaller
Chris@4 1054
Chris@4 1055 sub ebx,edi
Chris@4 1056 add ebx,257
Chris@4 1057 mov [eax+16],ebx ; [strm+16] = 257 + (limit - output pointer)
Chris@4 1058 jmp L_done
Chris@4 1059 L_end_is_smaller:
Chris@4 1060 sub edi,ebx
Chris@4 1061 neg edi
Chris@4 1062 add edi,257
Chris@4 1063 mov [eax+16],edi ; [strm+16] = 257 - (output pointer - limit)
Chris@4 1064
Chris@4 1065
Chris@4 1066
Chris@4 1067
Chris@4 1068
Chris@4 1069 L_done: ; epilogue: release the locals and restore what the prologue saved
Chris@4 1070 add esp,64
Chris@4 1071 popfd
Chris@4 1072 pop ebx
Chris@4 1073 pop ebp
Chris@4 1074 pop esi
Chris@4 1075 pop edi
Chris@4 1076 ret ; plain near return: the two stack arguments are left for the caller to remove
Chris@4 1077 _inflate_fast endp
Chris@4 1078
Chris@4 1079 _TEXT ends ; closes the _TEXT segment opened before the message strings
Chris@4 1080 end ; end of the assembly source