annotate src/zlib-1.2.8/contrib/masmx86/inffas32.asm @ 155:54abead6ecce

Opus for Windows (MSVC)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 25 Jan 2019 12:15:58 +0000
parents 5b4145a0d408
children
rev   line source
cannam@128 1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
cannam@128 2 ; *
cannam@128 3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
cannam@128 4 ; *
cannam@128 5 ; * Copyright (C) 1995-2003 Mark Adler
cannam@128 6 ; * For conditions of distribution and use, see copyright notice in zlib.h
cannam@128 7 ; *
cannam@128 8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
cannam@128 9 ; * Please use the copyright conditions above.
cannam@128 10 ; *
cannam@128 11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
cannam@128 12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
cannam@128 13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
cannam@128 14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
cannam@128 15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
cannam@128 16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
cannam@128 17 ; * versions of this and inffast.S can be found at
cannam@128 18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
cannam@128 19 ; *
cannam@128 20 ; * 2005 : modification by Gilles Vollant
cannam@128 21 ; */
cannam@128 22 ; For Visual C++ 4.x and higher and ML 6.x and higher
cannam@128 23 ; ml.exe is in directory \MASM611C of Win95 DDK
cannam@128 24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
cannam@128 25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
cannam@128 26 ;
cannam@128 27 ;
cannam@128 28 ; compile with command line option
cannam@128 29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
cannam@128 30
cannam@128 31 ; if you define NO_GZIP (see inflate.h), compile with
cannam@128 32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
cannam@128 33
cannam@128 34
cannam@128 35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
cannam@128 36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
cannam@128 37 ; in inflate_state in inflate.h)
cannam@128 38 zlib1222sup equ 8 ; added to every inflate_state offset from wsize_state onwards (see the *_state equates)
cannam@128 39 ; INFLATE_MODE_TYPE / INFLATE_MODE_BAD are the numeric values this routine stores into state->mode.
cannam@128 40 ; They must equal TYPE and BAD of enum inflate_mode in the inflate.h this file is built with.
cannam@128 41 IFDEF GUNZIP
cannam@128 42 INFLATE_MODE_TYPE equ 11
cannam@128 43 INFLATE_MODE_BAD equ 29 ; zlib 1.2.8: BAD is 29 (26 is CHECK there, so errors were not reported as BAD)
cannam@128 44 ELSE
cannam@128 45 IFNDEF NO_GUNZIP
cannam@128 46 INFLATE_MODE_TYPE equ 11
cannam@128 47 INFLATE_MODE_BAD equ 29 ; same value as the GUNZIP branch above
cannam@128 48 ELSE ; NOTE(review): the NO_GUNZIP values below date from an older enum layout - verify against inflate.h
cannam@128 49 INFLATE_MODE_TYPE equ 3
cannam@128 50 INFLATE_MODE_BAD equ 17
cannam@128 51 ENDIF
cannam@128 52 ENDIF
cannam@128 53
cannam@128 54
cannam@128 55 ; 75 "inffast.S"
cannam@128 56 ;FILE "inffast.S"
cannam@128 57
cannam@128 58 ;;;GLOBAL _inflate_fast
cannam@128 59
cannam@128 60 ;;;SECTION .text
cannam@128 61
cannam@128 62
cannam@128 63
cannam@128 64 .586p ; Pentium instruction set (cpuid is used below)
cannam@128 65 .mmx ; enable the MMX mnemonics used by the L_*_mmx code path
cannam@128 66
cannam@128 67 name inflate_fast_x86
cannam@128 68 .MODEL FLAT
cannam@128 69 ; inflate_fast_use_mmx: below 2 = CPU not probed yet, 2 = use the MMX loop, 3 = use the integer loop
cannam@128 70 _DATA segment
cannam@128 71 inflate_fast_use_mmx:
cannam@128 72 dd 1
cannam@128 73 _DATA ends
cannam@128 74
cannam@128 75 _TEXT segment
cannam@128 76
cannam@128 77
cannam@128 78
cannam@128 79 ALIGN 4
cannam@128 80 db 'Fast decoding Code from Chris Anderson' ; unlabelled, NUL-terminated credit string
cannam@128 81 db 0
cannam@128 82 ; NUL-terminated error messages; their addresses are stored at [strm+24] by L_update_stream_state
cannam@128 83 ALIGN 4
cannam@128 84 invalid_literal_length_code_msg:
cannam@128 85 db 'invalid literal/length code'
cannam@128 86 db 0
cannam@128 87
cannam@128 88 ALIGN 4
cannam@128 89 invalid_distance_code_msg:
cannam@128 90 db 'invalid distance code'
cannam@128 91 db 0
cannam@128 92
cannam@128 93 ALIGN 4
cannam@128 94 invalid_distance_too_far_msg:
cannam@128 95 db 'invalid distance too far back'
cannam@128 96 db 0
cannam@128 97
cannam@128 98 ; inflate_fast_mask[n] = (1 shl n) - 1 for n = 0..32; the MMX path indexes it with a bit count
cannam@128 99 ALIGN 4
cannam@128 100 inflate_fast_mask:
cannam@128 101 dd 0
cannam@128 102 dd 1
cannam@128 103 dd 3
cannam@128 104 dd 7
cannam@128 105 dd 15
cannam@128 106 dd 31
cannam@128 107 dd 63
cannam@128 108 dd 127
cannam@128 109 dd 255
cannam@128 110 dd 511
cannam@128 111 dd 1023
cannam@128 112 dd 2047
cannam@128 113 dd 4095
cannam@128 114 dd 8191
cannam@128 115 dd 16383
cannam@128 116 dd 32767
cannam@128 117 dd 65535
cannam@128 118 dd 131071
cannam@128 119 dd 262143
cannam@128 120 dd 524287
cannam@128 121 dd 1048575
cannam@128 122 dd 2097151
cannam@128 123 dd 4194303
cannam@128 124 dd 8388607
cannam@128 125 dd 16777215
cannam@128 126 dd 33554431
cannam@128 127 dd 67108863
cannam@128 128 dd 134217727
cannam@128 129 dd 268435455
cannam@128 130 dd 536870911
cannam@128 131 dd 1073741823
cannam@128 132 dd 2147483647
cannam@128 133 dd 4294967295
cannam@128 134
cannam@128 135 ; Byte offsets of the inflate_state fields this routine reads or writes (state pointer = [strm+28])
cannam@128 136 mode_state equ 0 ;/* state->mode */
cannam@128 137 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
cannam@128 138 write_state equ (36+4+zlib1222sup) ;/* state->write */
cannam@128 139 window_state equ (40+4+zlib1222sup) ;/* state->window */
cannam@128 140 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
cannam@128 141 bits_state equ (48+4+zlib1222sup) ;/* state->bits */
cannam@128 142 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
cannam@128 143 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
cannam@128 144 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
cannam@128 145 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
cannam@128 146
cannam@128 147
cannam@128 148 ;;SECTION .text
cannam@128 149 ; 205 "inffast.S"
cannam@128 150 ;GLOBAL inflate_fast_use_mmx
cannam@128 151
cannam@128 152 ;SECTION .data
cannam@128 153
cannam@128 154
cannam@128 155 ; GLOBAL inflate_fast_use_mmx:object
cannam@128 156 ;.size inflate_fast_use_mmx, 4
cannam@128 157 ; 226 "inffast.S"
cannam@128 158 ;SECTION .text
cannam@128 159
cannam@128 160 ALIGN 4
cannam@128 161 _inflate_fast proc near ; fast decode loop; takes two dword stack arguments, restores edi/esi/ebp/ebx/EFLAGS on exit
cannam@128 162 .FPO (16, 4, 0, 0, 1, 0)
cannam@128 163 push edi
cannam@128 164 push esi
cannam@128 165 push ebp
cannam@128 166 push ebx
cannam@128 167 pushfd ; EFLAGS is saved: DF is cleared below and the ID bit may be toggled in L_check_mmx
cannam@128 168 sub esp,64 ; 64 bytes of locals; 5 pushes + return address put the first argument at [esp+88]
cannam@128 169 cld ; movsb/stosb/lodsw must advance upwards
cannam@128 170 ; Arguments after the prologue:
cannam@128 171 ; [esp+88] = pointer to the stream structure (called strm in the comments below)
cannam@128 172 ; [esp+92] = second argument, used only to compute `beg` (presumably `start` of the C inflate_fast)
cannam@128 173
cannam@128 174 mov esi, [esp+88] ; esi = strm
cannam@128 175 mov edi, [esi+28] ; edi = state pointer taken from strm+28
cannam@128 176 ; Local frame, named after how each slot is used in this file:
cannam@128 177 ; [esp+0] lmask = (1 shl lenbits)-1 [esp+4] dmask = (1 shl distbits)-1
cannam@128 178 ; [esp+8] lencode table pointer [esp+12] distcode table pointer
cannam@128 179 ; [esp+16] end = out + avail_out - 257 [esp+20] last = in + avail_in - 11
cannam@128 180 ; [esp+24] len of the current match [esp+28..39] 12-byte buffer for short input
cannam@128 181 ; [esp+40] beg [esp+44] saved input pointer
cannam@128 182 ; [esp+48] write [esp+52] wsize [esp+56] window [esp+60] out
cannam@128 183 mov edx, [esi+4] ; input byte count (strm+4)
cannam@128 184 mov eax, [esi+0] ; input pointer (strm+0)
cannam@128 185
cannam@128 186 add edx,eax
cannam@128 187 sub edx,11 ; edx = last
cannam@128 188
cannam@128 189 mov [esp+44],eax
cannam@128 190 mov [esp+20],edx
cannam@128 191
cannam@128 192 mov ebp, [esp+92] ; second argument
cannam@128 193 mov ecx, [esi+16] ; output space left (strm+16)
cannam@128 194 mov ebx, [esi+12] ; output pointer (strm+12)
cannam@128 195
cannam@128 196 sub ebp,ecx
cannam@128 197 neg ebp
cannam@128 198 add ebp,ebx ; ebp = beg = out - (arg2 - avail_out)
cannam@128 199
cannam@128 200 sub ecx,257
cannam@128 201 add ecx,ebx ; ecx = end
cannam@128 202
cannam@128 203 mov [esp+60],ebx
cannam@128 204 mov [esp+40],ebp
cannam@128 205 mov [esp+16],ecx
cannam@128 206 ; 285 "inffast.S"
cannam@128 207 mov eax, [edi+lencode_state]
cannam@128 208 mov ecx, [edi+distcode_state]
cannam@128 209
cannam@128 210 mov [esp+8],eax
cannam@128 211 mov [esp+12],ecx
cannam@128 212
cannam@128 213 mov eax,1
cannam@128 214 mov ecx, [edi+lenbits_state]
cannam@128 215 shl eax,cl
cannam@128 216 dec eax
cannam@128 217 mov [esp+0],eax ; lmask
cannam@128 218
cannam@128 219 mov eax,1
cannam@128 220 mov ecx, [edi+distbits_state]
cannam@128 221 shl eax,cl
cannam@128 222 dec eax
cannam@128 223 mov [esp+4],eax ; dmask
cannam@128 224
cannam@128 225 mov eax, [edi+wsize_state]
cannam@128 226 mov ecx, [edi+write_state]
cannam@128 227 mov edx, [edi+window_state]
cannam@128 228
cannam@128 229 mov [esp+52],eax
cannam@128 230 mov [esp+48],ecx
cannam@128 231 mov [esp+56],edx
cannam@128 232
cannam@128 233 mov ebp, [edi+hold_state] ; ebp = bit accumulator (hold)
cannam@128 234 mov ebx, [edi+bits_state] ; ebx = number of valid bits in hold
cannam@128 235 ; 321 "inffast.S"
cannam@128 236 mov esi, [esp+44]
cannam@128 237 mov ecx, [esp+20]
cannam@128 238 cmp ecx,esi
cannam@128 239 ja L_align_long ; last > in: read the caller's buffer in place
cannam@128 240 ; Otherwise copy the remaining input into the 12-byte stack buffer and zero-pad it.
cannam@128 241 add ecx,11
cannam@128 242 sub ecx,esi ; ecx = last + 11 - in = input bytes available
cannam@128 243 mov eax,12
cannam@128 244 sub eax,ecx ; eax = number of padding bytes
cannam@128 245 lea edi, [esp+28]
cannam@128 246 rep movsb
cannam@128 247 mov ecx,eax
cannam@128 248 xor eax,eax
cannam@128 249 rep stosb
cannam@128 250 lea esi, [esp+28] ; decode from the stack buffer
cannam@128 251 mov [esp+20],esi ; last = buffer address; L_update_next_in tests this to detect buffer use
cannam@128 252 jmp L_is_aligned
cannam@128 253
cannam@128 254
cannam@128 255 L_align_long: ; feed single input bytes into hold until esi is 4-byte aligned
cannam@128 256 test esi,3
cannam@128 257 jz L_is_aligned
cannam@128 258 xor eax,eax
cannam@128 259 mov al, [esi]
cannam@128 260 inc esi
cannam@128 261 mov ecx,ebx ; shift count = bits already held
cannam@128 262 add ebx,8
cannam@128 263 shl eax,cl
cannam@128 264 or ebp,eax ; place the new byte above the bits already in hold
cannam@128 265 jmp L_align_long
cannam@128 266
cannam@128 267 L_is_aligned:
cannam@128 268 mov edi, [esp+60] ; edi = output pointer for the decode loops
cannam@128 269 ; 366 "inffast.S"
cannam@128 270 L_check_mmx: ; dispatch on inflate_fast_use_mmx: 2 = MMX loop, above 2 = integer loop, below 2 = probe the CPU
cannam@128 271 cmp dword ptr [inflate_fast_use_mmx],2
cannam@128 272 je L_init_mmx
cannam@128 273 ja L_do_loop
cannam@128 274 ; Probe: the four registers pushed here are restored at L_check_mmx_pop.
cannam@128 275 push eax
cannam@128 276 push ebx
cannam@128 277 push ecx
cannam@128 278 push edx
cannam@128 279 pushfd
cannam@128 280 mov eax, [esp] ; eax = original EFLAGS
cannam@128 281 xor dword ptr [esp],0200000h ; flip bit 21 (the ID flag) in the saved image
cannam@128 282 ; If the flipped bit sticks after popfd/pushfd, the cpuid instruction is available.
cannam@128 283
cannam@128 284
cannam@128 285
cannam@128 286 popfd
cannam@128 287 pushfd
cannam@128 288 pop edx
cannam@128 289 xor edx,eax
cannam@128 290 jz L_dont_use_mmx ; bit could not be changed: no cpuid
cannam@128 291 xor eax,eax
cannam@128 292 cpuid ; leaf 0: vendor string in ebx, edx, ecx
cannam@128 293 cmp ebx,0756e6547h ; 'Genu'
cannam@128 294 jne L_dont_use_mmx
cannam@128 295 cmp ecx,06c65746eh ; 'ntel'
cannam@128 296 jne L_dont_use_mmx
cannam@128 297 cmp edx,049656e69h ; 'ineI'
cannam@128 298 jne L_dont_use_mmx
cannam@128 299 mov eax,1
cannam@128 300 cpuid ; leaf 1: version in eax, feature flags in edx
cannam@128 301 shr eax,8
cannam@128 302 and eax,15 ; family field
cannam@128 303 cmp eax,6
cannam@128 304 jne L_dont_use_mmx ; the MMX loop is only selected for family 6
cannam@128 305 test edx,0800000h ; feature bit 23 = MMX
cannam@128 306 jnz L_use_mmx
cannam@128 307 jmp L_dont_use_mmx
cannam@128 308 L_use_mmx:
cannam@128 309 mov dword ptr [inflate_fast_use_mmx],2
cannam@128 310 jmp L_check_mmx_pop
cannam@128 311 L_dont_use_mmx:
cannam@128 312 mov dword ptr [inflate_fast_use_mmx],3
cannam@128 313 L_check_mmx_pop:
cannam@128 314 pop edx
cannam@128 315 pop ecx
cannam@128 316 pop ebx
cannam@128 317 pop eax
cannam@128 318 jmp L_check_mmx ; re-dispatch with the decision now recorded
cannam@128 319 ; 426 "inffast.S"
cannam@128 320 ALIGN 4
cannam@128 321 L_do_loop: ; integer loop: ebp = hold, bl = bits, esi = in, edi = out
cannam@128 322 ; 437 "inffast.S"
cannam@128 323 cmp bl,15
cannam@128 324 ja L_get_length_code ; more than 15 bits held: no refill needed
cannam@128 325
cannam@128 326 xor eax,eax
cannam@128 327 lodsw ; fetch 16 more input bits
cannam@128 328 mov cl,bl
cannam@128 329 add bl,16
cannam@128 330 shl eax,cl
cannam@128 331 or ebp,eax
cannam@128 332
cannam@128 333 L_get_length_code:
cannam@128 334 mov edx, [esp+0]
cannam@128 335 mov ecx, [esp+8]
cannam@128 336 and edx,ebp ; index = hold & lmask
cannam@128 337 mov eax, [ecx+edx*4] ; 4-byte table entry: al = op flags, ah = bit count, upper 16 bits = value
cannam@128 338
cannam@128 339 L_dolen:
cannam@128 340 ; Consume the bits of this code, then dispatch on the op byte.
cannam@128 341
cannam@128 342
cannam@128 343
cannam@128 344
cannam@128 345
cannam@128 346 mov cl,ah
cannam@128 347 sub bl,ah
cannam@128 348 shr ebp,cl
cannam@128 349
cannam@128 350
cannam@128 351
cannam@128 352
cannam@128 353
cannam@128 354
cannam@128 355 test al,al
cannam@128 356 jnz L_test_for_length_base ; op != 0: not a literal
cannam@128 357
cannam@128 358 shr eax,16
cannam@128 359 stosb ; op == 0: write the low byte of the value as a literal
cannam@128 360
cannam@128 361 L_while_test:
cannam@128 362 ; Continue only while out is below end and in is below last.
cannam@128 363
cannam@128 364 cmp [esp+16],edi
cannam@128 365 jbe L_break_loop
cannam@128 366
cannam@128 367 cmp [esp+20],esi
cannam@128 368 ja L_do_loop
cannam@128 369 jmp L_break_loop
cannam@128 370
cannam@128 371 L_test_for_length_base: ; eax = table entry with op != 0
cannam@128 372 ; 502 "inffast.S"
cannam@128 373 mov edx,eax
cannam@128 374 shr edx,16 ; edx = value field (length base when op bit 16 is set)
cannam@128 375 mov cl,al
cannam@128 376
cannam@128 377 test al,16
cannam@128 378 jz L_test_for_second_level_length
cannam@128 379 and cl,15 ; cl = number of extra length bits
cannam@128 380 jz L_save_len
cannam@128 381 cmp bl,cl
cannam@128 382 jae L_add_bits_to_len ; enough bits already held
cannam@128 383
cannam@128 384 mov ch,cl ; keep the extra-bit count while cl is used as a shift count
cannam@128 385 xor eax,eax
cannam@128 386 lodsw
cannam@128 387 mov cl,bl
cannam@128 388 add bl,16
cannam@128 389 shl eax,cl
cannam@128 390 or ebp,eax
cannam@128 391 mov cl,ch
cannam@128 392
cannam@128 393 L_add_bits_to_len:
cannam@128 394 mov eax,1
cannam@128 395 shl eax,cl
cannam@128 396 dec eax ; eax = (1 shl cl) - 1
cannam@128 397 sub bl,cl
cannam@128 398 and eax,ebp
cannam@128 399 shr ebp,cl
cannam@128 400 add edx,eax ; len = base + extra bits
cannam@128 401
cannam@128 402 L_save_len:
cannam@128 403 mov [esp+24],edx
cannam@128 404
cannam@128 405
cannam@128 406 L_decode_distance: ; the match length is in [esp+24]; now decode the distance code
cannam@128 407 ; 549 "inffast.S"
cannam@128 408 cmp bl,15
cannam@128 409 ja L_get_distance_code ; more than 15 bits held: no refill needed
cannam@128 410
cannam@128 411 xor eax,eax
cannam@128 412 lodsw ; fetch 16 more input bits
cannam@128 413 mov cl,bl
cannam@128 414 add bl,16
cannam@128 415 shl eax,cl
cannam@128 416 or ebp,eax
cannam@128 417
cannam@128 418 L_get_distance_code:
cannam@128 419 mov edx, [esp+4]
cannam@128 420 mov ecx, [esp+12]
cannam@128 421 and edx,ebp ; index = hold & dmask
cannam@128 422 mov eax, [ecx+edx*4] ; table entry: al = op flags, ah = bit count, upper 16 bits = value
cannam@128 423
cannam@128 424
cannam@128 425 L_dodist:
cannam@128 426 mov edx,eax
cannam@128 427 shr edx,16 ; edx = value field (distance base when op bit 16 is set)
cannam@128 428 mov cl,ah
cannam@128 429 sub bl,ah
cannam@128 430 shr ebp,cl ; consume the bits of this code
cannam@128 431 ; 584 "inffast.S"
cannam@128 432 mov cl,al
cannam@128 433
cannam@128 434 test al,16
cannam@128 435 jz L_test_for_second_level_dist
cannam@128 436 and cl,15 ; cl = number of extra distance bits
cannam@128 437 jz L_check_dist_one
cannam@128 438 cmp bl,cl
cannam@128 439 jae L_add_bits_to_dist ; enough bits already held
cannam@128 440
cannam@128 441 mov ch,cl ; keep the extra-bit count while cl is used as a shift count
cannam@128 442 xor eax,eax
cannam@128 443 lodsw
cannam@128 444 mov cl,bl
cannam@128 445 add bl,16
cannam@128 446 shl eax,cl
cannam@128 447 or ebp,eax
cannam@128 448 mov cl,ch
cannam@128 449
cannam@128 450 L_add_bits_to_dist:
cannam@128 451 mov eax,1
cannam@128 452 shl eax,cl
cannam@128 453 dec eax ; eax = (1 shl cl) - 1
cannam@128 454 sub bl,cl
cannam@128 455 and eax,ebp
cannam@128 456 shr ebp,cl
cannam@128 457 add edx,eax ; dist = base + extra bits; execution continues at L_check_window just below
cannam@128 459
cannam@128 460 L_check_window: ; edx = dist, [esp+24] = len
cannam@128 461 ; 625 "inffast.S"
cannam@128 462 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
cannam@128 463 mov eax,edi
cannam@128 464 sub eax, [esp+40] ; eax = out - beg
cannam@128 465
cannam@128 466 cmp eax,edx
cannam@128 467 jb L_clip_window ; dist reaches back before beg
cannam@128 468
cannam@128 469 mov ecx, [esp+24]
cannam@128 470 mov esi,edi
cannam@128 471 sub esi,edx ; esi = out - dist
cannam@128 472
cannam@128 473 sub ecx,3 ; three bytes are copied by hand, the remaining len-3 by rep movsb
cannam@128 474 mov al, [esi]
cannam@128 475 mov [edi],al
cannam@128 476 mov al, [esi+1]
cannam@128 477 mov dl, [esi+2]
cannam@128 478 add esi,3
cannam@128 479 mov [edi+1],al
cannam@128 480 mov [edi+2],dl
cannam@128 481 add edi,3
cannam@128 482 rep movsb
cannam@128 483
cannam@128 484 mov esi, [esp+44] ; restore the input pointer
cannam@128 485 jmp L_while_test
cannam@128 486
cannam@128 487 ALIGN 4
cannam@128 488 L_check_dist_one: ; reached when the distance code has no extra bits; special-cases dist == 1
cannam@128 489 cmp edx,1
cannam@128 490 jne L_check_window
cannam@128 491 cmp [esp+40],edi
cannam@128 492 je L_check_window ; out == beg: no previous byte in the output to repeat
cannam@128 493
cannam@128 494 dec edi
cannam@128 495 mov ecx, [esp+24]
cannam@128 496 mov al, [edi] ; al = previous output byte
cannam@128 497 sub ecx,3
cannam@128 498
cannam@128 499 mov [edi+1],al
cannam@128 500 mov [edi+2],al
cannam@128 501 mov [edi+3],al
cannam@128 502 add edi,4 ; three copies written; edi is now 3 past its value on entry
cannam@128 503 rep stosb ; remaining len-3 copies
cannam@128 504
cannam@128 505 jmp L_while_test
cannam@128 506
cannam@128 507 ALIGN 4
cannam@128 508 L_test_for_second_level_length: ; op bit 16 clear; cl = op, edx = value field
cannam@128 509 ; If op bit 64 is clear the entry links to a second-level table:
cannam@128 510 ; next index = value + (hold & ((1 shl op) - 1)).
cannam@128 511
cannam@128 512
cannam@128 513 test al,64
cannam@128 514 jnz L_test_for_end_of_block
cannam@128 515
cannam@128 516 mov eax,1
cannam@128 517 shl eax,cl
cannam@128 518 dec eax
cannam@128 519 and eax,ebp
cannam@128 520 add eax,edx
cannam@128 521 mov edx, [esp+8]
cannam@128 522 mov eax, [edx+eax*4]
cannam@128 523 jmp L_dolen
cannam@128 524
cannam@128 525 ALIGN 4
cannam@128 526 L_test_for_second_level_dist: ; same lookup for the distance table
cannam@128 527 ; With op bit 64 set the distance code is rejected as invalid.
cannam@128 528
cannam@128 529
cannam@128 530
cannam@128 531 test al,64
cannam@128 532 jnz L_invalid_distance_code
cannam@128 533
cannam@128 534 mov eax,1
cannam@128 535 shl eax,cl
cannam@128 536 dec eax
cannam@128 537 and eax,ebp
cannam@128 538 add eax,edx
cannam@128 539 mov edx, [esp+12]
cannam@128 540 mov eax, [edx+eax*4]
cannam@128 541 jmp L_dodist
cannam@128 542
cannam@128 543 ALIGN 4
cannam@128 544 L_clip_window: ; the match starts before beg, so its first part is read from the window
cannam@128 545 ; 721 "inffast.S"
cannam@128 546 mov ecx,eax ; on entry eax = out - beg, edx = dist
cannam@128 547 mov eax, [esp+52] ; eax = wsize
cannam@128 548 neg ecx
cannam@128 549 mov esi, [esp+56] ; esi = window
cannam@128 550
cannam@128 551 cmp eax,edx
cannam@128 552 jb L_invalid_distance_too_far ; wsize < dist (the check is against wsize)
cannam@128 553
cannam@128 554 add ecx,edx ; ecx = dist - (out - beg) = bytes to take from the window
cannam@128 555 cmp dword ptr [esp+48],0
cannam@128 556 jne L_wrap_around_window
cannam@128 557 ; write == 0: the wanted bytes are the last ecx bytes of the window.
cannam@128 558 sub eax,ecx
cannam@128 559 add esi,eax ; esi = window + wsize - ecx
cannam@128 560 ; 749 "inffast.S"
cannam@128 561 mov eax, [esp+24] ; eax = len
cannam@128 562 cmp eax,ecx
cannam@128 563 jbe L_do_copy1 ; the whole match comes from the window
cannam@128 564
cannam@128 565 sub eax,ecx ; eax = bytes left after the window part
cannam@128 566 rep movsb
cannam@128 567 mov esi,edi
cannam@128 568 sub esi,edx ; continue from the output itself: esi = out - dist
cannam@128 569 jmp L_do_copy1
cannam@128 579
cannam@128 580 L_wrap_around_window: ; write != 0; ecx = bytes needed from the window, esi = window
cannam@128 581 ; 793 "inffast.S"
cannam@128 582 mov eax, [esp+48] ; eax = write
cannam@128 583 cmp ecx,eax
cannam@128 584 jbe L_contiguous_in_window
cannam@128 585 ; More than `write` bytes needed: start near the end of the window and wrap to its start.
cannam@128 586 add esi, [esp+52]
cannam@128 587 add esi,eax
cannam@128 588 sub esi,ecx ; esi = window + wsize + write - ecx
cannam@128 589 sub ecx,eax ; ecx = bytes available before the end of the window
cannam@128 590
cannam@128 591
cannam@128 592 mov eax, [esp+24] ; eax = len
cannam@128 593 cmp eax,ecx
cannam@128 594 jbe L_do_copy1
cannam@128 595
cannam@128 596 sub eax,ecx
cannam@128 597 rep movsb ; tail part of the window
cannam@128 598 mov esi, [esp+56]
cannam@128 599 mov ecx, [esp+48] ; then up to `write` bytes from the start of the window
cannam@128 600 cmp eax,ecx
cannam@128 601 jbe L_do_copy1
cannam@128 602
cannam@128 603 sub eax,ecx
cannam@128 604 rep movsb
cannam@128 605 mov esi,edi
cannam@128 606 sub esi,edx ; remainder comes from the output: esi = out - dist
cannam@128 607 jmp L_do_copy1
cannam@128 608
cannam@128 609 L_contiguous_in_window: ; the ecx bytes end at window + write
cannam@128 610 ; 836 "inffast.S"
cannam@128 611 add esi,eax
cannam@128 612 sub esi,ecx ; esi = window + write - ecx
cannam@128 613
cannam@128 614
cannam@128 615 mov eax, [esp+24] ; eax = len
cannam@128 616 cmp eax,ecx
cannam@128 617 jbe L_do_copy1
cannam@128 618
cannam@128 619 sub eax,ecx
cannam@128 620 rep movsb
cannam@128 621 mov esi,edi
cannam@128 622 sub esi,edx ; remainder comes from the output: esi = out - dist
cannam@128 623
cannam@128 624 L_do_copy1: ; copy the final eax bytes from esi, then resume decoding
cannam@128 625 ; 862 "inffast.S"
cannam@128 626 mov ecx,eax
cannam@128 627 rep movsb
cannam@128 628
cannam@128 629 mov esi, [esp+44] ; restore the input pointer saved in L_check_window
cannam@128 630 jmp L_while_test
cannam@128 631 ; 878 "inffast.S"
cannam@128 632 ALIGN 4
cannam@128 633 L_init_mmx: ; set up the register assignment used by the MMX loop
cannam@128 634 emms
cannam@128 635 ; MMX loop registers: mm0 = hold, ebp = bits, mm1 = shift still to be applied to mm0,
cannam@128 636 ; mm4/mm3 = lmask (working copy / master), mm5/mm2 = dmask (working copy / master),
cannam@128 637 ; ebx = lencode table pointer, edx = len, esi = in, edi = out.
cannam@128 638
cannam@128 639
cannam@128 640 movd mm0,ebp
cannam@128 641 mov ebp,ebx
cannam@128 642 ; 896 "inffast.S"
cannam@128 643 movd mm4,dword ptr [esp+0]
cannam@128 644 movq mm3,mm4
cannam@128 645 movd mm5,dword ptr [esp+4]
cannam@128 646 movq mm2,mm5
cannam@128 647 pxor mm1,mm1 ; no shift pending yet
cannam@128 648 mov ebx, [esp+8]
cannam@128 649 jmp L_do_loop_mmx
cannam@128 650
cannam@128 651 ALIGN 4
cannam@128 652 L_do_loop_mmx:
cannam@128 653 psrlq mm0,mm1 ; apply the shift left pending by the previous code
cannam@128 654
cannam@128 655 cmp ebp,32
cannam@128 656 ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
cannam@128 657
cannam@128 658 movd mm6,ebp
cannam@128 659 movd mm7,dword ptr [esi] ; fetch 32 more input bits
cannam@128 660 add esi,4
cannam@128 661 psllq mm7,mm6
cannam@128 662 add ebp,32
cannam@128 663 por mm0,mm7
cannam@128 664
cannam@128 665 L_get_length_code_mmx:
cannam@128 666 pand mm4,mm0
cannam@128 667 movd eax,mm4 ; eax = hold & lmask
cannam@128 668 movq mm4,mm3 ; reload the working mask from its master copy
cannam@128 669 mov eax, [ebx+eax*4] ; table entry: al = op flags, ah = bit count, upper 16 bits = value
cannam@128 670
cannam@128 671 L_dolen_mmx:
cannam@128 672 movzx ecx,ah
cannam@128 673 movd mm1,ecx ; the code's bits are dropped from mm0 by the next psrlq mm0,mm1
cannam@128 674 sub ebp,ecx
cannam@128 675
cannam@128 676 test al,al
cannam@128 677 jnz L_test_for_length_base_mmx ; op != 0: not a literal
cannam@128 678
cannam@128 679 shr eax,16
cannam@128 680 stosb ; op == 0: write the low byte of the value as a literal
cannam@128 681
cannam@128 682 L_while_test_mmx:
cannam@128 683 ; Continue only while out is below end and in is below last.
cannam@128 684
cannam@128 685 cmp [esp+16],edi
cannam@128 686 jbe L_break_loop
cannam@128 687
cannam@128 688 cmp [esp+20],esi
cannam@128 689 ja L_do_loop_mmx
cannam@128 690 jmp L_break_loop
cannam@128 691
cannam@128 692 L_test_for_length_base_mmx: ; eax = table entry with op != 0
cannam@128 693
cannam@128 694 mov edx,eax
cannam@128 695 shr edx,16 ; edx = value field (length base when op bit 16 is set)
cannam@128 696
cannam@128 697 test al,16
cannam@128 698 jz L_test_for_second_level_length_mmx
cannam@128 699 and eax,15 ; eax = number of extra length bits
cannam@128 700 jz L_decode_distance_mmx
cannam@128 701
cannam@128 702 psrlq mm0,mm1 ; drop the code bits
cannam@128 703 movd mm1,eax ; the extra bits become the pending shift
cannam@128 704 movd ecx,mm0
cannam@128 705 sub ebp,eax
cannam@128 706 and ecx, [inflate_fast_mask+eax*4]
cannam@128 707 add edx,ecx ; len = base + extra bits (kept in edx)
cannam@128 708
cannam@128 709 L_decode_distance_mmx:
cannam@128 710 psrlq mm0,mm1 ; apply the pending shift
cannam@128 711
cannam@128 712 cmp ebp,32
cannam@128 713 ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
cannam@128 714
cannam@128 715 movd mm6,ebp
cannam@128 716 movd mm7,dword ptr [esi] ; fetch 32 more input bits
cannam@128 717 add esi,4
cannam@128 718 psllq mm7,mm6
cannam@128 719 add ebp,32
cannam@128 720 por mm0,mm7
cannam@128 721
cannam@128 722 L_get_dist_code_mmx:
cannam@128 723 mov ebx, [esp+12] ; ebx = distcode table (lencode is reloaded before looping)
cannam@128 724 pand mm5,mm0
cannam@128 725 movd eax,mm5 ; eax = hold & dmask
cannam@128 726 movq mm5,mm2 ; reload the working mask from its master copy
cannam@128 727 mov eax, [ebx+eax*4]
cannam@128 728
cannam@128 729 L_dodist_mmx:
cannam@128 730
cannam@128 731 movzx ecx,ah
cannam@128 732 mov ebx,eax
cannam@128 733 shr ebx,16 ; ebx = value field (distance base when op bit 16 is set)
cannam@128 734 sub ebp,ecx
cannam@128 735 movd mm1,ecx ; code bits become the pending shift
cannam@128 736
cannam@128 737 test al,16
cannam@128 738 jz L_test_for_second_level_dist_mmx
cannam@128 739 and eax,15 ; eax = number of extra distance bits
cannam@128 740 jz L_check_dist_one_mmx
cannam@128 741
cannam@128 742 L_add_bits_to_dist_mmx:
cannam@128 743 psrlq mm0,mm1 ; drop the code bits
cannam@128 744 movd mm1,eax ; the extra bits become the pending shift
cannam@128 745 movd ecx,mm0
cannam@128 746 sub ebp,eax
cannam@128 747 and ecx, [inflate_fast_mask+eax*4]
cannam@128 748 add ebx,ecx ; dist = base + extra bits (kept in ebx)
cannam@128 749
cannam@128 750 L_check_window_mmx: ; ebx = dist, edx = len
cannam@128 751 mov [esp+44],esi ; save the input pointer; esi becomes the copy source
cannam@128 752 mov eax,edi
cannam@128 753 sub eax, [esp+40] ; eax = out - beg
cannam@128 754
cannam@128 755 cmp eax,ebx
cannam@128 756 jb L_clip_window_mmx ; dist reaches back before beg
cannam@128 757
cannam@128 758 mov ecx,edx
cannam@128 759 mov esi,edi
cannam@128 760 sub esi,ebx ; esi = out - dist
cannam@128 761
cannam@128 762 sub ecx,3 ; three bytes are copied by hand, the remaining len-3 by rep movsb
cannam@128 763 mov al, [esi]
cannam@128 764 mov [edi],al
cannam@128 765 mov al, [esi+1]
cannam@128 766 mov dl, [esi+2] ; overwrites dl; len-3 is already in ecx
cannam@128 767 add esi,3
cannam@128 768 mov [edi+1],al
cannam@128 769 mov [edi+2],dl
cannam@128 770 add edi,3
cannam@128 771 rep movsb
cannam@128 772
cannam@128 773 mov esi, [esp+44] ; restore the input pointer
cannam@128 774 mov ebx, [esp+8] ; restore the lencode table pointer
cannam@128 775 jmp L_while_test_mmx
cannam@128 776
cannam@128 777 ALIGN 4
cannam@128 778 L_check_dist_one_mmx: ; reached when the distance code has no extra bits; special-cases dist == 1
cannam@128 779 cmp ebx,1
cannam@128 780 jne L_check_window_mmx
cannam@128 781 cmp [esp+40],edi
cannam@128 782 je L_check_window_mmx ; out == beg: no previous byte in the output to repeat
cannam@128 783
cannam@128 784 dec edi
cannam@128 785 mov ecx,edx
cannam@128 786 mov al, [edi] ; al = previous output byte
cannam@128 787 sub ecx,3
cannam@128 788
cannam@128 789 mov [edi+1],al
cannam@128 790 mov [edi+2],al
cannam@128 791 mov [edi+3],al
cannam@128 792 add edi,4
cannam@128 793 rep stosb ; remaining len-3 copies
cannam@128 794
cannam@128 795 mov ebx, [esp+8] ; restore the lencode table pointer
cannam@128 796 jmp L_while_test_mmx
cannam@128 797
cannam@128 798 ALIGN 4
cannam@128 799 L_test_for_second_level_length_mmx: ; op bit 16 clear; edx = value field, ebx = lencode table
cannam@128 800 test al,64
cannam@128 801 jnz L_test_for_end_of_block
cannam@128 802
cannam@128 803 and eax,15
cannam@128 804 psrlq mm0,mm1 ; drop the first-level code bits
cannam@128 805 movd ecx,mm0
cannam@128 806 and ecx, [inflate_fast_mask+eax*4]
cannam@128 807 add ecx,edx ; next index = value + masked bits
cannam@128 808 mov eax, [ebx+ecx*4]
cannam@128 809 jmp L_dolen_mmx
cannam@128 810
cannam@128 811 ALIGN 4
cannam@128 812 L_test_for_second_level_dist_mmx: ; same lookup for the distance table; ebx = value field
cannam@128 813 test al,64
cannam@128 814 jnz L_invalid_distance_code
cannam@128 815
cannam@128 816 and eax,15
cannam@128 817 psrlq mm0,mm1 ; drop the first-level code bits
cannam@128 818 movd ecx,mm0
cannam@128 819 and ecx, [inflate_fast_mask+eax*4]
cannam@128 820 mov eax, [esp+12]
cannam@128 821 add ecx,ebx ; next index = value + masked bits
cannam@128 822 mov eax, [eax+ecx*4]
cannam@128 823 jmp L_dodist_mmx
cannam@128 824
cannam@128 825 ALIGN 4
cannam@128 826 L_clip_window_mmx: ; the match starts before beg, so its first part is read from the window
cannam@128 827 ; On entry: eax = out - beg, ebx = dist, edx = len.
cannam@128 828 mov ecx,eax
cannam@128 829 mov eax, [esp+52] ; eax = wsize
cannam@128 830 neg ecx
cannam@128 831 mov esi, [esp+56] ; esi = window
cannam@128 832
cannam@128 833 cmp eax,ebx
cannam@128 834 jb L_invalid_distance_too_far ; wsize < dist (the check is against wsize)
cannam@128 835
cannam@128 836 add ecx,ebx ; ecx = dist - (out - beg) = bytes to take from the window
cannam@128 837 cmp dword ptr [esp+48],0
cannam@128 838 jne L_wrap_around_window_mmx
cannam@128 839 ; write == 0: the wanted bytes are the last ecx bytes of the window.
cannam@128 840 sub eax,ecx
cannam@128 841 add esi,eax ; esi = window + wsize - ecx
cannam@128 842
cannam@128 843 cmp edx,ecx
cannam@128 844 jbe L_do_copy1_mmx ; the whole match comes from the window
cannam@128 845
cannam@128 846 sub edx,ecx ; edx = bytes left after the window part
cannam@128 847 rep movsb
cannam@128 848 mov esi,edi
cannam@128 849 sub esi,ebx ; continue from the output itself: esi = out - dist
cannam@128 850 jmp L_do_copy1_mmx
cannam@128 860
cannam@128 861 L_wrap_around_window_mmx: ; write != 0; ecx = bytes needed from the window, esi = window
cannam@128 862
cannam@128 863 mov eax, [esp+48] ; eax = write
cannam@128 864 cmp ecx,eax
cannam@128 865 jbe L_contiguous_in_window_mmx
cannam@128 866 ; More than `write` bytes needed: start near the end of the window and wrap to its start.
cannam@128 867 add esi, [esp+52]
cannam@128 868 add esi,eax
cannam@128 869 sub esi,ecx ; esi = window + wsize + write - ecx
cannam@128 870 sub ecx,eax ; ecx = bytes available before the end of the window
cannam@128 871
cannam@128 872
cannam@128 873 cmp edx,ecx
cannam@128 874 jbe L_do_copy1_mmx
cannam@128 875
cannam@128 876 sub edx,ecx
cannam@128 877 rep movsb ; tail part of the window
cannam@128 878 mov esi, [esp+56]
cannam@128 879 mov ecx, [esp+48] ; then up to `write` bytes from the start of the window
cannam@128 880 cmp edx,ecx
cannam@128 881 jbe L_do_copy1_mmx
cannam@128 882
cannam@128 883 sub edx,ecx
cannam@128 884 rep movsb
cannam@128 885 mov esi,edi
cannam@128 886 sub esi,ebx ; remainder comes from the output: esi = out - dist
cannam@128 887 jmp L_do_copy1_mmx
cannam@128 888
cannam@128 889 L_contiguous_in_window_mmx: ; the ecx bytes end at window + write
cannam@128 890
cannam@128 891 add esi,eax
cannam@128 892 sub esi,ecx ; esi = window + write - ecx
cannam@128 893
cannam@128 894
cannam@128 895 cmp edx,ecx
cannam@128 896 jbe L_do_copy1_mmx
cannam@128 897
cannam@128 898 sub edx,ecx
cannam@128 899 rep movsb
cannam@128 900 mov esi,edi
cannam@128 901 sub esi,ebx ; remainder comes from the output: esi = out - dist
cannam@128 902
cannam@128 903 L_do_copy1_mmx: ; copy the final edx bytes from esi, then resume decoding
cannam@128 904
cannam@128 905
cannam@128 906 mov ecx,edx
cannam@128 907 rep movsb
cannam@128 908
cannam@128 909 mov esi, [esp+44] ; restore the input pointer saved in L_check_window_mmx
cannam@128 910 mov ebx, [esp+8] ; restore the lencode table pointer
cannam@128 911 jmp L_while_test_mmx
cannam@128 912 ; 1174 "inffast.S"
cannam@128 913 L_invalid_distance_code:
cannam@128 914 ; The four exits below are shared by the integer and MMX loops. Each loads
cannam@128 915 ; ecx = message address (0 for none) and edx = new state->mode value,
cannam@128 916 ; then goes to L_update_stream_state.
cannam@128 917
cannam@128 918
cannam@128 919 mov ecx, invalid_distance_code_msg
cannam@128 920 mov edx,INFLATE_MODE_BAD
cannam@128 921 jmp L_update_stream_state
cannam@128 922
cannam@128 923 L_test_for_end_of_block:
cannam@128 924 ; Length entry with op bit 64 set: bit 32 set means end of block (mode = TYPE, no
cannam@128 925 ; message); otherwise the literal/length code is invalid.
cannam@128 926
cannam@128 927
cannam@128 928
cannam@128 929 test al,32
cannam@128 930 jz L_invalid_literal_length_code
cannam@128 931
cannam@128 932 mov ecx,0
cannam@128 933 mov edx,INFLATE_MODE_TYPE
cannam@128 934 jmp L_update_stream_state
cannam@128 935
cannam@128 936 L_invalid_literal_length_code:
cannam@128 937
cannam@128 938
cannam@128 939
cannam@128 940
cannam@128 941
cannam@128 942 mov ecx, invalid_literal_length_code_msg
cannam@128 943 mov edx,INFLATE_MODE_BAD
cannam@128 944 jmp L_update_stream_state
cannam@128 945
cannam@128 946 L_invalid_distance_too_far:
cannam@128 947 ; Reached from the clip-window code, where esi was reloaded with the window address,
cannam@128 948 ; so the input pointer is restored first.
cannam@128 949
cannam@128 950 mov esi, [esp+44]
cannam@128 951 mov ecx, invalid_distance_too_far_msg
cannam@128 952 mov edx,INFLATE_MODE_BAD
cannam@128 953 jmp L_update_stream_state
cannam@128 954
cannam@128 955 L_update_stream_state:
cannam@128 956 ; Store the message (if any) at strm+24 and the new mode into state->mode.
cannam@128 957 mov eax, [esp+88]
cannam@128 958 test ecx,ecx
cannam@128 959 jz L_skip_msg
cannam@128 960 mov [eax+24],ecx
cannam@128 961 L_skip_msg:
cannam@128 962 mov eax, [eax+28]
cannam@128 963 mov [eax+mode_state],edx
cannam@128 964 jmp L_break_loop
cannam@128 965
cannam@128 966 ALIGN 4
cannam@128 967 L_break_loop: ; common exit: write the decoder position back to strm and state
cannam@128 968 ; 1243 "inffast.S"
cannam@128 969 cmp dword ptr [inflate_fast_use_mmx],2
cannam@128 970 jne L_update_next_in
cannam@128 971 ; The MMX loop keeps the bit count in ebp; move it to ebx as the code below expects.
cannam@128 972
cannam@128 973
cannam@128 974 mov ebx,ebp
cannam@128 975
cannam@128 976 L_update_next_in:
cannam@128 977 ; 1266 "inffast.S"
cannam@128 978 mov eax, [esp+88] ; eax = strm
cannam@128 979 mov ecx,ebx
cannam@128 980 mov edx, [eax+28] ; edx = state
cannam@128 981 shr ecx,3 ; ecx = whole unused bytes still held in hold
cannam@128 982 sub esi,ecx ; give those bytes back to the input
cannam@128 983 shl ecx,3
cannam@128 984 sub ebx,ecx ; keep only the leftover bits (fewer than 8)
cannam@128 985 mov [eax+12],edi ; store the output pointer
cannam@128 986 mov [edx+bits_state],ebx
cannam@128 987 mov ecx,ebx
cannam@128 988
cannam@128 989 lea ebx, [esp+28]
cannam@128 990 cmp [esp+20],ebx
cannam@128 991 jne L_buf_not_used ; last != stack buffer address: input was read in place
cannam@128 992 ; Input came from the stack buffer: translate esi and last back to the caller's buffer.
cannam@128 993 sub esi,ebx ; esi = bytes consumed from the stack buffer
cannam@128 994 mov ebx, [eax+0]
cannam@128 995 mov [esp+20],ebx
cannam@128 996 add esi,ebx ; esi = original input pointer + bytes consumed
cannam@128 997 mov ebx, [eax+4]
cannam@128 998 sub ebx,11
cannam@128 999 add [esp+20],ebx ; last = original input pointer + input count - 11
cannam@128 1000
cannam@128 1001 L_buf_not_used:
cannam@128 1002 mov [eax+0],esi ; store the input pointer
cannam@128 1003
cannam@128 1004 mov ebx,1
cannam@128 1005 shl ebx,cl
cannam@128 1006 dec ebx ; ebx = (1 shl bits) - 1
cannam@128 1007
cannam@128 1008
cannam@128 1009
cannam@128 1010
cannam@128 1011
cannam@128 1012 cmp dword ptr [inflate_fast_use_mmx],2
cannam@128 1013 jne L_update_hold
cannam@128 1014 ; MMX loop: apply the pending shift and move the low 32 bits of mm0 into ebp.
cannam@128 1015
cannam@128 1016
cannam@128 1017 psrlq mm0,mm1
cannam@128 1018 movd ebp,mm0
cannam@128 1019
cannam@128 1020 emms
cannam@128 1021
cannam@128 1022 L_update_hold:
cannam@128 1023
cannam@128 1024
cannam@128 1025
cannam@128 1026 and ebp,ebx ; keep only the `bits` valid low bits
cannam@128 1027 mov [edx+hold_state],ebp
cannam@128 1028
cannam@128 1029 ; Input byte count (strm+4) = last - in + 11; both branches store that same value.
cannam@128 1030
cannam@128 1031
cannam@128 1032 mov ebx, [esp+20]
cannam@128 1033 cmp ebx,esi
cannam@128 1034 jbe L_last_is_smaller
cannam@128 1035
cannam@128 1036 sub ebx,esi
cannam@128 1037 add ebx,11
cannam@128 1038 mov [eax+4],ebx
cannam@128 1039 jmp L_fixup_out
cannam@128 1040 L_last_is_smaller:
cannam@128 1041 sub esi,ebx
cannam@128 1042 neg esi
cannam@128 1043 add esi,11
cannam@128 1044 mov [eax+4],esi
cannam@128 1045
cannam@128 1046
cannam@128 1047
cannam@128 1048
cannam@128 1049 L_fixup_out:
cannam@128 1050 ; Output space left (strm+16) = end - out + 257; both branches store that same value.
cannam@128 1051 mov ebx, [esp+16]
cannam@128 1052 cmp ebx,edi
cannam@128 1053 jbe L_end_is_smaller
cannam@128 1054
cannam@128 1055 sub ebx,edi
cannam@128 1056 add ebx,257
cannam@128 1057 mov [eax+16],ebx
cannam@128 1058 jmp L_done
cannam@128 1059 L_end_is_smaller:
cannam@128 1060 sub edi,ebx
cannam@128 1061 neg edi
cannam@128 1062 add edi,257
cannam@128 1063 mov [eax+16],edi
cannam@128 1064
cannam@128 1065
cannam@128 1066
cannam@128 1067
cannam@128 1068
cannam@128 1069 L_done: ; epilogue: undo the prologue in reverse order
cannam@128 1070 add esp,64
cannam@128 1071 popfd
cannam@128 1072 pop ebx
cannam@128 1073 pop ebp
cannam@128 1074 pop esi
cannam@128 1075 pop edi
cannam@128 1076 ret
cannam@128 1077 _inflate_fast endp
cannam@128 1078
cannam@128 1079 _TEXT ends
cannam@128 1080 end