annotate src/zlib-1.2.7/contrib/masmx86/inffas32.asm @ 168:ceec0dd9ec9c

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 07 Feb 2020 11:51:13 +0000
parents 8a15ff55d9af
children
rev   line source
cannam@89 1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
cannam@89 2 ; *
cannam@89 3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
cannam@89 4 ; *
cannam@89 5 ; * Copyright (C) 1995-2003 Mark Adler
cannam@89 6 ; * For conditions of distribution and use, see copyright notice in zlib.h
cannam@89 7 ; *
cannam@89 8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
cannam@89 9 ; * Please use the copyright conditions above.
cannam@89 10 ; *
cannam@89 11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
cannam@89 12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
cannam@89 13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
cannam@89 14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
cannam@89 15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
cannam@89 16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
cannam@89 17 ; * versions of this and inffast.S can be found at
cannam@89 18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
cannam@89 19 ; *
cannam@89 20 ; * 2005 : modification by Gilles Vollant
cannam@89 21 ; */
cannam@89 22 ; For Visual C++ 4.x and higher and ML 6.x and higher
cannam@89 23 ; ml.exe is in directory \MASM611C of Win95 DDK
cannam@89 24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
cannam@89 25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
cannam@89 26 ;
cannam@89 27 ;
cannam@89 28 ; compile with command line option
cannam@89 29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
cannam@89 30
cannam@89 31 ; if you define NO_GZIP (see inflate.h), compile with
cannam@89 32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
cannam@89 33
cannam@89 34
cannam@89 35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
cannam@89 36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
cannam@89 37 ; in inflate_state in inflate.h); it is added to the *_state field offsets defined below
cannam@89 38 zlib1222sup equ 8
cannam@89 39
cannam@89 40
cannam@89 41 IFDEF NO_GUNZIP ; mode numbers later stored into state->mode by the exit paths
cannam@89 42 IFDEF GUNZIP ; GUNZIP takes precedence when both symbols are defined
cannam@89 43 INFLATE_MODE_TYPE equ 11
cannam@89 44 INFLATE_MODE_BAD equ 26
cannam@89 45 ELSE ; NO_GUNZIP alone selects the short numbering
cannam@89 46 INFLATE_MODE_TYPE equ 3
cannam@89 47 INFLATE_MODE_BAD equ 17
cannam@89 48 ENDIF
cannam@89 49 ELSE ; default build, NO_GUNZIP not defined
cannam@89 50 INFLATE_MODE_TYPE equ 11
cannam@89 51 INFLATE_MODE_BAD equ 26
cannam@89 52 ENDIF ; NOTE(review): values must match the inflate_mode enum of the inflate.h in use - confirm
cannam@89 53
cannam@89 54
cannam@89 55 ; 75 "inffast.S"
cannam@89 56 ;FILE "inffast.S"
cannam@89 57
cannam@89 58 ;;;GLOBAL _inflate_fast
cannam@89 59
cannam@89 60 ;;;SECTION .text
cannam@89 61
cannam@89 62
cannam@89 63
cannam@89 64 .586p ; enable the Pentium instruction set (cpuid is used below)
cannam@89 65 .mmx ; enable MMX mnemonics (movd, movq, psrlq, ...)
cannam@89 66
cannam@89 67 name inflate_fast_x86
cannam@89 68 .MODEL FLAT
cannam@89 69
cannam@89 70 _DATA segment ; NOTE(review): no matching "_DATA ends" appears before _TEXT is opened - confirm intended
cannam@89 71 inflate_fast_use_mmx: ; written by L_check_mmx: 2 = use the MMX loop, 3 = use the integer loop
cannam@89 72 dd 1 ; initial value 1 makes the first call run the CPU probe
cannam@89 73
cannam@89 74
cannam@89 75 _TEXT segment
cannam@89 76 ; NUL-terminated strings emitted after the _TEXT segment directive.
cannam@89 77 ; Each one starts on a 4-byte boundary; the first has no label.
cannam@89 78 ; The three labelled texts are the messages stored by the error exits of _inflate_fast.
cannam@89 79 ALIGN 4
cannam@89 80 db 'Fast decoding Code from Chris Anderson', 0
cannam@89 81
cannam@89 82
cannam@89 83 ALIGN 4
cannam@89 84 invalid_literal_length_code_msg:
cannam@89 85 db 'invalid literal/length code', 0
cannam@89 86
cannam@89 87
cannam@89 88 ALIGN 4
cannam@89 89 invalid_distance_code_msg:
cannam@89 90 db 'invalid distance code', 0
cannam@89 91
cannam@89 92
cannam@89 93 ALIGN 4
cannam@89 94 invalid_distance_too_far_msg:
cannam@89 95 db 'invalid distance too far back', 0
cannam@89 96
cannam@89 97
cannam@89 98
cannam@89 99 ALIGN 4
cannam@89 100 inflate_fast_mask: ; 33 dwords: entry n (n = 0..32) has its low n bits set
cannam@89 101 dd 000000000h
cannam@89 102 dd 000000001h
cannam@89 103 dd 000000003h
cannam@89 104 dd 000000007h
cannam@89 105 dd 00000000Fh
cannam@89 106 dd 00000001Fh
cannam@89 107 dd 00000003Fh
cannam@89 108 dd 00000007Fh
cannam@89 109 dd 0000000FFh
cannam@89 110 dd 0000001FFh
cannam@89 111 dd 0000003FFh
cannam@89 112 dd 0000007FFh
cannam@89 113 dd 000000FFFh
cannam@89 114 dd 000001FFFh
cannam@89 115 dd 000003FFFh
cannam@89 116 dd 000007FFFh
cannam@89 117 dd 00000FFFFh
cannam@89 118 dd 00001FFFFh
cannam@89 119 dd 00003FFFFh
cannam@89 120 dd 00007FFFFh
cannam@89 121 dd 0000FFFFFh
cannam@89 122 dd 0001FFFFFh
cannam@89 123 dd 0003FFFFFh
cannam@89 124 dd 0007FFFFFh
cannam@89 125 dd 000FFFFFFh
cannam@89 126 dd 001FFFFFFh
cannam@89 127 dd 003FFFFFFh
cannam@89 128 dd 007FFFFFFh
cannam@89 129 dd 00FFFFFFFh
cannam@89 130 dd 01FFFFFFFh
cannam@89 131 dd 03FFFFFFFh
cannam@89 132 dd 07FFFFFFFh
cannam@89 133 dd 0FFFFFFFFh
cannam@89 134
cannam@89 135
cannam@89 136 mode_state equ 0 ; byte offset of state->mode
cannam@89 137 wsize_state equ (zlib1222sup+32) ; byte offset of state->wsize
cannam@89 138 write_state equ (zlib1222sup+40) ; byte offset of state->write
cannam@89 139 window_state equ (zlib1222sup+44) ; byte offset of state->window
cannam@89 140 hold_state equ (zlib1222sup+48) ; byte offset of state->hold
cannam@89 141 bits_state equ (zlib1222sup+52) ; byte offset of state->bits
cannam@89 142 lencode_state equ (zlib1222sup+68) ; byte offset of state->lencode
cannam@89 143 distcode_state equ (zlib1222sup+72) ; byte offset of state->distcode
cannam@89 144 lenbits_state equ (zlib1222sup+76) ; byte offset of state->lenbits
cannam@89 145 distbits_state equ (zlib1222sup+80) ; byte offset of state->distbits
cannam@89 146
cannam@89 147
cannam@89 148 ;;SECTION .text
cannam@89 149 ; 205 "inffast.S"
cannam@89 150 ;GLOBAL inflate_fast_use_mmx
cannam@89 151
cannam@89 152 ;SECTION .data
cannam@89 153
cannam@89 154
cannam@89 155 ; GLOBAL inflate_fast_use_mmx:object
cannam@89 156 ;.size inflate_fast_use_mmx, 4
cannam@89 157 ; 226 "inffast.S"
cannam@89 158 ;SECTION .text
cannam@89 159
cannam@89 160 ALIGN 4 ; _inflate_fast: fast decoding loop (assembler version of inffast.c, see file header); takes two stack arguments
cannam@89 161 _inflate_fast proc near
cannam@89 162 .FPO (16, 4, 0, 0, 1, 0)
cannam@89 163 push edi ; save the registers this routine overwrites
cannam@89 164 push esi
cannam@89 165 push ebp
cannam@89 166 push ebx
cannam@89 167 pushfd ; save EFLAGS (cld below changes the direction flag)
cannam@89 168 sub esp,64 ; 64 bytes of locals addressed as [esp+n]
cannam@89 169 cld ; string instructions must step forward
cannam@89 170 ; locals: [esp+0] length mask, [esp+4] distance mask, [esp+8] lencode, [esp+12] distcode,
cannam@89 171 ; [esp+16] output limit, [esp+20] input limit, [esp+24] match length, [esp+28..39] 12-byte input copy,
cannam@89 172 ; [esp+40] out - (arg2 - avail_out), [esp+44] input pointer, [esp+48] write, [esp+52] wsize,
cannam@89 173 ; [esp+56] window, [esp+60] output pointer at entry
cannam@89 174 mov esi, [esp+88] ; first argument: 20 bytes pushed + 64 locals + return address = 88 (presumably the z_stream pointer)
cannam@89 175 mov edi, [esi+28] ; edi = state block, indexed below with the *_state offsets
cannam@89 176
cannam@89 177
cannam@89 178
cannam@89 179
cannam@89 180
cannam@89 181
cannam@89 182
cannam@89 183 mov edx, [esi+4] ; presumably avail_in
cannam@89 184 mov eax, [esi+0] ; input pointer (presumably next_in)
cannam@89 185
cannam@89 186 add edx,eax
cannam@89 187 sub edx,11 ; edx = in + avail_in - 11
cannam@89 188
cannam@89 189 mov [esp+44],eax ; input pointer
cannam@89 190 mov [esp+20],edx ; input limit
cannam@89 191
cannam@89 192 mov ebp, [esp+92] ; second argument
cannam@89 193 mov ecx, [esi+16] ; presumably avail_out
cannam@89 194 mov ebx, [esi+12] ; output pointer (presumably next_out)
cannam@89 195
cannam@89 196 sub ebp,ecx
cannam@89 197 neg ebp
cannam@89 198 add ebp,ebx ; ebp = out - (arg2 - avail_out)
cannam@89 199
cannam@89 200 sub ecx,257
cannam@89 201 add ecx,ebx ; ecx = out + avail_out - 257
cannam@89 202
cannam@89 203 mov [esp+60],ebx ; output pointer
cannam@89 204 mov [esp+40],ebp ; lowest output address a match may copy from directly
cannam@89 205 mov [esp+16],ecx ; output limit
cannam@89 206 ; 285 "inffast.S"
cannam@89 207 mov eax, [edi+lencode_state]
cannam@89 208 mov ecx, [edi+distcode_state]
cannam@89 209
cannam@89 210 mov [esp+8],eax ; length/literal table
cannam@89 211 mov [esp+12],ecx ; distance table
cannam@89 212
cannam@89 213 mov eax,1
cannam@89 214 mov ecx, [edi+lenbits_state]
cannam@89 215 shl eax,cl
cannam@89 216 dec eax
cannam@89 217 mov [esp+0],eax ; (1 << lenbits) - 1
cannam@89 218
cannam@89 219 mov eax,1
cannam@89 220 mov ecx, [edi+distbits_state]
cannam@89 221 shl eax,cl
cannam@89 222 dec eax
cannam@89 223 mov [esp+4],eax ; (1 << distbits) - 1
cannam@89 224
cannam@89 225 mov eax, [edi+wsize_state]
cannam@89 226 mov ecx, [edi+write_state]
cannam@89 227 mov edx, [edi+window_state]
cannam@89 228
cannam@89 229 mov [esp+52],eax ; wsize
cannam@89 230 mov [esp+48],ecx ; write
cannam@89 231 mov [esp+56],edx ; window
cannam@89 232
cannam@89 233 mov ebp, [edi+hold_state] ; ebp = bit buffer
cannam@89 234 mov ebx, [edi+bits_state] ; ebx = number of bits held in it
cannam@89 235 ; 321 "inffast.S"
cannam@89 236 mov esi, [esp+44] ; esi = input pointer
cannam@89 237 mov ecx, [esp+20] ; ecx = input limit
cannam@89 238 cmp ecx,esi
cannam@89 239 ja L_align_long ; limit above pointer: read the caller's buffer directly
cannam@89 240
cannam@89 241 add ecx,11
cannam@89 242 sub ecx,esi ; ecx = limit + 11 - in = input bytes available
cannam@89 243 mov eax,12
cannam@89 244 sub eax,ecx ; eax = 12 - available
cannam@89 245 lea edi, [esp+28] ; destination: the local 12-byte buffer
cannam@89 246 rep movsb ; copy the available input bytes
cannam@89 247 mov ecx,eax
cannam@89 248 xor eax,eax
cannam@89 249 rep stosb ; zero-fill the remainder of the 12 bytes
cannam@89 250 lea esi, [esp+28] ; read input from the local copy from now on
cannam@89 251 mov [esp+20],esi ; limit = start of the copy; L_update_next_in tests this to detect the copy
cannam@89 252 jmp L_is_aligned
cannam@89 253
cannam@89 254
cannam@89 255 L_align_long: ; consume single bytes until esi is 4-byte aligned
cannam@89 256 test esi,3
cannam@89 257 jz L_is_aligned
cannam@89 258 xor eax,eax
cannam@89 259 mov al, [esi] ; next input byte
cannam@89 260 inc esi
cannam@89 261 mov ecx,ebx ; shift count = bits already held
cannam@89 262 add ebx,8
cannam@89 263 shl eax,cl
cannam@89 264 or ebp,eax ; place the byte above the bits already in the buffer
cannam@89 265 jmp L_align_long
cannam@89 266
cannam@89 267 L_is_aligned:
cannam@89 268 mov edi, [esp+60] ; edi = output pointer
cannam@89 269 ; 366 "inffast.S"
cannam@89 270 L_check_mmx: ; dispatch on inflate_fast_use_mmx: 2 = MMX loop, above 2 = integer loop, below 2 = probe the CPU
cannam@89 271 cmp dword ptr [inflate_fast_use_mmx],2
cannam@89 272 je L_init_mmx
cannam@89 273 ja L_do_loop
cannam@89 274
cannam@89 275 push eax ; cpuid overwrites eax, ebx, ecx and edx
cannam@89 276 push ebx
cannam@89 277 push ecx
cannam@89 278 push edx
cannam@89 279 pushfd
cannam@89 280 mov eax, [esp] ; eax = current EFLAGS
cannam@89 281 xor dword ptr [esp],0200000h ; flip bit 21 (the ID flag) in the stacked copy
cannam@89 282
cannam@89 283
cannam@89 284
cannam@89 285
cannam@89 286 popfd ; try to load the modified EFLAGS
cannam@89 287 pushfd
cannam@89 288 pop edx ; edx = EFLAGS as the CPU kept them
cannam@89 289 xor edx,eax
cannam@89 290 jz L_dont_use_mmx ; bit did not toggle: cpuid is not available
cannam@89 291 xor eax,eax
cannam@89 292 cpuid ; leaf 0: vendor string in ebx, edx, ecx
cannam@89 293 cmp ebx,0756e6547h ; "Genu"
cannam@89 294 jne L_dont_use_mmx
cannam@89 295 cmp ecx,06c65746eh ; "ntel"
cannam@89 296 jne L_dont_use_mmx
cannam@89 297 cmp edx,049656e69h ; "ineI"
cannam@89 298 jne L_dont_use_mmx
cannam@89 299 mov eax,1
cannam@89 300 cpuid ; leaf 1: signature in eax, feature flags in edx
cannam@89 301 shr eax,8
cannam@89 302 and eax,15 ; family field
cannam@89 303 cmp eax,6
cannam@89 304 jne L_dont_use_mmx ; only family 6 is given the MMX loop
cannam@89 305 test edx,0800000h ; feature bit 23 = MMX
cannam@89 306 jnz L_use_mmx
cannam@89 307 jmp L_dont_use_mmx
cannam@89 308 L_use_mmx:
cannam@89 309 mov dword ptr [inflate_fast_use_mmx],2 ; remember the result for later calls
cannam@89 310 jmp L_check_mmx_pop
cannam@89 311 L_dont_use_mmx:
cannam@89 312 mov dword ptr [inflate_fast_use_mmx],3 ; remember the result for later calls
cannam@89 313 L_check_mmx_pop:
cannam@89 314 pop edx
cannam@89 315 pop ecx
cannam@89 316 pop ebx
cannam@89 317 pop eax
cannam@89 318 jmp L_check_mmx ; dispatch again, now with 2 or 3 stored
cannam@89 319 ; 426 "inffast.S"
cannam@89 320 ALIGN 4
cannam@89 321 L_do_loop: ; integer loop: esi = input, edi = output, ebp = bit buffer, bl = bits held
cannam@89 322 ; 437 "inffast.S"
cannam@89 323 cmp bl,15
cannam@89 324 ja L_get_length_code ; more than 15 bits held: no refill needed
cannam@89 325
cannam@89 326 xor eax,eax
cannam@89 327 lodsw ; fetch 16 input bits, esi += 2
cannam@89 328 mov cl,bl
cannam@89 329 add bl,16
cannam@89 330 shl eax,cl
cannam@89 331 or ebp,eax ; place them above the bits already held
cannam@89 332
cannam@89 333 L_get_length_code:
cannam@89 334 mov edx, [esp+0] ; length mask
cannam@89 335 mov ecx, [esp+8] ; lencode table
cannam@89 336 and edx,ebp
cannam@89 337 mov eax, [ecx+edx*4] ; 4-byte entry: al = op, ah = bit count, high word = value
cannam@89 338
cannam@89 339 L_dolen:
cannam@89 340
cannam@89 341
cannam@89 342
cannam@89 343
cannam@89 344
cannam@89 345
cannam@89 346 mov cl,ah
cannam@89 347 sub bl,ah
cannam@89 348 shr ebp,cl ; drop the bits of this code from the buffer
cannam@89 349
cannam@89 350
cannam@89 351
cannam@89 352
cannam@89 353
cannam@89 354
cannam@89 355 test al,al
cannam@89 356 jnz L_test_for_length_base ; op != 0: not a literal
cannam@89 357
cannam@89 358 shr eax,16
cannam@89 359 stosb ; op == 0: emit the low byte of the value as a literal
cannam@89 360
cannam@89 361 L_while_test: ; continue only while both limits are still ahead
cannam@89 362
cannam@89 363
cannam@89 364 cmp [esp+16],edi
cannam@89 365 jbe L_break_loop ; output limit <= output pointer
cannam@89 366
cannam@89 367 cmp [esp+20],esi
cannam@89 368 ja L_do_loop ; input limit still above input pointer
cannam@89 369 jmp L_break_loop
cannam@89 370
cannam@89 371 L_test_for_length_base: ; eax = table entry with op != 0
cannam@89 372 ; 502 "inffast.S"
cannam@89 373 mov edx,eax
cannam@89 374 shr edx,16 ; edx = value field (length base)
cannam@89 375 mov cl,al ; cl = op
cannam@89 376
cannam@89 377 test al,16
cannam@89 378 jz L_test_for_second_level_length ; op bit 16 clear: not a length base
cannam@89 379 and cl,15 ; low four op bits = number of extra bits
cannam@89 380 jz L_save_len ; no extra bits
cannam@89 381 cmp bl,cl
cannam@89 382 jae L_add_bits_to_len ; enough bits already held
cannam@89 383
cannam@89 384 mov ch,cl ; keep the extra-bit count across the refill
cannam@89 385 xor eax,eax
cannam@89 386 lodsw ; fetch 16 more input bits
cannam@89 387 mov cl,bl
cannam@89 388 add bl,16
cannam@89 389 shl eax,cl
cannam@89 390 or ebp,eax
cannam@89 391 mov cl,ch
cannam@89 392
cannam@89 393 L_add_bits_to_len:
cannam@89 394 mov eax,1
cannam@89 395 shl eax,cl
cannam@89 396 dec eax ; mask with cl low bits set
cannam@89 397 sub bl,cl
cannam@89 398 and eax,ebp ; extra bits
cannam@89 399 shr ebp,cl
cannam@89 400 add edx,eax ; length = base + extra
cannam@89 401
cannam@89 402 L_save_len:
cannam@89 403 mov [esp+24],edx ; keep the length while edx is reused for the distance
cannam@89 404
cannam@89 405
cannam@89 406 L_decode_distance: ; look up the distance code that follows a length
cannam@89 407 ; 549 "inffast.S"
cannam@89 408 cmp bl,15
cannam@89 409 ja L_get_distance_code ; more than 15 bits held: no refill needed
cannam@89 410
cannam@89 411 xor eax,eax
cannam@89 412 lodsw ; fetch 16 more input bits
cannam@89 413 mov cl,bl
cannam@89 414 add bl,16
cannam@89 415 shl eax,cl
cannam@89 416 or ebp,eax
cannam@89 417
cannam@89 418 L_get_distance_code:
cannam@89 419 mov edx, [esp+4] ; distance mask
cannam@89 420 mov ecx, [esp+12] ; distcode table
cannam@89 421 and edx,ebp
cannam@89 422 mov eax, [ecx+edx*4] ; entry: al = op, ah = bit count, high word = value
cannam@89 423
cannam@89 424
cannam@89 425 L_dodist:
cannam@89 426 mov edx,eax
cannam@89 427 shr edx,16 ; edx = value field (distance base)
cannam@89 428 mov cl,ah
cannam@89 429 sub bl,ah
cannam@89 430 shr ebp,cl ; drop the bits of this code
cannam@89 431 ; 584 "inffast.S"
cannam@89 432 mov cl,al ; cl = op
cannam@89 433
cannam@89 434 test al,16
cannam@89 435 jz L_test_for_second_level_dist ; op bit 16 clear: not a distance base
cannam@89 436 and cl,15 ; number of extra bits
cannam@89 437 jz L_check_dist_one ; none: try the distance-1 shortcut
cannam@89 438 cmp bl,cl
cannam@89 439 jae L_add_bits_to_dist ; enough bits already held
cannam@89 440
cannam@89 441 mov ch,cl ; keep the extra-bit count across the refill
cannam@89 442 xor eax,eax
cannam@89 443 lodsw
cannam@89 444 mov cl,bl
cannam@89 445 add bl,16
cannam@89 446 shl eax,cl
cannam@89 447 or ebp,eax
cannam@89 448 mov cl,ch
cannam@89 449
cannam@89 450 L_add_bits_to_dist:
cannam@89 451 mov eax,1
cannam@89 452 shl eax,cl
cannam@89 453 dec eax ; mask with cl low bits set
cannam@89 454 sub bl,cl
cannam@89 455 and eax,ebp ; extra bits
cannam@89 456 shr ebp,cl
cannam@89 457 add edx,eax ; distance = base + extra
cannam@89 458 jmp L_check_window ; target is the very next label, so this jump is redundant but harmless
cannam@89 459
cannam@89 460 L_check_window: ; edx = distance, [esp+24] = length
cannam@89 461 ; 625 "inffast.S"
cannam@89 462 mov [esp+44],esi ; park the input pointer; esi becomes the copy source
cannam@89 463 mov eax,edi
cannam@89 464 sub eax, [esp+40] ; eax = bytes of output available behind edi
cannam@89 465
cannam@89 466 cmp eax,edx
cannam@89 467 jb L_clip_window ; distance reaches further back than that: use the window
cannam@89 468
cannam@89 469 mov ecx, [esp+24]
cannam@89 470 mov esi,edi
cannam@89 471 sub esi,edx ; source = out - distance
cannam@89 472
cannam@89 473 sub ecx,3 ; the first three bytes are moved by hand below
cannam@89 474 mov al, [esi]
cannam@89 475 mov [edi],al
cannam@89 476 mov al, [esi+1]
cannam@89 477 mov dl, [esi+2]
cannam@89 478 add esi,3
cannam@89 479 mov [edi+1],al
cannam@89 480 mov [edi+2],dl
cannam@89 481 add edi,3
cannam@89 482 rep movsb ; remaining length - 3 bytes; NOTE(review): relies on length >= 3 - confirm against the tables
cannam@89 483
cannam@89 484 mov esi, [esp+44] ; restore the input pointer
cannam@89 485 jmp L_while_test
cannam@89 486
cannam@89 487 ALIGN 4
cannam@89 488 L_check_dist_one: ; reached when the distance code carries no extra bits
cannam@89 489 cmp edx,1
cannam@89 490 jne L_check_window
cannam@89 491 cmp [esp+40],edi
cannam@89 492 je L_check_window ; no earlier output byte at this position: take the general path
cannam@89 493
cannam@89 494 dec edi
cannam@89 495 mov ecx, [esp+24]
cannam@89 496 mov al, [edi] ; previous output byte
cannam@89 497 sub ecx,3
cannam@89 498
cannam@89 499 mov [edi+1],al
cannam@89 500 mov [edi+2],al
cannam@89 501 mov [edi+3],al
cannam@89 502 add edi,4
cannam@89 503 rep stosb ; length copies of that byte in total (3 by hand + length - 3 here)
cannam@89 504
cannam@89 505 jmp L_while_test
cannam@89 506
cannam@89 507 ALIGN 4
cannam@89 508 L_test_for_second_level_length: ; al = cl = op with bit 16 clear, edx = value field
cannam@89 509
cannam@89 510
cannam@89 511
cannam@89 512
cannam@89 513 test al,64
cannam@89 514 jnz L_test_for_end_of_block ; op bit 64 set: end of block or invalid code
cannam@89 515
cannam@89 516 mov eax,1
cannam@89 517 shl eax,cl
cannam@89 518 dec eax ; mask with op low bits set
cannam@89 519 and eax,ebp
cannam@89 520 add eax,edx ; index = value + (hold & mask)
cannam@89 521 mov edx, [esp+8]
cannam@89 522 mov eax, [edx+eax*4] ; second lookup in the lencode table
cannam@89 523 jmp L_dolen
cannam@89 524
cannam@89 525 ALIGN 4
cannam@89 526 L_test_for_second_level_dist: ; al = cl = op with bit 16 clear, edx = value field
cannam@89 527
cannam@89 528
cannam@89 529
cannam@89 530
cannam@89 531 test al,64
cannam@89 532 jnz L_invalid_distance_code ; op bit 64 set: reported as an invalid distance code
cannam@89 533
cannam@89 534 mov eax,1
cannam@89 535 shl eax,cl
cannam@89 536 dec eax ; mask with op low bits set
cannam@89 537 and eax,ebp
cannam@89 538 add eax,edx ; index = value + (hold & mask)
cannam@89 539 mov edx, [esp+12]
cannam@89 540 mov eax, [edx+eax*4] ; second lookup in the distcode table
cannam@89 541 jmp L_dodist
cannam@89 542
cannam@89 543 ALIGN 4
cannam@89 544 L_clip_window: ; eax = output bytes available behind edi, edx = distance, [esp+24] = length
cannam@89 545 ; 721 "inffast.S"
cannam@89 546 mov ecx,eax
cannam@89 547 mov eax, [esp+52] ; wsize
cannam@89 548 neg ecx
cannam@89 549 mov esi, [esp+56] ; window base
cannam@89 550
cannam@89 551 cmp eax,edx
cannam@89 552 jb L_invalid_distance_too_far ; distance larger than wsize
cannam@89 553
cannam@89 554 add ecx,edx ; ecx = distance - available output = bytes to take from the window
cannam@89 555 cmp dword ptr [esp+48],0
cannam@89 556 jne L_wrap_around_window ; write != 0
cannam@89 557
cannam@89 558 sub eax,ecx
cannam@89 559 add esi,eax ; source = window + wsize - ecx
cannam@89 560 ; 749 "inffast.S"
cannam@89 561 mov eax, [esp+24]
cannam@89 562 cmp eax,ecx
cannam@89 563 jbe L_do_copy1 ; the whole match comes from the window
cannam@89 564
cannam@89 565 sub eax,ecx ; eax = bytes left after the window part
cannam@89 566 rep movsb ; copy the window part
cannam@89 567 mov esi,edi
cannam@89 568 sub esi,edx ; the rest comes from the output at out - distance
cannam@89 569 jmp L_do_copy1
cannam@89 570
cannam@89 579
cannam@89 580 L_wrap_around_window: ; esi = window base, ecx = bytes needed from the window, edx = distance
cannam@89 581 ; 793 "inffast.S"
cannam@89 582 mov eax, [esp+48] ; write
cannam@89 583 cmp ecx,eax
cannam@89 584 jbe L_contiguous_in_window ; needed bytes all lie below the write position
cannam@89 585
cannam@89 586 add esi, [esp+52]
cannam@89 587 add esi,eax
cannam@89 588 sub esi,ecx ; source = window + wsize + write - ecx
cannam@89 589 sub ecx,eax ; ecx = bytes from there to the end of the window
cannam@89 590
cannam@89 591
cannam@89 592 mov eax, [esp+24]
cannam@89 593 cmp eax,ecx
cannam@89 594 jbe L_do_copy1 ; match ends before the end of the window
cannam@89 595
cannam@89 596 sub eax,ecx
cannam@89 597 rep movsb ; copy up to the end of the window
cannam@89 598 mov esi, [esp+56] ; continue at the window base
cannam@89 599 mov ecx, [esp+48] ; at most write bytes there
cannam@89 600 cmp eax,ecx
cannam@89 601 jbe L_do_copy1
cannam@89 602
cannam@89 603 sub eax,ecx
cannam@89 604 rep movsb
cannam@89 605 mov esi,edi
cannam@89 606 sub esi,edx ; the rest comes from the output at out - distance
cannam@89 607 jmp L_do_copy1
cannam@89 608
cannam@89 609 L_contiguous_in_window: ; eax = write, ecx = bytes needed from the window
cannam@89 610 ; 836 "inffast.S"
cannam@89 611 add esi,eax
cannam@89 612 sub esi,ecx ; source = window + write - ecx
cannam@89 613
cannam@89 614
cannam@89 615 mov eax, [esp+24]
cannam@89 616 cmp eax,ecx
cannam@89 617 jbe L_do_copy1 ; the whole match comes from the window
cannam@89 618
cannam@89 619 sub eax,ecx
cannam@89 620 rep movsb
cannam@89 621 mov esi,edi
cannam@89 622 sub esi,edx ; the rest comes from the output at out - distance
cannam@89 623
cannam@89 624 L_do_copy1: ; eax = bytes still to copy from esi
cannam@89 625 ; 862 "inffast.S"
cannam@89 626 mov ecx,eax
cannam@89 627 rep movsb
cannam@89 628
cannam@89 629 mov esi, [esp+44] ; restore the input pointer parked at L_check_window
cannam@89 630 jmp L_while_test
cannam@89 631 ; 878 "inffast.S"
cannam@89 632 ALIGN 4
cannam@89 633 L_init_mmx: ; entered with ebp = bit buffer, ebx = bits held
cannam@89 634 emms
cannam@89 635
cannam@89 636
cannam@89 637
cannam@89 638
cannam@89 639
cannam@89 640 movd mm0,ebp ; mm0 = bit buffer
cannam@89 641 mov ebp,ebx ; ebp = bits held, for the whole MMX loop
cannam@89 642 ; 896 "inffast.S"
cannam@89 643 movd mm4,dword ptr [esp+0] ; length mask (working copy)
cannam@89 644 movq mm3,mm4 ; mm3 = pristine length mask
cannam@89 645 movd mm5,dword ptr [esp+4] ; distance mask (working copy)
cannam@89 646 movq mm2,mm5 ; mm2 = pristine distance mask
cannam@89 647 pxor mm1,mm1 ; mm1 = pending shift count, none yet
cannam@89 648 mov ebx, [esp+8] ; ebx = lencode table
cannam@89 649 jmp L_do_loop_mmx
cannam@89 650
cannam@89 651 ALIGN 4
cannam@89 652 L_do_loop_mmx: ; esi = input, edi = output, mm0 = bit buffer, ebp = bits held, mm1 = shift still owed
cannam@89 653 psrlq mm0,mm1 ; apply the shift owed from the previous code
cannam@89 654
cannam@89 655 cmp ebp,32
cannam@89 656 ja L_get_length_code_mmx ; more than 32 bits held: no refill needed
cannam@89 657
cannam@89 658 movd mm6,ebp
cannam@89 659 movd mm7,dword ptr [esi] ; fetch 32 input bits
cannam@89 660 add esi,4
cannam@89 661 psllq mm7,mm6
cannam@89 662 add ebp,32
cannam@89 663 por mm0,mm7 ; place them above the bits already held
cannam@89 664
cannam@89 665 L_get_length_code_mmx:
cannam@89 666 pand mm4,mm0
cannam@89 667 movd eax,mm4 ; eax = hold & length mask
cannam@89 668 movq mm4,mm3 ; restore the working mask
cannam@89 669 mov eax, [ebx+eax*4] ; entry: al = op, ah = bit count, high word = value
cannam@89 670
cannam@89 671 L_dolen_mmx:
cannam@89 672 movzx ecx,ah ; bits used by this code
cannam@89 673 movd mm1,ecx ; shift is deferred until the next psrlq mm0,mm1
cannam@89 674 sub ebp,ecx
cannam@89 675
cannam@89 676 test al,al
cannam@89 677 jnz L_test_for_length_base_mmx ; op != 0: not a literal
cannam@89 678
cannam@89 679 shr eax,16
cannam@89 680 stosb ; op == 0: emit the low byte of the value as a literal
cannam@89 681
cannam@89 682 L_while_test_mmx: ; continue only while both limits are still ahead
cannam@89 683
cannam@89 684
cannam@89 685 cmp [esp+16],edi
cannam@89 686 jbe L_break_loop ; output limit <= output pointer
cannam@89 687
cannam@89 688 cmp [esp+20],esi
cannam@89 689 ja L_do_loop_mmx ; input limit still above input pointer
cannam@89 690 jmp L_break_loop
cannam@89 691
cannam@89 692 L_test_for_length_base_mmx: ; eax = table entry with op != 0
cannam@89 693
cannam@89 694 mov edx,eax
cannam@89 695 shr edx,16 ; edx = length base; the length stays in edx until the copy
cannam@89 696
cannam@89 697 test al,16
cannam@89 698 jz L_test_for_second_level_length_mmx ; op bit 16 clear: not a length base
cannam@89 699 and eax,15 ; number of extra bits
cannam@89 700 jz L_decode_distance_mmx ; no extra bits
cannam@89 701
cannam@89 702 psrlq mm0,mm1 ; drop the code bits now
cannam@89 703 movd mm1,eax ; the extra bits become the shift still owed
cannam@89 704 movd ecx,mm0
cannam@89 705 sub ebp,eax
cannam@89 706 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
cannam@89 707 add edx,ecx ; length = base + extra
cannam@89 708
cannam@89 709 L_decode_distance_mmx:
cannam@89 710 psrlq mm0,mm1 ; apply the shift owed
cannam@89 711
cannam@89 712 cmp ebp,32
cannam@89 713 ja L_get_dist_code_mmx ; more than 32 bits held: no refill needed
cannam@89 714
cannam@89 715 movd mm6,ebp
cannam@89 716 movd mm7,dword ptr [esi] ; fetch 32 input bits
cannam@89 717 add esi,4
cannam@89 718 psllq mm7,mm6
cannam@89 719 add ebp,32
cannam@89 720 por mm0,mm7
cannam@89 721
cannam@89 722 L_get_dist_code_mmx:
cannam@89 723 mov ebx, [esp+12] ; distcode table (ebx no longer holds lencode)
cannam@89 724 pand mm5,mm0
cannam@89 725 movd eax,mm5 ; eax = hold & distance mask
cannam@89 726 movq mm5,mm2 ; restore the working mask
cannam@89 727 mov eax, [ebx+eax*4] ; entry: al = op, ah = bit count, high word = value
cannam@89 728
cannam@89 729 L_dodist_mmx:
cannam@89 730
cannam@89 731 movzx ecx,ah ; bits used by this code
cannam@89 732 mov ebx,eax
cannam@89 733 shr ebx,16 ; ebx = distance base
cannam@89 734 sub ebp,ecx
cannam@89 735 movd mm1,ecx ; shift deferred
cannam@89 736
cannam@89 737 test al,16
cannam@89 738 jz L_test_for_second_level_dist_mmx ; op bit 16 clear: not a distance base
cannam@89 739 and eax,15 ; number of extra bits
cannam@89 740 jz L_check_dist_one_mmx ; none: try the distance-1 shortcut
cannam@89 741
cannam@89 742 L_add_bits_to_dist_mmx:
cannam@89 743 psrlq mm0,mm1 ; drop the code bits now
cannam@89 744 movd mm1,eax ; the extra bits become the shift still owed
cannam@89 745 movd ecx,mm0
cannam@89 746 sub ebp,eax
cannam@89 747 and ecx, [inflate_fast_mask+eax*4] ; keep the low eax bits
cannam@89 748 add ebx,ecx ; distance = base + extra
cannam@89 749
cannam@89 750 L_check_window_mmx: ; ebx = distance, edx = length
cannam@89 751 mov [esp+44],esi ; park the input pointer; esi becomes the copy source
cannam@89 752 mov eax,edi
cannam@89 753 sub eax, [esp+40] ; eax = bytes of output available behind edi
cannam@89 754
cannam@89 755 cmp eax,ebx
cannam@89 756 jb L_clip_window_mmx ; distance reaches further back than that: use the window
cannam@89 757
cannam@89 758 mov ecx,edx
cannam@89 759 mov esi,edi
cannam@89 760 sub esi,ebx ; source = out - distance
cannam@89 761
cannam@89 762 sub ecx,3 ; the first three bytes are moved by hand below
cannam@89 763 mov al, [esi]
cannam@89 764 mov [edi],al
cannam@89 765 mov al, [esi+1]
cannam@89 766 mov dl, [esi+2]
cannam@89 767 add esi,3
cannam@89 768 mov [edi+1],al
cannam@89 769 mov [edi+2],dl
cannam@89 770 add edi,3
cannam@89 771 rep movsb ; remaining length - 3 bytes; NOTE(review): relies on length >= 3 - confirm against the tables
cannam@89 772
cannam@89 773 mov esi, [esp+44] ; restore the input pointer
cannam@89 774 mov ebx, [esp+8] ; restore ebx = lencode table for the loop
cannam@89 775 jmp L_while_test_mmx
cannam@89 776
cannam@89 777 ALIGN 4
cannam@89 778 L_check_dist_one_mmx: ; reached when the distance code carries no extra bits
cannam@89 779 cmp ebx,1
cannam@89 780 jne L_check_window_mmx
cannam@89 781 cmp [esp+40],edi
cannam@89 782 je L_check_window_mmx ; no earlier output byte at this position: take the general path
cannam@89 783
cannam@89 784 dec edi
cannam@89 785 mov ecx,edx
cannam@89 786 mov al, [edi] ; previous output byte
cannam@89 787 sub ecx,3
cannam@89 788
cannam@89 789 mov [edi+1],al
cannam@89 790 mov [edi+2],al
cannam@89 791 mov [edi+3],al
cannam@89 792 add edi,4
cannam@89 793 rep stosb ; length copies of that byte in total (3 by hand + length - 3 here)
cannam@89 794
cannam@89 795 mov ebx, [esp+8] ; restore ebx = lencode table for the loop
cannam@89 796 jmp L_while_test_mmx
cannam@89 797
cannam@89 798 ALIGN 4
cannam@89 799 L_test_for_second_level_length_mmx: ; al = op with bit 16 clear, edx = value field, ebx = lencode
cannam@89 800 test al,64
cannam@89 801 jnz L_test_for_end_of_block ; op bit 64 set: end of block or invalid code
cannam@89 802
cannam@89 803 and eax,15 ; number of index bits
cannam@89 804 psrlq mm0,mm1 ; drop the bits of the first-level code
cannam@89 805 movd ecx,mm0
cannam@89 806 and ecx, [inflate_fast_mask+eax*4]
cannam@89 807 add ecx,edx ; index = value + (hold & mask)
cannam@89 808 mov eax, [ebx+ecx*4] ; second lookup in the lencode table
cannam@89 809 jmp L_dolen_mmx
cannam@89 810
cannam@89 811 ALIGN 4
cannam@89 812 L_test_for_second_level_dist_mmx: ; al = op with bit 16 clear, ebx = value field
cannam@89 813 test al,64
cannam@89 814 jnz L_invalid_distance_code ; op bit 64 set: reported as an invalid distance code
cannam@89 815
cannam@89 816 and eax,15 ; number of index bits
cannam@89 817 psrlq mm0,mm1 ; drop the bits of the first-level code
cannam@89 818 movd ecx,mm0
cannam@89 819 and ecx, [inflate_fast_mask+eax*4]
cannam@89 820 mov eax, [esp+12] ; distcode table
cannam@89 821 add ecx,ebx ; index = value + (hold & mask)
cannam@89 822 mov eax, [eax+ecx*4] ; second lookup in the distcode table
cannam@89 823 jmp L_dodist_mmx
cannam@89 824
cannam@89 825 ALIGN 4
cannam@89 826 L_clip_window_mmx: ; eax = output bytes available behind edi, ebx = distance, edx = length
cannam@89 827
cannam@89 828 mov ecx,eax
cannam@89 829 mov eax, [esp+52] ; wsize
cannam@89 830 neg ecx
cannam@89 831 mov esi, [esp+56] ; window base
cannam@89 832
cannam@89 833 cmp eax,ebx
cannam@89 834 jb L_invalid_distance_too_far ; distance larger than wsize
cannam@89 835
cannam@89 836 add ecx,ebx ; ecx = distance - available output = bytes to take from the window
cannam@89 837 cmp dword ptr [esp+48],0
cannam@89 838 jne L_wrap_around_window_mmx ; write != 0
cannam@89 839
cannam@89 840 sub eax,ecx
cannam@89 841 add esi,eax ; source = window + wsize - ecx
cannam@89 842
cannam@89 843 cmp edx,ecx
cannam@89 844 jbe L_do_copy1_mmx ; the whole match comes from the window
cannam@89 845
cannam@89 846 sub edx,ecx ; edx = bytes left after the window part
cannam@89 847 rep movsb ; copy the window part
cannam@89 848 mov esi,edi
cannam@89 849 sub esi,ebx ; the rest comes from the output at out - distance
cannam@89 850 jmp L_do_copy1_mmx
cannam@89 851
cannam@89 860
cannam@89 861 L_wrap_around_window_mmx: ; esi = window base, ecx = bytes needed from the window, ebx = distance, edx = length
cannam@89 862
cannam@89 863 mov eax, [esp+48] ; write
cannam@89 864 cmp ecx,eax
cannam@89 865 jbe L_contiguous_in_window_mmx ; needed bytes all lie below the write position
cannam@89 866
cannam@89 867 add esi, [esp+52]
cannam@89 868 add esi,eax
cannam@89 869 sub esi,ecx ; source = window + wsize + write - ecx
cannam@89 870 sub ecx,eax ; ecx = bytes from there to the end of the window
cannam@89 871
cannam@89 872
cannam@89 873 cmp edx,ecx
cannam@89 874 jbe L_do_copy1_mmx ; match ends before the end of the window
cannam@89 875
cannam@89 876 sub edx,ecx
cannam@89 877 rep movsb ; copy up to the end of the window
cannam@89 878 mov esi, [esp+56] ; continue at the window base
cannam@89 879 mov ecx, [esp+48] ; at most write bytes there
cannam@89 880 cmp edx,ecx
cannam@89 881 jbe L_do_copy1_mmx
cannam@89 882
cannam@89 883 sub edx,ecx
cannam@89 884 rep movsb
cannam@89 885 mov esi,edi
cannam@89 886 sub esi,ebx ; the rest comes from the output at out - distance
cannam@89 887 jmp L_do_copy1_mmx
cannam@89 888
cannam@89 889 L_contiguous_in_window_mmx: ; eax = write, ecx = bytes needed from the window
cannam@89 890
cannam@89 891 add esi,eax
cannam@89 892 sub esi,ecx ; source = window + write - ecx
cannam@89 893
cannam@89 894
cannam@89 895 cmp edx,ecx
cannam@89 896 jbe L_do_copy1_mmx ; the whole match comes from the window
cannam@89 897
cannam@89 898 sub edx,ecx
cannam@89 899 rep movsb
cannam@89 900 mov esi,edi
cannam@89 901 sub esi,ebx ; the rest comes from the output at out - distance
cannam@89 902
cannam@89 903 L_do_copy1_mmx: ; edx = bytes still to copy from esi
cannam@89 904
cannam@89 905
cannam@89 906 mov ecx,edx
cannam@89 907 rep movsb
cannam@89 908
cannam@89 909 mov esi, [esp+44] ; restore the input pointer parked at L_check_window_mmx
cannam@89 910 mov ebx, [esp+8] ; restore ebx = lencode table for the loop
cannam@89 911 jmp L_while_test_mmx
cannam@89 912 ; 1174 "inffast.S"
cannam@89 913 L_invalid_distance_code: ; exits shared by both loops: ecx = message address or 0, edx = new mode
cannam@89 914
cannam@89 915
cannam@89 916
cannam@89 917
cannam@89 918
cannam@89 919 mov ecx, invalid_distance_code_msg
cannam@89 920 mov edx,INFLATE_MODE_BAD
cannam@89 921 jmp L_update_stream_state
cannam@89 922
cannam@89 923 L_test_for_end_of_block: ; al = op with bit 64 set
cannam@89 924
cannam@89 925
cannam@89 926
cannam@89 927
cannam@89 928
cannam@89 929 test al,32
cannam@89 930 jz L_invalid_literal_length_code ; bit 32 clear: not an end-of-block code
cannam@89 931
cannam@89 932 mov ecx,0 ; no message for a normal end of block
cannam@89 933 mov edx,INFLATE_MODE_TYPE
cannam@89 934 jmp L_update_stream_state
cannam@89 935
cannam@89 936 L_invalid_literal_length_code:
cannam@89 937
cannam@89 938
cannam@89 939
cannam@89 940
cannam@89 941
cannam@89 942 mov ecx, invalid_literal_length_code_msg
cannam@89 943 mov edx,INFLATE_MODE_BAD
cannam@89 944 jmp L_update_stream_state
cannam@89 945
cannam@89 946 L_invalid_distance_too_far: ; reached from the clip-window code, where esi is not the input pointer
cannam@89 947
cannam@89 948
cannam@89 949
cannam@89 950 mov esi, [esp+44] ; restore the parked input pointer
cannam@89 951 mov ecx, invalid_distance_too_far_msg
cannam@89 952 mov edx,INFLATE_MODE_BAD
cannam@89 953 jmp L_update_stream_state
cannam@89 954
cannam@89 955 L_update_stream_state:
cannam@89 956
cannam@89 957 mov eax, [esp+88] ; first argument
cannam@89 958 test ecx,ecx
cannam@89 959 jz L_skip_msg ; ecx == 0: leave the message field untouched
cannam@89 960 mov [eax+24],ecx ; store the message address (presumably the stream's msg field)
cannam@89 961 L_skip_msg:
cannam@89 962 mov eax, [eax+28] ; state block
cannam@89 963 mov [eax+mode_state],edx ; state->mode = edx
cannam@89 964 jmp L_break_loop
cannam@89 965
cannam@89 966 ALIGN 4
cannam@89 967 L_break_loop: ; write pointers, bit buffer and remaining counts back, then return
cannam@89 968 ; 1243 "inffast.S"
cannam@89 969 cmp dword ptr [inflate_fast_use_mmx],2
cannam@89 970 jne L_update_next_in
cannam@89 971
cannam@89 972
cannam@89 973
cannam@89 974 mov ebx,ebp ; the MMX loop kept the bit count in ebp
cannam@89 975
cannam@89 976 L_update_next_in:
cannam@89 977 ; 1266 "inffast.S"
cannam@89 978 mov eax, [esp+88] ; first argument
cannam@89 979 mov ecx,ebx
cannam@89 980 mov edx, [eax+28] ; state block
cannam@89 981 shr ecx,3 ; whole bytes still sitting in the bit buffer
cannam@89 982 sub esi,ecx ; hand them back to the input
cannam@89 983 shl ecx,3
cannam@89 984 sub ebx,ecx ; bits left over (fewer than 8)
cannam@89 985 mov [eax+12],edi ; store the output pointer
cannam@89 986 mov [edx+bits_state],ebx ; state->bits
cannam@89 987 mov ecx,ebx
cannam@89 988
cannam@89 989 lea ebx, [esp+28]
cannam@89 990 cmp [esp+20],ebx
cannam@89 991 jne L_buf_not_used ; input limit is not the local copy: caller's buffer was read directly
cannam@89 992
cannam@89 993 sub esi,ebx ; bytes consumed from the local copy
cannam@89 994 mov ebx, [eax+0] ; input pointer as stored in the stream
cannam@89 995 mov [esp+20],ebx
cannam@89 996 add esi,ebx ; esi = that pointer + bytes consumed
cannam@89 997 mov ebx, [eax+4]
cannam@89 998 sub ebx,11
cannam@89 999 add [esp+20],ebx ; input limit rebuilt as in + avail_in - 11
cannam@89 1000
cannam@89 1001 L_buf_not_used:
cannam@89 1002 mov [eax+0],esi ; store the input pointer
cannam@89 1003
cannam@89 1004 mov ebx,1
cannam@89 1005 shl ebx,cl
cannam@89 1006 dec ebx ; mask covering the leftover bits
cannam@89 1007
cannam@89 1008
cannam@89 1009
cannam@89 1010
cannam@89 1011
cannam@89 1012 cmp dword ptr [inflate_fast_use_mmx],2
cannam@89 1013 jne L_update_hold
cannam@89 1014
cannam@89 1015
cannam@89 1016
cannam@89 1017 psrlq mm0,mm1 ; apply the shift still owed
cannam@89 1018 movd ebp,mm0 ; ebp = low 32 bits of the bit buffer
cannam@89 1019
cannam@89 1020 emms ; leave MMX state before returning
cannam@89 1021
cannam@89 1022 L_update_hold:
cannam@89 1023
cannam@89 1024
cannam@89 1025
cannam@89 1026 and ebp,ebx
cannam@89 1027 mov [edx+hold_state],ebp ; state->hold = leftover bits only
cannam@89 1028
cannam@89 1029
cannam@89 1030
cannam@89 1031
cannam@89 1032 mov ebx, [esp+20]
cannam@89 1033 cmp ebx,esi
cannam@89 1034 jbe L_last_is_smaller
cannam@89 1035
cannam@89 1036 sub ebx,esi
cannam@89 1037 add ebx,11
cannam@89 1038 mov [eax+4],ebx ; remaining input = 11 + (limit - in)
cannam@89 1039 jmp L_fixup_out
cannam@89 1040 L_last_is_smaller:
cannam@89 1041 sub esi,ebx
cannam@89 1042 neg esi
cannam@89 1043 add esi,11
cannam@89 1044 mov [eax+4],esi ; remaining input = 11 - (in - limit)
cannam@89 1045
cannam@89 1046
cannam@89 1047
cannam@89 1048
cannam@89 1049 L_fixup_out:
cannam@89 1050
cannam@89 1051 mov ebx, [esp+16]
cannam@89 1052 cmp ebx,edi
cannam@89 1053 jbe L_end_is_smaller
cannam@89 1054
cannam@89 1055 sub ebx,edi
cannam@89 1056 add ebx,257
cannam@89 1057 mov [eax+16],ebx ; remaining output = 257 + (limit - out)
cannam@89 1058 jmp L_done
cannam@89 1059 L_end_is_smaller:
cannam@89 1060 sub edi,ebx
cannam@89 1061 neg edi
cannam@89 1062 add edi,257
cannam@89 1063 mov [eax+16],edi ; remaining output = 257 - (out - limit)
cannam@89 1064
cannam@89 1065
cannam@89 1066
cannam@89 1067
cannam@89 1068
cannam@89 1069 L_done:
cannam@89 1070 add esp,64 ; release the locals
cannam@89 1071 popfd ; restore EFLAGS, including the direction flag
cannam@89 1072 pop ebx
cannam@89 1073 pop ebp
cannam@89 1074 pop esi
cannam@89 1075 pop edi
cannam@89 1076 ret
cannam@89 1077 _inflate_fast endp
cannam@89 1078
cannam@89 1079 _TEXT ends
cannam@89 1080 end