;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GUNZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; Syntax: MASM (Intel operand order), 32-bit flat model.

; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
zlib1222sup      equ    8


; Values written to state->mode on exit from the decode loop.  They depend on
; whether the gzip-aware build is selected (GUNZIP defined, or NO_GUNZIP absent).
IFDEF GUNZIP
  INFLATE_MODE_TYPE    equ 11
  INFLATE_MODE_BAD     equ 26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE    equ 11
    INFLATE_MODE_BAD     equ 26
  ELSE
    INFLATE_MODE_TYPE    equ 3
    INFLATE_MODE_BAD     equ 17
  ENDIF
ENDIF


        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

_DATA   segment

; Decode-path selector, read and written by _inflate_fast:
;   below 2 : not probed yet (initial value 1) -> run the CPU check
;   2       : use the MMX loop
;   above 2 : use the integer loop (the CPU check stores 3)
inflate_fast_use_mmx:
        dd      1

_DATA   ends


_TEXT   segment

ALIGN 4
        db      'Fast decoding Code from Chris Anderson'
        db      0

; Message strings; their addresses are stored at offset 24 of the stream
; structure on error (presumably z_stream.msg -- confirm against zlib.h).
ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code'
        db      0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code'
        db      0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back'
        db      0


; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32.  Used by the MMX loop to
; isolate the low n bits of the bit accumulator.
ALIGN 4
inflate_fast_mask:
        dd      0
        dd      1, 3, 7, 15
        dd      31, 63, 127, 255
        dd      511, 1023, 2047, 4095
        dd      8191, 16383, 32767, 65535
        dd      131071, 262143, 524287, 1048575
        dd      2097151, 4194303, 8388607, 16777215
        dd      33554431, 67108863, 134217727, 268435455
        dd      536870911, 1073741823, 2147483647, 4294967295


; Byte offsets of the fields used inside the inflate state structure.
mode_state       equ    0                       ;/* state->mode */
wsize_state      equ    (32+zlib1222sup)        ;/* state->wsize */
write_state      equ    (36+4+zlib1222sup)      ;/* state->write */
window_state     equ    (40+4+zlib1222sup)      ;/* state->window */
hold_state       equ    (44+4+zlib1222sup)      ;/* state->hold */
bits_state       equ    (48+4+zlib1222sup)      ;/* state->bits */
lencode_state    equ    (64+4+zlib1222sup)      ;/* state->lencode */
distcode_state   equ    (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state    equ    (72+4+zlib1222sup)      ;/* state->lenbits */
distbits_state   equ    (76+4+zlib1222sup)      ;/* state->distbits */


;-----------------------------------------------------------------------------
; _inflate_fast -- fast inner decode loop (assembler version of inffast.c)
;
; Arguments (on the stack, caller cleans up: the routine ends with a plain ret)
;   1st dword : pointer to the stream structure ("strm")
;   2nd dword : "start" value used to derive the beginning of the output
;
; Stream structure offsets used here.  They are hard-coded; the field names
; below are presumed from zlib's z_stream -- confirm against zlib.h:
;   +0  input pointer      +4  input bytes available
;   +12 output pointer     +16 output bytes available
;   +24 message pointer    +28 pointer to the inflate state
;
; Preserves edi, esi, ebp, ebx and EFLAGS (all pushed on entry, popped on exit).
; eax, ecx, edx are clobbered; mm0-mm7 are used on the MMX path (emms is
; executed before and after MMX use).
;
; Stack frame after the prologue (5 pushes + 64 bytes of locals):
;   [esp+0]   lmask  = (1 << state->lenbits)  - 1
;   [esp+4]   dmask  = (1 << state->distbits) - 1
;   [esp+8]   lcode  = state->lencode
;   [esp+12]  dcode  = state->distcode
;   [esp+16]  end    = out + avail_out - 257
;   [esp+20]  last   = in  + avail_in  - 11
;   [esp+24]  len    (integer loop only)
;   [esp+28]  12-byte scratch input buffer (esp+28 .. esp+39)
;   [esp+40]  beg    = out - (start - avail_out)
;   [esp+44]  in     (input pointer, parked while esi is used for copies)
;   [esp+48]  write  = state->write
;   [esp+52]  wsize  = state->wsize
;   [esp+56]  window = state->window
;   [esp+60]  out    (initial output pointer)
;   [esp+88]  1st argument (strm)
;   [esp+92]  2nd argument (start)
;
; Code table entry (one dword, fetched as [table + index*4]):
;   al          = op   (0: literal, bit 4: base + (op & 15) extra bits,
;                       bit 6: end-of-block / invalid, bit 5: end-of-block,
;                       otherwise: second-level table, op = index bit count)
;   ah          = number of bits this code consumes
;   bits 16..31 = value (literal, length/distance base, or sub-table offset)
;
; Register roles, integer loop:
;   esi = in, edi = out, ebp = hold (bit accumulator), bl = bits in hold
; Register roles, MMX loop:
;   esi = in, edi = out, mm0 = hold, mm1 = bit count still to be shifted out,
;   ebp = bits in hold, ebx = lcode (or dist while decoding a distance),
;   edx = len, mm3/mm4 = lmask (master/working), mm2/mm5 = dmask
;-----------------------------------------------------------------------------
ALIGN 4
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd                                  ; direction flag is restored on exit
        sub     esp,64
        cld                                     ; string ops must run forward

        mov     esi, [esp+88]                   ; esi = strm
        mov     edi, [esi+28]                   ; edi = inflate state

        ; last = in + avail_in - 11
        mov     edx, [esi+4]
        mov     eax, [esi+0]

        add     edx,eax
        sub     edx,11

        mov     [esp+44],eax                    ; in
        mov     [esp+20],edx                    ; last

        ; beg = out - (start - avail_out);  end = out + avail_out - 257
        mov     ebp, [esp+92]
        mov     ecx, [esi+16]
        mov     ebx, [esi+12]

        sub     ebp,ecx
        neg     ebp
        add     ebp,ebx

        sub     ecx,257
        add     ecx,ebx

        mov     [esp+60],ebx                    ; out
        mov     [esp+40],ebp                    ; beg
        mov     [esp+16],ecx                    ; end

        ; Cache the decoding tables and their index masks.
        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]

        mov     [esp+8],eax                     ; lcode
        mov     [esp+12],ecx                    ; dcode

        mov     eax,1
        mov     ecx, [edi+lenbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+0],eax                     ; lmask

        mov     eax,1
        mov     ecx, [edi+distbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+4],eax                     ; dmask

        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]

        mov     [esp+52],eax                    ; wsize
        mov     [esp+48],ecx                    ; write
        mov     [esp+56],edx                    ; window

        mov     ebp, [edi+hold_state]           ; ebp = hold
        mov     ebx, [edi+bits_state]           ; ebx = bits

        mov     esi, [esp+44]                   ; esi = in
        mov     ecx, [esp+20]                   ; ecx = last
        cmp     ecx,esi
        ja      L_align_long                    ; last > in: read the caller's buffer directly

        ; last <= in: copy the avail_in remaining bytes into the 12-byte
        ; scratch buffer, zero-fill the rest, and decode from there.
        add     ecx,11
        sub     ecx,esi                         ; ecx = avail_in
        mov     eax,12
        sub     eax,ecx                         ; eax = padding byte count
        lea     edi, [esp+28]
        rep movsb
        mov     ecx,eax
        xor     eax,eax
        rep stosb
        lea     esi, [esp+28]                   ; in   = scratch buffer
        mov     [esp+20],esi                    ; last = scratch buffer (marks it as in use)
        jmp     L_is_aligned


L_align_long:
        ; Pull single bytes into hold until the input pointer is dword aligned.
        test    esi,3
        jz      L_is_aligned
        xor     eax,eax
        mov     al, [esi]
        inc     esi
        mov     ecx,ebx
        add     ebx,8
        shl     eax,cl
        or      ebp,eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+60]                   ; edi = out

L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx],2
        je      L_init_mmx                      ; == 2: MMX loop
        ja      L_do_loop                       ;  > 2: integer loop

        ; < 2: probe the CPU once, record the verdict, then re-dispatch.
        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp],0200000h        ; try to toggle EFLAGS bit 21 (ID)
        popfd
        pushfd
        pop     edx
        xor     edx,eax
        jz      L_dont_use_mmx                  ; bit did not change: no cpuid
        xor     eax,eax
        cpuid
        cmp     ebx,0756e6547h                  ; 'Genu'
        jne     L_dont_use_mmx
        cmp     ecx,06c65746eh                  ; 'ntel'
        jne     L_dont_use_mmx
        cmp     edx,049656e69h                  ; 'ineI'
        jne     L_dont_use_mmx
        mov     eax,1
        cpuid
        shr     eax,8
        and     eax,15                          ; eax = family field
        cmp     eax,6
        jne     L_dont_use_mmx                  ; MMX path is enabled for family 6 only
        test    edx,0800000h                    ; feature bit 23 (MMX)
        jz      L_dont_use_mmx
L_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx],2
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx],3
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx

;-----------------------------------------------------------------------------
; Integer decode loop
;-----------------------------------------------------------------------------
ALIGN 4
L_do_loop:
        cmp     bl,15
        ja      L_get_length_code               ; more than 15 bits buffered

        xor     eax,eax                         ; refill: append 16 input bits
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_length_code:
        mov     edx, [esp+0]                    ; lmask
        mov     ecx, [esp+8]                    ; lcode
        and     edx,ebp
        mov     eax, [ecx+edx*4]                ; eax = lcode[hold & lmask]

L_dolen:
        mov     cl,ah                           ; drop the bits of this code
        sub     bl,ah
        shr     ebp,cl

        test    al,al
        jnz     L_test_for_length_base          ; op != 0: not a literal

        shr     eax,16
        stosb                                   ; emit literal byte

L_while_test:
        ; Continue while out < end and in < last.
        cmp     [esp+16],edi
        jbe     L_break_loop

        cmp     [esp+20],esi
        ja      L_do_loop
        jmp     L_break_loop

L_test_for_length_base:
        mov     edx,eax
        shr     edx,16                          ; edx = value field (length base)
        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_length
        and     cl,15                           ; cl = number of extra bits
        jz      L_save_len
        cmp     bl,cl
        jae     L_add_bits_to_len

        mov     ch,cl                           ; refill clobbers cl: park the count in ch
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_len:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                         ; len = base + extra bits

L_save_len:
        mov     [esp+24],edx


L_decode_distance:
        cmp     bl,15
        ja      L_get_distance_code

        xor     eax,eax                         ; refill: append 16 input bits
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_distance_code:
        mov     edx, [esp+4]                    ; dmask
        mov     ecx, [esp+12]                   ; dcode
        and     edx,ebp
        mov     eax, [ecx+edx*4]                ; eax = dcode[hold & dmask]


L_dodist:
        mov     edx,eax
        shr     edx,16                          ; edx = value field (distance base)
        mov     cl,ah                           ; drop the bits of this code
        sub     bl,ah
        shr     ebp,cl

        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_dist
        and     cl,15                           ; cl = number of extra bits
        jz      L_check_dist_one
        cmp     bl,cl
        jae     L_add_bits_to_dist

        mov     ch,cl
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_dist:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                         ; dist = base + extra bits

L_check_window:
        mov     [esp+44],esi                    ; park in; esi becomes the copy source
        mov     eax,edi
        sub     eax, [esp+40]                   ; eax = out - beg

        cmp     eax,edx
        jb      L_clip_window                   ; dist reaches back before beg

        mov     ecx, [esp+24]                   ; len
        mov     esi,edi
        sub     esi,edx                         ; from = out - dist

        ; Copy three bytes by hand, then len - 3 with rep movsb.
        ; NOTE(review): relies on len >= 3 -- confirm against the length tables.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep movsb

        mov     esi, [esp+44]                   ; restore in
        jmp     L_while_test

ALIGN 4
L_check_dist_one:
        ; No extra bits.  When dist == 1 and some output exists, the match is
        ; a run of the previous byte: fill with stosb instead of copying.
        cmp     edx,1
        jne     L_check_window
        cmp     [esp+40],edi
        je      L_check_window                  ; out == beg: take the generic path

        dec     edi
        mov     ecx, [esp+24]
        mov     al, [edi]                       ; al = previous output byte
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al,64
        jnz     L_test_for_end_of_block

        ; Second-level lookup: index = value + (hold & ((1 << op) - 1)).
        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+8]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al,64
        jnz     L_invalid_distance_code

        ; Second-level lookup: index = value + (hold & ((1 << op) - 1)).
        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+12]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

ALIGN 4
L_clip_window:
        ; Part of the match lies in the sliding window.
        ; On entry: eax = out - beg, edx = dist.
        mov     ecx,eax
        mov     eax, [esp+52]                   ; wsize
        neg     ecx
        mov     esi, [esp+56]                   ; window

        cmp     eax,edx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx,edx                         ; ecx = dist - (out - beg): bytes taken from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window

        ; write == 0: source is the tail of the window.
        sub     eax,ecx
        add     esi,eax                         ; from = window + wsize - ecx

        mov     eax, [esp+24]                   ; len
        cmp     eax,ecx
        jbe     L_do_copy1                      ; whole match comes from the window

        sub     eax,ecx
        rep movsb                               ; window part
        mov     esi,edi
        sub     esi,edx                         ; remainder comes from the output: from = out - dist
        jmp     L_do_copy1

L_wrap_around_window:
        mov     eax, [esp+48]                   ; write
        cmp     ecx,eax
        jbe     L_contiguous_in_window

        ; Source starts near the end of the window and wraps to its start.
        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx                         ; from = window + wsize + write - ecx
        sub     ecx,eax                         ; ecx = bytes before the wrap point

        mov     eax, [esp+24]                   ; len
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                               ; end-of-window part
        mov     esi, [esp+56]                   ; continue at the start of the window
        mov     ecx, [esp+48]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                               ; start-of-window part
        mov     esi,edi
        sub     esi,edx                         ; remainder comes from the output
        jmp     L_do_copy1

L_contiguous_in_window:
        add     esi,eax
        sub     esi,ecx                         ; from = window + write - ecx

        mov     eax, [esp+24]                   ; len
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                               ; window part
        mov     esi,edi
        sub     esi,edx                         ; remainder comes from the output

L_do_copy1:
        mov     ecx,eax                         ; eax = bytes still to copy
        rep movsb

        mov     esi, [esp+44]                   ; restore in
        jmp     L_while_test

;-----------------------------------------------------------------------------
; MMX decode loop
;-----------------------------------------------------------------------------
ALIGN 4
L_init_mmx:
        emms

        movd    mm0,ebp                         ; mm0 = hold
        mov     ebp,ebx                         ; ebp = bits

        movd    mm4,dword ptr [esp+0]           ; mm4 = lmask (working copy)
        movq    mm3,mm4                         ; mm3 = lmask (master copy)
        movd    mm5,dword ptr [esp+4]           ; mm5 = dmask (working copy)
        movq    mm2,mm5                         ; mm2 = dmask (master copy)
        pxor    mm1,mm1                         ; nothing to shift out yet
        mov     ebx, [esp+8]                    ; ebx = lcode
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0,mm1                         ; drop the bits of the previous code

        cmp     ebp,32
        ja      L_get_length_code_mmx           ; more than 32 bits buffered

        movd    mm6,ebp                         ; refill: append 32 input bits
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_length_code_mmx:
        pand    mm4,mm0
        movd    eax,mm4                         ; eax = hold & lmask
        movq    mm4,mm3                         ; reload the working mask
        mov     eax, [ebx+eax*4]                ; eax = lcode[hold & lmask]

L_dolen_mmx:
        movzx   ecx,ah
        movd    mm1,ecx                         ; shift is deferred to the next psrlq
        sub     ebp,ecx

        test    al,al
        jnz     L_test_for_length_base_mmx      ; op != 0: not a literal

        shr     eax,16
        stosb                                   ; emit literal byte

L_while_test_mmx:
        ; Continue while out < end and in < last.
        cmp     [esp+16],edi
        jbe     L_break_loop

        cmp     [esp+20],esi
        ja      L_do_loop_mmx
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx,eax
        shr     edx,16                          ; edx = value field (length base)

        test    al,16
        jz      L_test_for_second_level_length_mmx
        and     eax,15                          ; eax = number of extra bits
        jz      L_decode_distance_mmx

        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx,ecx                         ; len = base + extra bits

L_decode_distance_mmx:
        psrlq   mm0,mm1

        cmp     ebp,32
        ja      L_get_dist_code_mmx

        movd    mm6,ebp                         ; refill: append 32 input bits
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+12]                   ; ebx = dcode
        pand    mm5,mm0
        movd    eax,mm5                         ; eax = hold & dmask
        movq    mm5,mm2                         ; reload the working mask
        mov     eax, [ebx+eax*4]                ; eax = dcode[hold & dmask]

L_dodist_mmx:
        movzx   ecx,ah
        mov     ebx,eax
        shr     ebx,16                          ; ebx = value field (distance base)
        sub     ebp,ecx
        movd    mm1,ecx

        test    al,16
        jz      L_test_for_second_level_dist_mmx
        and     eax,15                          ; eax = number of extra bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx,ecx                         ; dist = base + extra bits

L_check_window_mmx:
        mov     [esp+44],esi                    ; park in; esi becomes the copy source
        mov     eax,edi
        sub     eax, [esp+40]                   ; eax = out - beg

        cmp     eax,ebx
        jb      L_clip_window_mmx               ; dist reaches back before beg

        mov     ecx,edx                         ; len
        mov     esi,edi
        sub     esi,ebx                         ; from = out - dist

        ; Copy three bytes by hand, then len - 3 with rep movsb.
        ; NOTE(review): relies on len >= 3 -- confirm against the length tables.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep movsb

        mov     esi, [esp+44]                   ; restore in
        mov     ebx, [esp+8]                    ; restore lcode
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        ; No extra bits.  When dist == 1 and some output exists, the match is
        ; a run of the previous byte: fill with stosb instead of copying.
        cmp     ebx,1
        jne     L_check_window_mmx
        cmp     [esp+40],edi
        je      L_check_window_mmx              ; out == beg: take the generic path

        dec     edi
        mov     ecx,edx
        mov     al, [edi]                       ; al = previous output byte
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep stosb

        mov     ebx, [esp+8]                    ; restore lcode
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al,64
        jnz     L_test_for_end_of_block

        ; Second-level lookup: index = value + (hold & mask[op & 15]).
        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx,edx
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al,64
        jnz     L_invalid_distance_code

        ; Second-level lookup: index = value + (hold & mask[op & 15]).
        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+12]
        add     ecx,ebx
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

ALIGN 4
L_clip_window_mmx:
        ; Part of the match lies in the sliding window.
        ; On entry: eax = out - beg, ebx = dist, edx = len.
        mov     ecx,eax
        mov     eax, [esp+52]                   ; wsize
        neg     ecx
        mov     esi, [esp+56]                   ; window

        cmp     eax,ebx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx,ebx                         ; ecx = dist - (out - beg): bytes taken from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window_mmx

        ; write == 0: source is the tail of the window.
        sub     eax,ecx
        add     esi,eax                         ; from = window + wsize - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx                  ; whole match comes from the window

        sub     edx,ecx
        rep movsb                               ; window part
        mov     esi,edi
        sub     esi,ebx                         ; remainder comes from the output: from = out - dist
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:
        mov     eax, [esp+48]                   ; write
        cmp     ecx,eax
        jbe     L_contiguous_in_window_mmx

        ; Source starts near the end of the window and wraps to its start.
        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx                         ; from = window + wsize + write - ecx
        sub     ecx,eax                         ; ecx = bytes before the wrap point

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb                               ; end-of-window part
        mov     esi, [esp+56]                   ; continue at the start of the window
        mov     ecx, [esp+48]
        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb                               ; start-of-window part
        mov     esi,edi
        sub     esi,ebx                         ; remainder comes from the output
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi,eax
        sub     esi,ecx                         ; from = window + write - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb                               ; window part
        mov     esi,edi
        sub     esi,ebx                         ; remainder comes from the output

L_do_copy1_mmx:
        mov     ecx,edx                         ; edx = bytes still to copy
        rep movsb

        mov     esi, [esp+44]                   ; restore in
        mov     ebx, [esp+8]                    ; restore lcode
        jmp     L_while_test_mmx

;-----------------------------------------------------------------------------
; Loop exits shared by both decode paths.
; ecx = message address (0 for none), edx = new state->mode.
;-----------------------------------------------------------------------------
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al,32
        jz      L_invalid_literal_length_code

        xor     ecx,ecx                         ; end of block: no message
        mov     edx,INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+44]                   ; esi held the window pointer: restore in
        mov     ecx, invalid_distance_too_far_msg
        mov     edx,INFLATE_MODE_BAD

L_update_stream_state:
        mov     eax, [esp+88]                   ; strm
        test    ecx,ecx
        jz      L_skip_msg
        mov     [eax+24],ecx                    ; store the message address
L_skip_msg:
        mov     eax, [eax+28]                   ; inflate state
        mov     [eax+mode_state],edx
        jmp     L_break_loop

ALIGN 4
L_break_loop:
        ; The MMX loop keeps the bit count in ebp; the code below expects it in ebx.
        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_next_in

        mov     ebx,ebp

L_update_next_in:
        ; Hand whole unused bytes back to the input: in -= bits / 8, bits %= 8.
        mov     eax, [esp+88]                   ; eax = strm
        mov     ecx,ebx
        mov     edx, [eax+28]                   ; edx = inflate state
        shr     ecx,3
        sub     esi,ecx
        shl     ecx,3
        sub     ebx,ecx
        mov     [eax+12],edi                    ; store the output pointer
        mov     [edx+bits_state],ebx
        mov     ecx,ebx                         ; cl = remaining bit count

        lea     ebx, [esp+28]
        cmp     [esp+20],ebx
        jne     L_buf_not_used

        ; The scratch buffer was in use: translate the input pointer back into
        ; the caller's buffer and recompute last from the stream fields.
        sub     esi,ebx
        mov     ebx, [eax+0]
        mov     [esp+20],ebx
        add     esi,ebx
        mov     ebx, [eax+4]
        sub     ebx,11
        add     [esp+20],ebx

L_buf_not_used:
        mov     [eax+0],esi                     ; store the input pointer

        mov     ebx,1
        shl     ebx,cl
        dec     ebx                             ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_hold

        psrlq   mm0,mm1                         ; apply the pending shift
        movd    ebp,mm0                         ; ebp = hold

        emms

L_update_hold:
        and     ebp,ebx                         ; keep only the valid low bits
        mov     [edx+hold_state],ebp

        ; Input bytes available = last - in + 11.  Both arms below produce that
        ; same value modulo 2^32.
        mov     ebx, [esp+20]
        cmp     ebx,esi
        jbe     L_last_is_smaller

        sub     ebx,esi
        add     ebx,11
        mov     [eax+4],ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi,ebx
        neg     esi
        add     esi,11
        mov     [eax+4],esi

L_fixup_out:
        ; Output bytes available = end - out + 257.  Both arms below produce
        ; that same value modulo 2^32.
        mov     ebx, [esp+16]
        cmp     ebx,edi
        jbe     L_end_is_smaller

        sub     ebx,edi
        add     ebx,257
        mov     [eax+16],ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi,ebx
        neg     edi
        add     edi,257
        mov     [eax+16],edi

L_done:
        add     esp,64
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast endp

_TEXT   ends
end