;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */

; Syntax : MASM (Intel operand order), 32-bit flat model.
; Tools  : Visual C++ 4.x and higher, ML 6.x and higher.
;          ml.exe is in directory \MASM611C of the Win95 DDK,
;          is also distributed at http://www.masm32.com/masmdl.htm
;          and in the VC++2003 toolkit at
;          http://msdn.microsoft.com/visualc/vctoolkit2003/
;
; Build with:
;       ml /coff /Zi /c /Flinffas32.lst inffas32.asm
;
; If zlib is built with NO_GUNZIP defined (see inflate.h), build with:
;       ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is added to every inflate_state offset that follows the
; dmax and head members (added to inflate_state in inflate.h):
;   0 for zlib 1.2.2.1 and lower
;   8 for zlib 1.2.2.2 and higher
zlib1222sup     equ     8


; Values written to state->mode on exit from the decoder.
; The short numbering is selected only when NO_GUNZIP is defined and GUNZIP
; is not; every other combination selects the gunzip-enabled numbering.
IFDEF NO_GUNZIP
  IFDEF GUNZIP
INFLATE_MODE_TYPE       equ     11
INFLATE_MODE_BAD        equ     26
  ELSE
INFLATE_MODE_TYPE       equ     3
INFLATE_MODE_BAD        equ     17
  ENDIF
ELSE
INFLATE_MODE_TYPE       equ     11
INFLATE_MODE_BAD        equ     26
ENDIF


; (translated from inffast.S; the original GLOBAL/SECTION directives of that
;  file have no MASM counterpart here)

        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

_DATA   segment
; Decoder selection flag, read and updated by the code in _TEXT:
;   2        -> take the MMX decoding loop
;   above 2  -> take the integer-only decoding loop
;   below 2  -> not probed yet (initial value 1); the CPU is queried first
inflate_fast_use_mmx:
        dd      1


_TEXT   segment

ALIGN 4
        db      'Fast decoding Code from Chris Anderson', 0

ALIGN 4
; NUL-terminated messages whose addresses are stored in the stream's msg field
; on a decoding error.  They are deliberately defined as code labels (with a
; colon) so that "mov reg, label" yields the address of the text.
invalid_literal_length_code_msg:
        db      'invalid literal/length code', 0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code', 0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back', 0


; inflate_fast_mask[n] = (2^n) - 1 for n = 0..32 : keeps the low n bits.
ALIGN 4
inflate_fast_mask:
        dd      000000000h, 000000001h, 000000003h, 000000007h
        dd      00000000Fh, 00000001Fh, 00000003Fh, 00000007Fh
        dd      0000000FFh, 0000001FFh, 0000003FFh, 0000007FFh
        dd      000000FFFh, 000001FFFh, 000003FFFh, 000007FFFh
        dd      00000FFFFh, 00001FFFFh, 00003FFFFh, 00007FFFFh
        dd      0000FFFFFh, 0001FFFFFh, 0003FFFFFh, 0007FFFFFh
        dd      000FFFFFFh, 001FFFFFFh, 003FFFFFFh, 007FFFFFFh
        dd      00FFFFFFFh, 01FFFFFFFh, 03FFFFFFFh, 07FFFFFFFh
        dd      0FFFFFFFFh


; Byte offsets of the inflate_state members used by the decoder.
; zlib1222sup shifts every member located after dmax/head.
mode_state      equ     0                       ;/* state->mode */
wsize_state     equ     (32+zlib1222sup)        ;/* state->wsize */
write_state     equ     (40+zlib1222sup)        ;/* state->write */
window_state    equ     (44+zlib1222sup)        ;/* state->window */
hold_state      equ     (48+zlib1222sup)        ;/* state->hold */
bits_state      equ     (52+zlib1222sup)        ;/* state->bits */
lencode_state   equ     (68+zlib1222sup)        ;/* state->lencode */
distcode_state  equ     (72+zlib1222sup)        ;/* state->distcode */
lenbits_state   equ     (76+zlib1222sup)        ;/* state->lenbits */
distbits_state  equ     (80+zlib1222sup)        ;/* state->distbits */


; (inffast.S exported inflate_fast_use_mmx as a global data object at this
;  point; in this file the flag is a plain label in _DATA)
;-----------------------------------------------------------------------------
; _inflate_fast -- fast literal/length/distance decoding loop
;
; C view : void inflate_fast(z_streamp strm, unsigned start)
; ABI    : 32-bit, arguments on the stack, plain "ret" (the caller removes
;          the arguments).
; In     : [esp+4] on entry = strm, [esp+8] on entry = start
; Out    : nothing in registers.  Updates next_in/avail_in/next_out/avail_out
;          of the stream, hold/bits of the state and, when a block ends or an
;          error is found, state->mode (plus the stream's msg on error).
; Saves  : edi, esi, ebp, ebx and EFLAGS (pushed on entry, popped on exit).
; Clobb  : eax, ecx, edx; mm0-mm7 on the MMX path (emms is executed on exit).
;
; Code table entries are 32 bits wide and are used as follows:
;   al          = operation flags (0 literal, 16 base+extra bits,
;                 64 not a sub-table link, 32 end of block)
;   ah          = number of bits consumed by this code
;   bits 16..31 = value (literal byte, base length/distance, sub-table index)
;
; Register roles, integer loop:
;   esi = input pointer      edi = output pointer
;   ebp = bit accumulator    bl  = number of valid bits in ebp
; Register roles, MMX loop:
;   esi = input pointer      edi = output pointer
;   mm0 = bit accumulator    ebp = number of valid bits in mm0
;   mm1 = shift still to be applied to mm0 (bits of the previous code)
;   mm3/mm4 = length mask    mm2/mm5 = distance mask
;   ebx = length table pointer, or distance while a match is copied
;   edx = match length
;
; NOTE(review): a back-reference is only checked against the window size
; (fr_wsize); whether that much window data has actually been written is not
; examined here -- confirm against the reference C inflate_fast.
;-----------------------------------------------------------------------------

; ---- local stack frame (offsets from esp after the prologue) ---------------
frame_size      equ     64
fr_lmask        equ     0               ; (1 << lenbits) - 1
fr_dmask        equ     4               ; (1 << distbits) - 1
fr_lcode        equ     8               ; state->lencode
fr_dcode        equ     12              ; state->distcode
fr_out_end      equ     16              ; next_out + avail_out - out_margin
fr_in_last      equ     20              ; next_in + avail_in - in_margin
fr_len          equ     24              ; current match length (integer loop)
fr_buf          equ     28              ; scratch_size bytes of padded input
fr_out_beg      equ     40              ; next_out - (start - avail_out)
fr_in           equ     44              ; saved input pointer
fr_write        equ     48              ; state->write
fr_wsize        equ     52              ; state->wsize
fr_window       equ     56              ; state->window
fr_out          equ     60              ; next_out on entry

; ---- arguments: frame + 5 pushed dwords + return address -------------------
arg_strm        equ     (frame_size+24)
arg_start       equ     (frame_size+28)

; ---- stream structure members touched here (32-bit layout) -----------------
zs_next_in      equ     0
zs_avail_in     equ     4
zs_next_out     equ     12
zs_avail_out    equ     16
zs_msg          equ     24
zs_state        equ     28

; ---- tuning constants -------------------------------------------------------
in_margin       equ     11              ; input bytes kept in reserve
out_margin      equ     257             ; output bytes kept in reserve
scratch_size    equ     12              ; size of fr_buf

; ---- values of inflate_fast_use_mmx -----------------------------------------
use_mmx_yes     equ     2
use_mmx_no      equ     3

; ---- CPU probing ------------------------------------------------------------
eflags_id       equ     0200000h        ; EFLAGS bit toggled to detect CPUID
cpuid_mmx       equ     0800000h        ; CPUID(1).edx bit tested for MMX

ALIGN 4
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd
        sub     esp,frame_size
        cld                             ; string instructions must count upwards

        mov     esi, [esp+arg_strm]
        mov     edi, [esi+zs_state]

        ; in_last = next_in + avail_in - in_margin
        mov     edx, [esi+zs_avail_in]
        mov     eax, [esi+zs_next_in]

        add     edx,eax
        sub     edx,in_margin

        mov     [esp+fr_in],eax
        mov     [esp+fr_in_last],edx

        ; out_beg = next_out - (start - avail_out)
        ; out_end = next_out + avail_out - out_margin
        mov     ebp, [esp+arg_start]
        mov     ecx, [esi+zs_avail_out]
        mov     ebx, [esi+zs_next_out]

        sub     ebp,ecx
        neg     ebp
        add     ebp,ebx

        sub     ecx,out_margin
        add     ecx,ebx

        mov     [esp+fr_out],ebx
        mov     [esp+fr_out_beg],ebp
        mov     [esp+fr_out_end],ecx

        ; cache the decoding tables and build the table index masks
        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]

        mov     [esp+fr_lcode],eax
        mov     [esp+fr_dcode],ecx

        mov     eax,1
        mov     ecx, [edi+lenbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+fr_lmask],eax

        mov     eax,1
        mov     ecx, [edi+distbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+fr_dmask],eax

        ; cache the sliding window description
        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]

        mov     [esp+fr_wsize],eax
        mov     [esp+fr_write],ecx
        mov     [esp+fr_window],edx

        mov     ebp, [edi+hold_state]   ; ebp = bit accumulator
        mov     ebx, [edi+bits_state]   ; ebx = number of valid bits

        mov     esi, [esp+fr_in]
        mov     ecx, [esp+fr_in_last]
        cmp     ecx,esi
        ja      L_align_long

        ; in_last <= in: copy the remaining input into the zero padded
        ; scratch buffer and decode from there (a single loop pass).
        add     ecx,in_margin
        sub     ecx,esi                 ; ecx = avail_in
        mov     eax,scratch_size
        sub     eax,ecx                 ; eax = padding bytes
        lea     edi, [esp+fr_buf]
        rep     movsb
        mov     ecx,eax
        xor     eax,eax
        rep     stosb
        lea     esi, [esp+fr_buf]
        mov     [esp+fr_in_last],esi    ; marks "scratch buffer in use"
        jmp     L_is_aligned


L_align_long:
        ; feed single bytes into the accumulator until esi is 4-byte aligned
        test    esi,3
        jz      L_is_aligned
        xor     eax,eax
        mov     al, [esi]
        inc     esi
        mov     ecx,ebx
        add     ebx,8
        shl     eax,cl
        or      ebp,eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+fr_out]

L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx],use_mmx_yes
        je      L_init_mmx
        ja      L_do_loop

        ; Flag not decided yet: probe the CPU once, store the verdict in
        ; inflate_fast_use_mmx and evaluate the flag again.
        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp],eflags_id
        popfd
        pushfd
        pop     edx
        xor     edx,eax                 ; bit did not toggle -> no CPUID
        jz      L_dont_use_mmx
        xor     eax,eax
        cpuid
        cmp     ebx,0756e6547h          ; 'Genu'
        jne     L_dont_use_mmx
        cmp     ecx,06c65746eh          ; 'ntel'
        jne     L_dont_use_mmx
        cmp     edx,049656e69h          ; 'ineI'
        jne     L_dont_use_mmx
        mov     eax,1
        cpuid
        shr     eax,8
        and     eax,15
        cmp     eax,6                   ; MMX loop is used on family 6 only
        jne     L_dont_use_mmx
        test    edx,cpuid_mmx
        jz      L_dont_use_mmx
        mov     dword ptr [inflate_fast_use_mmx],use_mmx_yes
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx],use_mmx_no
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx

;-----------------------------------------------------------------------------
; Integer decoding loop
;-----------------------------------------------------------------------------
ALIGN 4
L_do_loop:
        cmp     bl,15
        ja      L_get_length_code

        ; 15 bits or fewer left: append 16 more input bits
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_length_code:
        mov     edx, [esp+fr_lmask]
        mov     ecx, [esp+fr_lcode]
        and     edx,ebp
        mov     eax, [ecx+edx*4]

L_dolen:
        mov     cl,ah                   ; drop the bits of this code
        sub     bl,ah
        shr     ebp,cl

        test    al,al
        jnz     L_test_for_length_base

        shr     eax,16                  ; op == 0 : literal byte
        stosb

L_while_test:
        cmp     [esp+fr_out_end],edi
        jbe     L_break_loop

        cmp     [esp+fr_in_last],esi
        ja      L_do_loop
        jmp     L_break_loop

L_test_for_length_base:
        mov     edx,eax
        shr     edx,16                  ; edx = base length
        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_length
        and     cl,15                   ; cl = number of extra length bits
        jz      L_save_len
        cmp     bl,cl
        jae     L_add_bits_to_len

        mov     ch,cl                   ; refill, keeping cl in ch meanwhile
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_len:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax

L_save_len:
        mov     [esp+fr_len],edx


L_decode_distance:
        cmp     bl,15
        ja      L_get_distance_code

        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_distance_code:
        mov     edx, [esp+fr_dmask]
        mov     ecx, [esp+fr_dcode]
        and     edx,ebp
        mov     eax, [ecx+edx*4]


L_dodist:
        mov     edx,eax
        shr     edx,16                  ; edx = base distance
        mov     cl,ah
        sub     bl,ah
        shr     ebp,cl

        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_dist
        and     cl,15                   ; cl = number of extra distance bits
        jz      L_check_dist_one
        cmp     bl,cl
        jae     L_add_bits_to_dist

        mov     ch,cl
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_dist:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                 ; edx = distance; falls through

L_check_window:
        mov     [esp+fr_in],esi         ; esi is needed as copy source
        mov     eax,edi
        sub     eax, [esp+fr_out_beg]   ; eax = out - out_beg

        cmp     eax,edx
        jb      L_clip_window           ; source starts before out_beg

        mov     ecx, [esp+fr_len]
        mov     esi,edi
        sub     esi,edx

        ; first three bytes by hand (assumes len >= 3), the rest by rep movsb
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep     movsb

        mov     esi, [esp+fr_in]
        jmp     L_while_test

ALIGN 4
L_check_dist_one:
        ; distance without extra bits: distance 1 with at least one byte
        ; already written is a run of the previous byte
        cmp     edx,1
        jne     L_check_window
        cmp     [esp+fr_out_beg],edi
        je      L_check_window

        dec     edi
        mov     ecx, [esp+fr_len]
        mov     al, [edi]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep     stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al,64
        jnz     L_test_for_end_of_block

        ; sub-table link: index = value + (hold & ((1 << op) - 1))
        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+fr_lcode]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al,64
        jnz     L_invalid_distance_code

        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+fr_dcode]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

ALIGN 4
L_clip_window:
        ; part of the match source lies in the sliding window
        mov     ecx,eax
        mov     eax, [esp+fr_wsize]
        neg     ecx
        mov     esi, [esp+fr_window]

        cmp     eax,edx
        jb      L_invalid_distance_too_far      ; wsize < distance

        add     ecx,edx                 ; ecx = distance - (out - out_beg)
        cmp     dword ptr [esp+fr_write],0
        jne     L_wrap_around_window

        ; write == 0 : source is the tail of the window
        sub     eax,ecx
        add     esi,eax

        mov     eax, [esp+fr_len]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx                 ; window part, then from the output
        rep     movsb
        mov     esi,edi
        sub     esi,edx
        jmp     L_do_copy1

L_wrap_around_window:
        mov     eax, [esp+fr_write]
        cmp     ecx,eax
        jbe     L_contiguous_in_window

        ; source begins at the window tail and wraps to the window start
        add     esi, [esp+fr_wsize]
        add     esi,eax
        sub     esi,ecx
        sub     ecx,eax                 ; ecx = bytes available at the tail

        mov     eax, [esp+fr_len]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi, [esp+fr_window]
        mov     ecx, [esp+fr_write]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,edx
        jmp     L_do_copy1

L_contiguous_in_window:
        add     esi,eax
        sub     esi,ecx

        mov     eax, [esp+fr_len]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,edx

L_do_copy1:
        mov     ecx,eax                 ; eax = bytes still to copy
        rep     movsb

        mov     esi, [esp+fr_in]
        jmp     L_while_test

;-----------------------------------------------------------------------------
; MMX decoding loop
;-----------------------------------------------------------------------------
ALIGN 4
L_init_mmx:
        emms

        movd    mm0,ebp                 ; mm0 = bit accumulator
        mov     ebp,ebx                 ; ebp = number of valid bits

        movd    mm4,dword ptr [esp+fr_lmask]
        movq    mm3,mm4
        movd    mm5,dword ptr [esp+fr_dmask]
        movq    mm2,mm5
        pxor    mm1,mm1                 ; nothing to shift out yet
        mov     ebx, [esp+fr_lcode]
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0,mm1                 ; drop the bits of the previous code

        cmp     ebp,32
        ja      L_get_length_code_mmx

        ; 32 bits or fewer left: append 32 more input bits
        movd    mm6,ebp
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_length_code_mmx:
        pand    mm4,mm0
        movd    eax,mm4
        movq    mm4,mm3                 ; restore the length mask
        mov     eax, [ebx+eax*4]

L_dolen_mmx:
        movzx   ecx,ah
        movd    mm1,ecx                 ; shift is applied lazily
        sub     ebp,ecx

        test    al,al
        jnz     L_test_for_length_base_mmx

        shr     eax,16                  ; op == 0 : literal byte
        stosb

L_while_test_mmx:
        cmp     [esp+fr_out_end],edi
        jbe     L_break_loop

        cmp     [esp+fr_in_last],esi
        ja      L_do_loop_mmx
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx,eax
        shr     edx,16                  ; edx = base length

        test    al,16
        jz      L_test_for_second_level_length_mmx
        and     eax,15                  ; eax = number of extra length bits
        jz      L_decode_distance_mmx

        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx,ecx

L_decode_distance_mmx:
        psrlq   mm0,mm1

        cmp     ebp,32
        ja      L_get_dist_code_mmx

        movd    mm6,ebp
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+fr_dcode]
        pand    mm5,mm0
        movd    eax,mm5
        movq    mm5,mm2                 ; restore the distance mask
        mov     eax, [ebx+eax*4]

L_dodist_mmx:
        movzx   ecx,ah
        mov     ebx,eax
        shr     ebx,16                  ; ebx = base distance
        sub     ebp,ecx
        movd    mm1,ecx

        test    al,16
        jz      L_test_for_second_level_dist_mmx
        and     eax,15                  ; eax = number of extra distance bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx,ecx

L_check_window_mmx:
        mov     [esp+fr_in],esi         ; esi is needed as copy source
        mov     eax,edi
        sub     eax, [esp+fr_out_beg]   ; eax = out - out_beg

        cmp     eax,ebx
        jb      L_clip_window_mmx       ; source starts before out_beg

        mov     ecx,edx
        mov     esi,edi
        sub     esi,ebx

        ; first three bytes by hand (assumes len >= 3), the rest by rep movsb
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep     movsb

        mov     esi, [esp+fr_in]
        mov     ebx, [esp+fr_lcode]
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        ; distance 1 with at least one byte already written: byte run
        cmp     ebx,1
        jne     L_check_window_mmx
        cmp     [esp+fr_out_beg],edi
        je      L_check_window_mmx

        dec     edi
        mov     ecx,edx
        mov     al, [edi]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep     stosb

        mov     ebx, [esp+fr_lcode]
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al,64
        jnz     L_test_for_end_of_block

        ; sub-table link: index = value + (hold & mask[op])
        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx,edx
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al,64
        jnz     L_invalid_distance_code

        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+fr_dcode]
        add     ecx,ebx
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

ALIGN 4
L_clip_window_mmx:
        ; part of the match source lies in the sliding window
        mov     ecx,eax
        mov     eax, [esp+fr_wsize]
        neg     ecx
        mov     esi, [esp+fr_window]

        cmp     eax,ebx
        jb      L_invalid_distance_too_far      ; wsize < distance

        add     ecx,ebx                 ; ecx = distance - (out - out_beg)
        cmp     dword ptr [esp+fr_write],0
        jne     L_wrap_around_window_mmx

        ; write == 0 : source is the tail of the window
        sub     eax,ecx
        add     esi,eax

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx                 ; window part, then from the output
        rep     movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:
        mov     eax, [esp+fr_write]
        cmp     ecx,eax
        jbe     L_contiguous_in_window_mmx

        ; source begins at the window tail and wraps to the window start
        add     esi, [esp+fr_wsize]
        add     esi,eax
        sub     esi,ecx
        sub     ecx,eax                 ; ecx = bytes available at the tail

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi, [esp+fr_window]
        mov     ecx, [esp+fr_write]
        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi,eax
        sub     esi,ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,ebx

L_do_copy1_mmx:
        mov     ecx,edx                 ; edx = bytes still to copy
        rep     movsb

        mov     esi, [esp+fr_in]
        mov     ebx, [esp+fr_lcode]
        jmp     L_while_test_mmx

;-----------------------------------------------------------------------------
; Loop exits shared by both decoders.
; ecx = message address (0 for none), edx = new state->mode.
;-----------------------------------------------------------------------------
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al,32
        jz      L_invalid_literal_length_code

        xor     ecx,ecx                 ; end of block: no message
        mov     edx,INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+fr_in]        ; esi was reused by the window code
        mov     ecx, invalid_distance_too_far_msg
        mov     edx,INFLATE_MODE_BAD    ; falls through

L_update_stream_state:
        mov     eax, [esp+arg_strm]
        test    ecx,ecx
        jz      L_skip_msg
        mov     [eax+zs_msg],ecx
L_skip_msg:
        mov     eax, [eax+zs_state]
        mov     [eax+mode_state],edx
        jmp     L_break_loop

ALIGN 4
L_break_loop:
        ; the MMX loop keeps the bit count in ebp, the integer loop in ebx
        cmp     dword ptr [inflate_fast_use_mmx],use_mmx_yes
        jne     L_update_next_in

        mov     ebx,ebp

L_update_next_in:
        ; give whole unused bytes back to the input, keep bits & 7 bits
        mov     eax, [esp+arg_strm]
        mov     ecx,ebx
        mov     edx, [eax+zs_state]
        shr     ecx,3
        sub     esi,ecx
        shl     ecx,3
        sub     ebx,ecx
        mov     [eax+zs_next_out],edi
        mov     [edx+bits_state],ebx
        mov     ecx,ebx

        lea     ebx, [esp+fr_buf]
        cmp     [esp+fr_in_last],ebx
        jne     L_buf_not_used

        ; input was decoded from the scratch buffer: translate esi back to
        ; the caller's buffer and recompute in_last from the stream
        sub     esi,ebx
        mov     ebx, [eax+zs_next_in]
        mov     [esp+fr_in_last],ebx
        add     esi,ebx
        mov     ebx, [eax+zs_avail_in]
        sub     ebx,in_margin
        add     [esp+fr_in_last],ebx

L_buf_not_used:
        mov     [eax+zs_next_in],esi

        mov     ebx,1                   ; ebx = (1 << bits) - 1
        shl     ebx,cl
        dec     ebx

        cmp     dword ptr [inflate_fast_use_mmx],use_mmx_yes
        jne     L_update_hold

        psrlq   mm0,mm1                 ; apply the pending shift
        movd    ebp,mm0

        emms

L_update_hold:
        and     ebp,ebx
        mov     [edx+hold_state],ebp

        ; avail_in = in_last - in + in_margin
        mov     ebx, [esp+fr_in_last]
        cmp     ebx,esi
        jbe     L_last_is_smaller

        sub     ebx,esi
        add     ebx,in_margin
        mov     [eax+zs_avail_in],ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi,ebx
        neg     esi
        add     esi,in_margin
        mov     [eax+zs_avail_in],esi

L_fixup_out:
        ; avail_out = out_end - out + out_margin
        mov     ebx, [esp+fr_out_end]
        cmp     ebx,edi
        jbe     L_end_is_smaller

        sub     ebx,edi
        add     ebx,out_margin
        mov     [eax+zs_avail_out],ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi,ebx
        neg     edi
        add     edi,out_margin
        mov     [eax+zs_avail_out],edi

L_done:
        add     esp,frame_size
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast endp

_TEXT   ends
end