;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GUNZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;    in inflate_state in inflate.h)
; Extra bytes in the inflate_state field offsets used by _inflate_fast:
;   0 for zlib 1.2.2.1 and lower,
;   8 for zlib 1.2.2.2 and higher (dmax and head were added to
;     inflate_state in inflate.h).
zlib1222sup             equ     8


; Values _inflate_fast stores into state->mode: INFLATE_MODE_TYPE when it
; reaches an end-of-block code, INFLATE_MODE_BAD when it rejects the input.
; The gzip-less numbering (3 / 17) is selected only when NO_GUNZIP is
; defined and GUNZIP is not.
; NOTE(review): presumably these have to match the inflate_mode enum in
; inflate.h of the zlib being linked -- confirm.
IFDEF GUNZIP
  INFLATE_MODE_TYPE     equ     11
  INFLATE_MODE_BAD      equ     26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE   equ     11
    INFLATE_MODE_BAD    equ     26
  ELSE
    INFLATE_MODE_TYPE   equ     3
    INFLATE_MODE_BAD    equ     17
  ENDIF
ENDIF


; 75 "inffast.S"
;FILE "inffast.S"

;;;GLOBAL _inflate_fast

;;;SECTION .text


        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

;-----------------------------------------------------------------------
; Writable data.
; inflate_fast_use_mmx selects the decode loop used by _inflate_fast:
;   below 2 (initial value 1) = not probed yet, run the CPUID check,
;   2                         = use the MMX loop,
;   above 2 (3 is stored)     = use the integer loop.
;-----------------------------------------------------------------------
_DATA                   segment
inflate_fast_use_mmx:
        dd      1
_DATA                   ends            ; close the segment before opening _TEXT


;-----------------------------------------------------------------------
; Read-only tables, kept in the code segment.
;-----------------------------------------------------------------------
_TEXT                   segment

ALIGN 4
        ; Unlabelled identification banner; no code in this file refers to it.
        db      'Fast decoding Code from Chris Anderson', 0

; NUL-terminated messages; _inflate_fast stores the address of one of
; them at offset 24 of the stream structure when it flags an error.
ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code', 0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code', 0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back', 0


; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32 (33 dwords).
; The MMX loop indexes it with an extra-bit count already masked to
; 0..15, so only the first 16 entries are reached from this file.
ALIGN 4
inflate_fast_mask:
        dd      000000000h, 000000001h, 000000003h, 000000007h  ; n =  0.. 3
        dd      00000000Fh, 00000001Fh, 00000003Fh, 00000007Fh  ; n =  4.. 7
        dd      0000000FFh, 0000001FFh, 0000003FFh, 0000007FFh  ; n =  8..11
        dd      000000FFFh, 000001FFFh, 000003FFFh, 000007FFFh  ; n = 12..15
        dd      00000FFFFh, 00001FFFFh, 00003FFFFh, 00007FFFFh  ; n = 16..19
        dd      0000FFFFFh, 0001FFFFFh, 0003FFFFFh, 0007FFFFFh  ; n = 20..23
        dd      000FFFFFFh, 001FFFFFFh, 003FFFFFFh, 007FFFFFFh  ; n = 24..27
        dd      00FFFFFFFh, 01FFFFFFFh, 03FFFFFFFh, 07FFFFFFFh  ; n = 28..31
        dd      0FFFFFFFFh                                      ; n = 32


;-----------------------------------------------------------------------
; Byte offsets of the inflate_state fields used below.  All but
; mode_state include zlib1222sup (see its definition).
;-----------------------------------------------------------------------
mode_state      equ     0                       ;/* state->mode     */
wsize_state     equ     (32+zlib1222sup)        ;/* state->wsize    */
write_state     equ     (36+4+zlib1222sup)      ;/* state->write    */
window_state    equ     (40+4+zlib1222sup)      ;/* state->window   */
hold_state      equ     (44+4+zlib1222sup)      ;/* state->hold     */
bits_state      equ     (48+4+zlib1222sup)      ;/* state->bits     */
lencode_state   equ     (64+4+zlib1222sup)      ;/* state->lencode  */
distcode_state  equ     (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state   equ     (72+4+zlib1222sup)      ;/* state->lenbits  */
distbits_state  equ     (76+4+zlib1222sup)      ;/* state->distbits */


;;SECTION .text
; 205 "inffast.S"
;GLOBAL inflate_fast_use_mmx

;SECTION .data


; GLOBAL inflate_fast_use_mmx:object
;.size inflate_fast_use_mmx, 4
; 226 "inffast.S"
;SECTION .text

;-----------------------------------------------------------------------
; _inflate_fast -- 32-bit x86 fast decode loop (assembler version of
;                  inffast.c, see the file header).
;
; Arguments (on the stack, removed by the caller -- the routine ends
; with a plain `ret`):
;   [esp+88] after the prologue = first argument, pointer to the stream
;                                 structure ("strm" below)
;   [esp+92] after the prologue = second argument ("start" below)
; NOTE(review): the C prototype is presumably
;     void inflate_fast(z_streamp strm, unsigned start)
; as declared by zlib -- confirm against the zlib version in use.
;
; Stream fields touched, by offset (the zlib.h names in parentheses are
; the reviewer's reading, not established by this file -- confirm):
;   strm+0   input pointer      (next_in)    read at entry, written at exit
;   strm+4   input byte count   (avail_in)   read at entry, written at exit
;   strm+12  output pointer     (next_out)   read at entry, written at exit
;   strm+16  output byte count  (avail_out)  read at entry, written at exit
;   strm+24  message pointer    (msg)        written when an error is flagged
;   strm+28  pointer to the inflate_state structure
;
; Preserved: edi, esi, ebp, ebx, EFLAGS (saved in the prologue, restored
;            at L_done).
; Clobbered: eax, ecx, edx; mm0-mm7 on the MMX path (emms is executed
;            before returning).
; Global:    may write inflate_fast_use_mmx (CPU probe result).
;
; Stack frame after `sub esp,64`:
;   [esp+ 0]  lmask  = (1 << state->lenbits)  - 1
;   [esp+ 4]  dmask  = (1 << state->distbits) - 1
;   [esp+ 8]  lcode  = state->lencode
;   [esp+12]  dcode  = state->distcode
;   [esp+16]  end    = out + avail_out - 257         (output limit)
;   [esp+20]  last   = in  + avail_in  - 11          (input limit)
;   [esp+24]  len    = current match length (integer loop only)
;   [esp+28]  buf    = 12-byte scratch input buffer, [esp+28]..[esp+39]
;   [esp+40]  beg    = out - (start - avail_out)
;   [esp+44]  in     = input pointer (initial value / saved across copies)
;   [esp+48]  write  = state->write
;   [esp+52]  wsize  = state->wsize
;   [esp+56]  window = state->window
;   [esp+60]  out    = output pointer at entry
;
; Code table entries are dwords: al = op, ah = number of bits consumed,
; upper 16 bits = value.  op == 0 is a literal; op & 16 is a length or
; distance base with (op & 15) extra bits; otherwise, op & 64 == 0 is a
; link to a second-level table; op & 64 with op & 32 is end of block;
; anything else is invalid.
;
; Integer loop registers: esi = in, edi = out, ebp = hold (bit buffer),
;                         bl = bits in hold, edx = length base then distance.
; MMX loop registers:     esi = in, edi = out, mm0 = hold, ebp = bits,
;                         mm1 = bit count still to be shifted out of mm0
;                         (applied by the next psrlq), mm4/mm3 = lmask
;                         (working copy / master), mm5/mm2 = dmask
;                         (working copy / master), edx = length,
;                         ebx = lcode pointer or distance, mm6/mm7 = scratch.
;-----------------------------------------------------------------------
ALIGN 4
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd                                  ; direction flag is restored at exit
        sub     esp,64
        cld                                     ; string instructions count upwards

        mov     esi, [esp+88]                   ; esi = strm
        mov     edi, [esi+28]                   ; edi = state

        mov     edx, [esi+4]                    ; input byte count
        mov     eax, [esi+0]                    ; input pointer

        add     edx,eax
        sub     edx,11                          ; last = in + count - 11

        mov     [esp+44],eax                    ; in
        mov     [esp+20],edx                    ; last

        mov     ebp, [esp+92]                   ; start
        mov     ecx, [esi+16]                   ; output byte count
        mov     ebx, [esi+12]                   ; output pointer

        sub     ebp,ecx
        neg     ebp
        add     ebp,ebx                         ; beg = out - (start - count)

        sub     ecx,257
        add     ecx,ebx                         ; end = out + count - 257

        mov     [esp+60],ebx                    ; out
        mov     [esp+40],ebp                    ; beg
        mov     [esp+16],ecx                    ; end
; 285 "inffast.S"
        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]

        mov     [esp+8],eax                     ; lcode
        mov     [esp+12],ecx                    ; dcode

        mov     eax,1
        mov     ecx, [edi+lenbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+0],eax                     ; lmask

        mov     eax,1
        mov     ecx, [edi+distbits_state]
        shl     eax,cl
        dec     eax
        mov     [esp+4],eax                     ; dmask

        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]

        mov     [esp+52],eax                    ; wsize
        mov     [esp+48],ecx                    ; write
        mov     [esp+56],edx                    ; window

        mov     ebp, [edi+hold_state]           ; ebp = hold
        mov     ebx, [edi+bits_state]           ; ebx = bits
; 321 "inffast.S"
        mov     esi, [esp+44]                   ; esi = in
        mov     ecx, [esp+20]                   ; ecx = last
        cmp     ecx,esi
        ja      L_align_long                    ; last > in: decode in place

        ; last <= in: copy the remaining input into buf, zero-fill buf
        ; up to 12 bytes, and decode from buf (in = last = buf).
        add     ecx,11
        sub     ecx,esi                         ; ecx = input bytes available
        mov     eax,12
        sub     eax,ecx                         ; eax = padding byte count
        lea     edi, [esp+28]
        rep     movsb
        mov     ecx,eax
        xor     eax,eax
        rep     stosb
        lea     esi, [esp+28]
        mov     [esp+20],esi
        jmp     L_is_aligned


        ; Feed single bytes into hold until esi is 4-byte aligned.
L_align_long:
        test    esi,3
        jz      L_is_aligned
        xor     eax,eax
        mov     al, [esi]
        inc     esi
        mov     ecx,ebx
        add     ebx,8
        shl     eax,cl
        or      ebp,eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+60]                   ; edi = out
; 366 "inffast.S"
        ; Dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 ->
        ; integer loop, below 2 -> probe the CPU, record the answer and
        ; come back here.
L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx],2
        je      L_init_mmx
        ja      L_do_loop

        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp],0200000h        ; toggle EFLAGS bit 21 (ID)

        popfd
        pushfd
        pop     edx
        xor     edx,eax
        jz      L_dont_use_mmx                  ; bit did not change: no CPUID
        xor     eax,eax
        cpuid                                   ; leaf 0: vendor string
        cmp     ebx,0756e6547h                  ; "Genu"
        jne     L_dont_use_mmx
        cmp     ecx,06c65746eh                  ; "ntel"
        jne     L_dont_use_mmx
        cmp     edx,049656e69h                  ; "ineI"
        jne     L_dont_use_mmx
        mov     eax,1
        cpuid                                   ; leaf 1: signature and features
        shr     eax,8
        and     eax,15                          ; family field
        cmp     eax,6
        jne     L_dont_use_mmx
        test    edx,0800000h                    ; feature bit 23 (MMX)
        jz      L_dont_use_mmx
        mov     dword ptr [inflate_fast_use_mmx],2
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx],3
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx
; 426 "inffast.S"

;---------------------------- integer loop -----------------------------
ALIGN 4
L_do_loop:
; 437 "inffast.S"
        cmp     bl,15
        ja      L_get_length_code

        xor     eax,eax                         ; 15 bits or fewer: add 16 more
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_length_code:
        mov     edx, [esp+0]
        mov     ecx, [esp+8]
        and     edx,ebp
        mov     eax, [ecx+edx*4]                ; eax = lcode[hold & lmask]

L_dolen:
        mov     cl,ah
        sub     bl,ah
        shr     ebp,cl                          ; drop the code's bits

        test    al,al
        jnz     L_test_for_length_base

        shr     eax,16                          ; op == 0: literal byte
        stosb

        ; Keep going while out < end and in < last.
L_while_test:
        cmp     [esp+16],edi
        jbe     L_break_loop

        cmp     [esp+20],esi
        ja      L_do_loop
        jmp     L_break_loop

L_test_for_length_base:
; 502 "inffast.S"
        mov     edx,eax
        shr     edx,16                          ; edx = value field
        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_length
        and     cl,15                           ; cl = extra length bits
        jz      L_save_len
        cmp     bl,cl
        jae     L_add_bits_to_len

        mov     ch,cl                           ; refill, keeping cl in ch
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_len:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                         ; len = base + extra bits

L_save_len:
        mov     [esp+24],edx


L_decode_distance:
; 549 "inffast.S"
        cmp     bl,15
        ja      L_get_distance_code

        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax

L_get_distance_code:
        mov     edx, [esp+4]
        mov     ecx, [esp+12]
        and     edx,ebp
        mov     eax, [ecx+edx*4]                ; eax = dcode[hold & dmask]


L_dodist:
        mov     edx,eax
        shr     edx,16                          ; edx = value field
        mov     cl,ah
        sub     bl,ah
        shr     ebp,cl
; 584 "inffast.S"
        mov     cl,al

        test    al,16
        jz      L_test_for_second_level_dist
        and     cl,15                           ; cl = extra distance bits
        jz      L_check_dist_one
        cmp     bl,cl
        jae     L_add_bits_to_dist

        mov     ch,cl
        xor     eax,eax
        lodsw
        mov     cl,bl
        add     bl,16
        shl     eax,cl
        or      ebp,eax
        mov     cl,ch

L_add_bits_to_dist:
        mov     eax,1
        shl     eax,cl
        dec     eax
        sub     bl,cl
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                         ; dist = base + extra bits

L_check_window:
; 625 "inffast.S"
        mov     [esp+44],esi                    ; park in; esi becomes copy source
        mov     eax,edi
        sub     eax, [esp+40]                   ; eax = out - beg

        cmp     eax,edx
        jb      L_clip_window                   ; dist reaches into the window

        mov     ecx, [esp+24]
        mov     esi,edi
        sub     esi,edx                         ; source = out - dist

        ; Three bytes by hand, the remaining len - 3 with rep movsb.
        ; NOTE(review): relies on len >= 3; not checked here -- confirm
        ; against the code tables supplied by the caller.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep     movsb

        mov     esi, [esp+44]
        jmp     L_while_test

        ; No extra bits: when dist == 1 and out != beg, the match is a
        ; run of the previous output byte, written with rep stosb.
ALIGN 4
L_check_dist_one:
        cmp     edx,1
        jne     L_check_window
        cmp     [esp+40],edi
        je      L_check_window

        dec     edi
        mov     ecx, [esp+24]
        mov     al, [edi]                       ; al = out[-1]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep     stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al,64
        jnz     L_test_for_end_of_block

        mov     eax,1                           ; index = value + (hold & ((1<<op)-1))
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+8]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al,64
        jnz     L_invalid_distance_code

        mov     eax,1
        shl     eax,cl
        dec     eax
        and     eax,ebp
        add     eax,edx
        mov     edx, [esp+12]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

        ; Part of the match lies in the sliding window.
        ; On entry eax = out - beg, edx = dist.
ALIGN 4
L_clip_window:
; 721 "inffast.S"
        mov     ecx,eax
        mov     eax, [esp+52]                   ; wsize
        neg     ecx
        mov     esi, [esp+56]                   ; window

        cmp     eax,edx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx,edx                         ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window

        sub     eax,ecx                         ; write == 0:
        add     esi,eax                         ; source = window + wsize - ecx
; 749 "inffast.S"
        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1                      ; whole match comes from the window

        sub     eax,ecx                         ; window part first ...
        rep     movsb
        mov     esi,edi                         ; ... remainder from out - dist
        sub     esi,edx
        jmp     L_do_copy1

L_wrap_around_window:
; 793 "inffast.S"
        mov     eax, [esp+48]                   ; write
        cmp     ecx,eax
        jbe     L_contiguous_in_window

        add     esi, [esp+52]                   ; source = window + wsize + write - ecx
        add     esi,eax
        sub     esi,ecx
        sub     ecx,eax                         ; bytes up to the end of the window


        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi, [esp+56]                   ; continue at the window start
        mov     ecx, [esp+48]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi,edi                         ; remainder from out - dist
        sub     esi,edx
        jmp     L_do_copy1

L_contiguous_in_window:
; 836 "inffast.S"
        add     esi,eax                         ; source = window + write - ecx
        sub     esi,ecx


        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,edx

L_do_copy1:
; 862 "inffast.S"
        mov     ecx,eax                         ; eax = bytes still to copy
        rep     movsb

        mov     esi, [esp+44]                   ; esi = in again
        jmp     L_while_test
; 878 "inffast.S"

;------------------------------ MMX loop -------------------------------
ALIGN 4
L_init_mmx:
        emms

        movd    mm0,ebp                         ; mm0 = hold
        mov     ebp,ebx                         ; ebp = bits
; 896 "inffast.S"
        movd    mm4,dword ptr [esp+0]           ; lmask, working copy
        movq    mm3,mm4                         ; lmask, master copy
        movd    mm5,dword ptr [esp+4]           ; dmask, working copy
        movq    mm2,mm5                         ; dmask, master copy
        pxor    mm1,mm1                         ; nothing to shift out yet
        mov     ebx, [esp+8]                    ; ebx = lcode
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0,mm1                         ; drop the bits of the previous code

        cmp     ebp,32
        ja      L_get_length_code_mmx

        movd    mm6,ebp                         ; 32 bits or fewer: add 32 more
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_length_code_mmx:
        pand    mm4,mm0
        movd    eax,mm4
        movq    mm4,mm3                         ; restore the working lmask
        mov     eax, [ebx+eax*4]                ; eax = lcode[hold & lmask]

L_dolen_mmx:
        movzx   ecx,ah
        movd    mm1,ecx                         ; shift is applied by the next psrlq
        sub     ebp,ecx

        test    al,al
        jnz     L_test_for_length_base_mmx

        shr     eax,16                          ; op == 0: literal byte
        stosb

        ; Keep going while out < end and in < last.
L_while_test_mmx:
        cmp     [esp+16],edi
        jbe     L_break_loop

        cmp     [esp+20],esi
        ja      L_do_loop_mmx
        jmp     L_break_loop

L_test_for_length_base_mmx:

        mov     edx,eax
        shr     edx,16                          ; edx = length base

        test    al,16
        jz      L_test_for_second_level_length_mmx
        and     eax,15                          ; eax = extra length bits
        jz      L_decode_distance_mmx

        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx,ecx                         ; len = base + extra bits

L_decode_distance_mmx:
        psrlq   mm0,mm1

        cmp     ebp,32
        ja      L_get_dist_code_mmx

        movd    mm6,ebp
        movd    mm7,dword ptr [esi]
        add     esi,4
        psllq   mm7,mm6
        add     ebp,32
        por     mm0,mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+12]                   ; ebx = dcode
        pand    mm5,mm0
        movd    eax,mm5
        movq    mm5,mm2                         ; restore the working dmask
        mov     eax, [ebx+eax*4]                ; eax = dcode[hold & dmask]

L_dodist_mmx:

        movzx   ecx,ah
        mov     ebx,eax
        shr     ebx,16                          ; ebx = distance base
        sub     ebp,ecx
        movd    mm1,ecx

        test    al,16
        jz      L_test_for_second_level_dist_mmx
        and     eax,15                          ; eax = extra distance bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0,mm1
        movd    mm1,eax
        movd    ecx,mm0
        sub     ebp,eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx,ecx                         ; dist = base + extra bits

L_check_window_mmx:
        mov     [esp+44],esi                    ; park in; esi becomes copy source
        mov     eax,edi
        sub     eax, [esp+40]                   ; eax = out - beg

        cmp     eax,ebx
        jb      L_clip_window_mmx

        mov     ecx,edx
        mov     esi,edi
        sub     esi,ebx                         ; source = out - dist

        ; Same three-bytes-then-rep copy as the integer loop; dl is
        ; free to use because the length is already in ecx.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep     movsb

        mov     esi, [esp+44]
        mov     ebx, [esp+8]                    ; ebx = lcode again
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        cmp     ebx,1
        jne     L_check_window_mmx
        cmp     [esp+40],edi
        je      L_check_window_mmx

        dec     edi
        mov     ecx,edx
        mov     al, [edi]                       ; al = out[-1]
        sub     ecx,3

        mov     [edi+1],al
        mov     [edi+2],al
        mov     [edi+3],al
        add     edi,4
        rep     stosb

        mov     ebx, [esp+8]
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al,64
        jnz     L_test_for_end_of_block

        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx,edx
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al,64
        jnz     L_invalid_distance_code

        and     eax,15
        psrlq   mm0,mm1
        movd    ecx,mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+12]
        add     ecx,ebx
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

        ; Same window handling as L_clip_window, with the length in edx
        ; and the distance in ebx.
ALIGN 4
L_clip_window_mmx:

        mov     ecx,eax
        mov     eax, [esp+52]                   ; wsize
        neg     ecx
        mov     esi, [esp+56]                   ; window

        cmp     eax,ebx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx,ebx                         ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window_mmx

        sub     eax,ecx
        add     esi,eax                         ; source = window + wsize - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:

        mov     eax, [esp+48]                   ; write
        cmp     ecx,eax
        jbe     L_contiguous_in_window_mmx

        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx
        sub     ecx,eax


        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi, [esp+56]
        mov     ecx, [esp+48]
        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,ebx
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:

        add     esi,eax
        sub     esi,ecx


        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep     movsb
        mov     esi,edi
        sub     esi,ebx

L_do_copy1_mmx:


        mov     ecx,edx                         ; edx = bytes still to copy
        rep     movsb

        mov     esi, [esp+44]
        mov     ebx, [esp+8]
        jmp     L_while_test_mmx
; 1174 "inffast.S"

;------------------- loop exits shared by both loops -------------------
; Each exit loads ecx = message pointer (0 for none) and edx = new
; state->mode, then goes through L_update_stream_state.
L_invalid_distance_code:

        mov     ecx, invalid_distance_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:

        test    al,32
        jz      L_invalid_literal_length_code

        xor     ecx,ecx                         ; end of block: no message
        mov     edx,INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:

        mov     ecx, invalid_literal_length_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:

        mov     esi, [esp+44]                   ; esi held the window pointer
        mov     ecx, invalid_distance_too_far_msg
        mov     edx,INFLATE_MODE_BAD

L_update_stream_state:

        mov     eax, [esp+88]                   ; eax = strm
        test    ecx,ecx
        jz      L_skip_msg
        mov     [eax+24],ecx                    ; store the message pointer
L_skip_msg:
        mov     eax, [eax+28]
        mov     [eax+mode_state],edx
        jmp     L_break_loop

;------------------- write results back and return ---------------------
ALIGN 4
L_break_loop:
; 1243 "inffast.S"
        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_next_in

        mov     ebx,ebp                         ; MMX loop keeps bits in ebp

L_update_next_in:
; 1266 "inffast.S"
        mov     eax, [esp+88]                   ; eax = strm
        mov     ecx,ebx
        mov     edx, [eax+28]                   ; edx = state
        shr     ecx,3
        sub     esi,ecx                         ; give back whole unused bytes
        shl     ecx,3
        sub     ebx,ecx                         ; bits is now below 8
        mov     [eax+12],edi                    ; store the output pointer
        mov     [edx+bits_state],ebx
        mov     ecx,ebx

        lea     ebx, [esp+28]
        cmp     [esp+20],ebx
        jne     L_buf_not_used

        ; The scratch buffer was decoded: translate esi back into the
        ; caller's input buffer and recompute last from the stream.
        sub     esi,ebx
        mov     ebx, [eax+0]
        mov     [esp+20],ebx
        add     esi,ebx
        mov     ebx, [eax+4]
        sub     ebx,11
        add     [esp+20],ebx

L_buf_not_used:
        mov     [eax+0],esi                     ; store the input pointer

        mov     ebx,1
        shl     ebx,cl
        dec     ebx                             ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx],2
        jne     L_update_hold

        psrlq   mm0,mm1                         ; drop the last code's bits
        movd    ebp,mm0                         ; ebp = low 32 bits of hold

        emms

L_update_hold:

        and     ebp,ebx                         ; keep only the valid bits
        mov     [edx+hold_state],ebp

        ; Input byte count = last - in + 11.  Both arms below produce
        ; that same value modulo 2^32.
        mov     ebx, [esp+20]
        cmp     ebx,esi
        jbe     L_last_is_smaller

        sub     ebx,esi
        add     ebx,11
        mov     [eax+4],ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi,ebx
        neg     esi
        add     esi,11
        mov     [eax+4],esi

        ; Output byte count = end - out + 257, computed the same way.
L_fixup_out:

        mov     ebx, [esp+16]
        cmp     ebx,edi
        jbe     L_end_is_smaller

        sub     ebx,edi
        add     ebx,257
        mov     [eax+16],ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi,ebx
        neg     edi
        add     edi,257
        mov     [eax+16],edi

L_done:
        add     esp,64
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast endp

_TEXT   ends
end