;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
zlib1222sup             equ     8


; Values written to state->mode when the fast loop stops at end-of-block
; (INFLATE_MODE_TYPE) or on a corrupt stream (INFLATE_MODE_BAD).  They differ
; depending on whether the library was built with gzip support.
IFDEF GUNZIP
  INFLATE_MODE_TYPE     equ     11
  INFLATE_MODE_BAD      equ     26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE   equ     11
    INFLATE_MODE_BAD    equ     26
  ELSE
    INFLATE_MODE_TYPE   equ     3
    INFLATE_MODE_BAD    equ     17
  ENDIF
ENDIF


        .586p
        .mmx

        name    inflate_fast_x86
        .MODEL  FLAT

;-----------------------------------------------------------------------------
; Writable data
;-----------------------------------------------------------------------------
_DATA   segment

; Decoder selection flag, tested at L_check_mmx:
;   below 2 : not decided yet, run the CPU detection (initial value is 1)
;   2       : use the MMX loop
;   above 2 : use the plain integer loop (detection stores 3)
inflate_fast_use_mmx:
        dd      1

_DATA   ends

;-----------------------------------------------------------------------------
; Code and read-only tables
;-----------------------------------------------------------------------------
_TEXT   segment

ALIGN 4
        db      'Fast decoding Code from Chris Anderson'
        db      0

ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code'
        db      0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code'
        db      0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back'
        db      0


; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32; used by the MMX loop to
; isolate the low n bits of the bit accumulator.
ALIGN 4
inflate_fast_mask:
        dd      0, 1, 3, 7
        dd      15, 31, 63, 127
        dd      255, 511, 1023, 2047
        dd      4095, 8191, 16383, 32767
        dd      65535, 131071, 262143, 524287
        dd      1048575, 2097151, 4194303, 8388607
        dd      16777215, 33554431, 67108863, 134217727
        dd      268435455, 536870911, 1073741823, 2147483647
        dd      4294967295


; Byte offsets into the inflate_state structure (see inflate.h).
mode_state      equ     0                       ;/* state->mode     */
wsize_state     equ     (32+zlib1222sup)        ;/* state->wsize    */
write_state     equ     (36+4+zlib1222sup)      ;/* state->write    */
window_state    equ     (40+4+zlib1222sup)      ;/* state->window   */
hold_state      equ     (44+4+zlib1222sup)      ;/* state->hold     */
bits_state      equ     (48+4+zlib1222sup)      ;/* state->bits     */
lencode_state   equ     (64+4+zlib1222sup)      ;/* state->lencode  */
distcode_state  equ     (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state   equ     (72+4+zlib1222sup)      ;/* state->lenbits  */
distbits_state  equ     (76+4+zlib1222sup)      ;/* state->distbits */

; Byte offsets into the stream structure passed as first argument.  The roles
; follow from how the code uses them; the names are the ones zlib's z_stream
; uses for these roles -- confirm against zlib.h when changing the layout.
next_in_strm    equ     0       ; input pointer
avail_in_strm   equ     4       ; input byte count
next_out_strm   equ     12      ; output pointer
avail_out_strm  equ     16      ; output byte count
msg_strm        equ     24      ; error message pointer
state_strm      equ     28      ; pointer to the inflate_state

; Stack frame, as offsets from esp after the prologue.  The prologue pushes
; five dwords (edi, esi, ebp, ebx, eflags = 20 bytes) and reserves
; local_var_size bytes, so the return address sits at local_var_size+20.
local_var_size  equ     64
lmask_stk       equ     0       ; (1 << lenbits) - 1
dmask_stk       equ     4       ; (1 << distbits) - 1
lcode_stk       equ     8       ; state->lencode
dcode_stk       equ     12      ; state->distcode
end_stk         equ     16      ; output limit: next_out + avail_out - 257
last_stk        equ     20      ; input limit:  next_in + avail_in - 11
len_stk         equ     24      ; match length (integer loop only)
buf_stk         equ     28      ; 12-byte bounce buffer for short input
beg_stk         equ     40      ; next_out - (start - avail_out)
in_stk          equ     44      ; saved input pointer
write_stk       equ     48      ; state->write
wsize_stk       equ     52      ; state->wsize
window_stk      equ     56      ; state->window
out_stk         equ     60      ; initial output pointer
strm_stk        equ     (local_var_size+24)     ; first argument
start_stk       equ     (local_var_size+28)     ; second argument


;-----------------------------------------------------------------------------
; _inflate_fast
;
; Presumably the assembler replacement for zlib's
;     void inflate_fast(z_streamp strm, unsigned start)
; (confirm against inffast.c).  Two dword arguments are read from the stack and
; the routine ends with a plain `ret`, so the caller removes the arguments.
;
; In:    [esp+4] = stream pointer, [esp+8] = start
; Out:   nothing in registers; the stream (input/output pointers and counts,
;        msg) and the state (mode, hold, bits) are updated in memory.
; Saves: edi, esi, ebp, ebx and eflags are pushed on entry and restored on exit
;        (DF is cleared for the string instructions in between).
; Clobb: eax, ecx, edx; mm0-mm7 when the MMX loop runs (emms is issued before
;        and after MMX use).
;
; Code table entries are dwords: al = op, ah = bits consumed, high word = val.
;   op == 0        literal, val is the byte to emit
;   op & 16        length/distance base, op & 15 = number of extra bits
;   op & 64 == 0   (and op & 16 == 0) link to a second-level table
;   op & 32        end of block (length codes only)
;   otherwise      invalid code
;
; Integer loop registers: esi = in, edi = out, ebp = bit accumulator (hold),
;                         bl  = number of valid bits in ebp.
; MMX loop registers:     esi = in, edi = out, mm0 = bit accumulator,
;                         mm1 = shift still to be applied to mm0,
;                         ebp = number of valid bits, ebx = lcode / distance,
;                         edx = match length, mm3/mm4 = lmask, mm2/mm5 = dmask.
;
; NOTE(review): the "too far back" test compares the distance against wsize
; only; confirm whether the C version additionally bounds it by state->whave.
;-----------------------------------------------------------------------------
ALIGN 4
_inflate_fast   proc near
.FPO (16, 4, 0, 0, 1, 0)                ; debug frame record; 16 dwords = 64 bytes of locals
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd                          ; preserve caller's flags (DF in particular)
        sub     esp, local_var_size
        cld                             ; string ops must move forward

        ; ---- load stream / state into the frame ---------------------------
        mov     esi, [esp+strm_stk]     ; esi = strm
        mov     edi, [esi+state_strm]   ; edi = state

        mov     edx, [esi+avail_in_strm]
        mov     eax, [esi+next_in_strm]

        add     edx, eax
        sub     edx, 11                 ; last = next_in + avail_in - 11

        mov     [esp+in_stk], eax
        mov     [esp+last_stk], edx

        mov     ebp, [esp+start_stk]
        mov     ecx, [esi+avail_out_strm]
        mov     ebx, [esi+next_out_strm]

        sub     ebp, ecx
        neg     ebp
        add     ebp, ebx                ; beg = next_out - (start - avail_out)

        sub     ecx, 257
        add     ecx, ebx                ; end = next_out + avail_out - 257

        mov     [esp+out_stk], ebx
        mov     [esp+beg_stk], ebp
        mov     [esp+end_stk], ecx

        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]

        mov     [esp+lcode_stk], eax
        mov     [esp+dcode_stk], ecx

        mov     eax, 1
        mov     ecx, [edi+lenbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+lmask_stk], eax    ; lmask = (1 << lenbits) - 1

        mov     eax, 1
        mov     ecx, [edi+distbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+dmask_stk], eax    ; dmask = (1 << distbits) - 1

        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]

        mov     [esp+wsize_stk], eax
        mov     [esp+write_stk], ecx
        mov     [esp+window_stk], edx

        mov     ebp, [edi+hold_state]   ; ebp = hold
        mov     ebx, [edi+bits_state]   ; ebx = bits

        ; ---- prepare the input pointer ------------------------------------
        mov     esi, [esp+in_stk]
        mov     ecx, [esp+last_stk]
        cmp     ecx, esi
        ja      L_align_long            ; last > in: read the caller's buffer directly

        ; last <= in: copy the remaining input into the 12-byte frame buffer,
        ; zero-pad it, and decode from there.
        add     ecx, 11
        sub     ecx, esi                ; ecx = bytes of input available
        mov     eax, 12
        sub     eax, ecx                ; eax = padding bytes needed
        lea     edi, [esp+buf_stk]
        rep movsb
        mov     ecx, eax
        xor     eax, eax
        rep stosb
        lea     esi, [esp+buf_stk]
        mov     [esp+last_stk], esi     ; last = buf marks "bounce buffer in use"
        jmp     L_is_aligned

L_align_long:
        ; Pull single bytes into hold until esi is dword aligned.
        test    esi, 3
        jz      L_is_aligned
        xor     eax, eax
        mov     al, [esi]
        inc     esi
        mov     ecx, ebx
        add     ebx, 8
        shl     eax, cl
        or      ebp, eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+out_stk]      ; edi = out

        ; ---- choose the integer or the MMX loop ---------------------------
L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx], 2
        je      L_init_mmx
        ja      L_do_loop

        ; Flag below 2: detect the CPU once, store 2 or 3, then re-test.
        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]              ; eax = current eflags
        xor     dword ptr [esp], 0200000h ; toggle bit 21 (ID flag)
        popfd
        pushfd
        pop     edx
        xor     edx, eax
        jz      L_dont_use_mmx          ; bit did not stick: no CPUID
        xor     eax, eax
        cpuid                           ; leaf 0: vendor string in ebx:edx:ecx
        cmp     ebx, 0756e6547h         ; "Genu"
        jne     L_dont_use_mmx
        cmp     ecx, 06c65746eh         ; "ntel"
        jne     L_dont_use_mmx
        cmp     edx, 049656e69h         ; "ineI"
        jne     L_dont_use_mmx
        mov     eax, 1
        cpuid                           ; leaf 1: eax = signature, edx = features
        shr     eax, 8
        and     eax, 15                 ; family field
        cmp     eax, 6
        jne     L_dont_use_mmx
        test    edx, 0800000h           ; feature bit 23 (MMX)
        jz      L_dont_use_mmx
L_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 2
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 3
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx

;=============================================================================
; Integer decoding loop
;=============================================================================
ALIGN 4
L_do_loop:
        cmp     bl, 15
        ja      L_get_length_code       ; more than 15 bits available

        xor     eax, eax                ; refill: append 16 input bits to hold
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_length_code:
        mov     edx, [esp+lmask_stk]
        mov     ecx, [esp+lcode_stk]
        and     edx, ebp
        mov     eax, [ecx+edx*4]        ; eax = lcode[hold & lmask]

L_dolen:
        mov     cl, ah                  ; drop the bits this code consumed
        sub     bl, ah
        shr     ebp, cl

        test    al, al
        jnz     L_test_for_length_base  ; op != 0: not a literal

        shr     eax, 16                 ; literal: emit val
        stosb

L_while_test:
        cmp     [esp+end_stk], edi
        jbe     L_break_loop            ; stop when end <= out

        cmp     [esp+last_stk], esi
        ja      L_do_loop               ; continue while last > in
        jmp     L_break_loop

L_test_for_length_base:
        mov     edx, eax
        shr     edx, 16                 ; edx = val (length base or table index)
        mov     cl, al                  ; cl = op

        test    al, 16
        jz      L_test_for_second_level_length
        and     cl, 15                  ; cl = number of extra length bits
        jz      L_save_len
        cmp     bl, cl
        jae     L_add_bits_to_len

        mov     ch, cl                  ; refill 16 bits, keeping cl in ch
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_len:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                ; len = base + extra bits

L_save_len:
        mov     [esp+len_stk], edx

L_decode_distance:
        cmp     bl, 15
        ja      L_get_distance_code

        xor     eax, eax                ; refill 16 bits
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_distance_code:
        mov     edx, [esp+dmask_stk]
        mov     ecx, [esp+dcode_stk]
        and     edx, ebp
        mov     eax, [ecx+edx*4]        ; eax = dcode[hold & dmask]

L_dodist:
        mov     edx, eax
        shr     edx, 16                 ; edx = val (distance base or table index)
        mov     cl, ah
        sub     bl, ah
        shr     ebp, cl                 ; drop the bits this code consumed

        mov     cl, al                  ; cl = op

        test    al, 16
        jz      L_test_for_second_level_dist
        and     cl, 15                  ; cl = number of extra distance bits
        jz      L_check_dist_one
        cmp     bl, cl
        jae     L_add_bits_to_dist

        mov     ch, cl                  ; refill 16 bits, keeping cl in ch
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_dist:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                ; dist = base + extra bits

L_check_window:
        mov     [esp+in_stk], esi       ; esi is reused as copy source below
        mov     eax, edi
        sub     eax, [esp+beg_stk]      ; eax = out - beg

        cmp     eax, edx
        jb      L_clip_window           ; dist reaches in front of beg

        mov     ecx, [esp+len_stk]
        mov     esi, edi
        sub     esi, edx                ; esi = out - dist

        sub     ecx, 3                  ; first three bytes are copied by hand
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb

        mov     esi, [esp+in_stk]
        jmp     L_while_test

ALIGN 4
L_check_dist_one:
        ; No extra bits.  dist == 1 with at least one byte already in the
        ; output is a run of one byte: fill with stosb instead of copying.
        cmp     edx, 1
        jne     L_check_window
        cmp     [esp+beg_stk], edi
        je      L_check_window

        dec     edi
        mov     ecx, [esp+len_stk]
        mov     al, [edi]               ; al = previous output byte
        sub     ecx, 3

        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al, 64
        jnz     L_test_for_end_of_block

        mov     eax, 1                  ; index = val + (hold & ((1 << op) - 1))
        shl     eax, cl
        dec     eax
        and     eax, ebp
        add     eax, edx
        mov     edx, [esp+lcode_stk]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al, 64
        jnz     L_invalid_distance_code

        mov     eax, 1                  ; index = val + (hold & ((1 << op) - 1))
        shl     eax, cl
        dec     eax
        and     eax, ebp
        add     eax, edx
        mov     edx, [esp+dcode_stk]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

ALIGN 4
L_clip_window:
        ; Part of the match lies in the sliding window.  On entry
        ; eax = out - beg and edx = dist.
        mov     ecx, eax
        mov     eax, [esp+wsize_stk]
        neg     ecx
        mov     esi, [esp+window_stk]

        cmp     eax, edx
        jb      L_invalid_distance_too_far ; wsize < dist

        add     ecx, edx                ; ecx = dist - (out - beg): bytes from window
        cmp     dword ptr [esp+write_stk], 0
        jne     L_wrap_around_window

        sub     eax, ecx
        add     esi, eax                ; esi = window + wsize - ecx

        mov     eax, [esp+len_stk]
        cmp     eax, ecx
        jbe     L_do_copy1              ; whole match comes from the window

        sub     eax, ecx                ; copy window part, rest from output
        rep movsb
        mov     esi, edi
        sub     esi, edx
        jmp     L_do_copy1

L_wrap_around_window:
        mov     eax, [esp+write_stk]
        cmp     ecx, eax
        jbe     L_contiguous_in_window

        add     esi, [esp+wsize_stk]
        add     esi, eax
        sub     esi, ecx                ; esi = window + wsize + write - ecx
        sub     ecx, eax                ; ecx = bytes up to the end of the window

        mov     eax, [esp+len_stk]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx                ; copy the tail of the window ...
        rep movsb
        mov     esi, [esp+window_stk]   ; ... then continue at its start
        mov     ecx, [esp+write_stk]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx                ; ... and finish from the output
        rep movsb
        mov     esi, edi
        sub     esi, edx
        jmp     L_do_copy1

L_contiguous_in_window:
        add     esi, eax
        sub     esi, ecx                ; esi = window + write - ecx

        mov     eax, [esp+len_stk]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx                ; copy window part, rest from output
        rep movsb
        mov     esi, edi
        sub     esi, edx

L_do_copy1:
        mov     ecx, eax                ; eax = bytes still to copy
        rep movsb

        mov     esi, [esp+in_stk]
        jmp     L_while_test

;=============================================================================
; MMX decoding loop
;=============================================================================
ALIGN 4
L_init_mmx:
        emms

        movd    mm0, ebp                ; mm0 = hold
        mov     ebp, ebx                ; ebp = bits

        movd    mm4, dword ptr [esp+lmask_stk]
        movq    mm3, mm4                ; mm3 keeps lmask, mm4 is the working copy
        movd    mm5, dword ptr [esp+dmask_stk]
        movq    mm2, mm5                ; mm2 keeps dmask, mm5 is the working copy
        pxor    mm1, mm1                ; no shift pending
        mov     ebx, [esp+lcode_stk]
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0, mm1                ; apply the shift left pending by the last code

        cmp     ebp, 32
        ja      L_get_length_code_mmx

        movd    mm6, ebp                ; refill: append 32 input bits to hold
        movd    mm7, dword ptr [esi]
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_length_code_mmx:
        pand    mm4, mm0
        movd    eax, mm4
        movq    mm4, mm3
        mov     eax, [ebx+eax*4]        ; eax = lcode[hold & lmask]

L_dolen_mmx:
        movzx   ecx, ah
        movd    mm1, ecx                ; remember bits to drop
        sub     ebp, ecx

        test    al, al
        jnz     L_test_for_length_base_mmx

        shr     eax, 16                 ; literal: emit val
        stosb

L_while_test_mmx:
        cmp     [esp+end_stk], edi
        jbe     L_break_loop            ; stop when end <= out

        cmp     [esp+last_stk], esi
        ja      L_do_loop_mmx           ; continue while last > in
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx, eax
        shr     edx, 16                 ; edx = val (length base or table index)

        test    al, 16
        jz      L_test_for_second_level_length_mmx
        and     eax, 15                 ; eax = number of extra length bits
        jz      L_decode_distance_mmx

        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx, ecx                ; len = base + extra bits

L_decode_distance_mmx:
        psrlq   mm0, mm1

        cmp     ebp, 32
        ja      L_get_dist_code_mmx

        movd    mm6, ebp                ; refill 32 bits
        movd    mm7, dword ptr [esi]
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+dcode_stk]
        pand    mm5, mm0
        movd    eax, mm5
        movq    mm5, mm2
        mov     eax, [ebx+eax*4]        ; eax = dcode[hold & dmask]

L_dodist_mmx:
        movzx   ecx, ah
        mov     ebx, eax
        shr     ebx, 16                 ; ebx = val (distance base or table index)
        sub     ebp, ecx
        movd    mm1, ecx                ; remember bits to drop

        test    al, 16
        jz      L_test_for_second_level_dist_mmx
        and     eax, 15                 ; eax = number of extra distance bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx, ecx                ; dist = base + extra bits

L_check_window_mmx:
        mov     [esp+in_stk], esi       ; esi is reused as copy source below
        mov     eax, edi
        sub     eax, [esp+beg_stk]      ; eax = out - beg

        cmp     eax, ebx
        jb      L_clip_window_mmx       ; dist reaches in front of beg

        mov     ecx, edx
        mov     esi, edi
        sub     esi, ebx                ; esi = out - dist

        sub     ecx, 3                  ; first three bytes are copied by hand
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb

        mov     esi, [esp+in_stk]
        mov     ebx, [esp+lcode_stk]
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        ; Same run-of-one-byte shortcut as L_check_dist_one.
        cmp     ebx, 1
        jne     L_check_window_mmx
        cmp     [esp+beg_stk], edi
        je      L_check_window_mmx

        dec     edi
        mov     ecx, edx
        mov     al, [edi]               ; al = previous output byte
        sub     ecx, 3

        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb

        mov     ebx, [esp+lcode_stk]
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al, 64
        jnz     L_test_for_end_of_block

        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx, edx                ; index = val + low (op & 15) bits of hold
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al, 64
        jnz     L_invalid_distance_code

        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+dcode_stk]
        add     ecx, ebx                ; index = val + low (op & 15) bits of hold
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

ALIGN 4
L_clip_window_mmx:
        ; Same window handling as L_clip_window, with edx = len, ebx = dist.
        mov     ecx, eax
        mov     eax, [esp+wsize_stk]
        neg     ecx
        mov     esi, [esp+window_stk]

        cmp     eax, ebx
        jb      L_invalid_distance_too_far ; wsize < dist

        add     ecx, ebx                ; ecx = dist - (out - beg): bytes from window
        cmp     dword ptr [esp+write_stk], 0
        jne     L_wrap_around_window_mmx

        sub     eax, ecx
        add     esi, eax                ; esi = window + wsize - ecx

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:
        mov     eax, [esp+write_stk]
        cmp     ecx, eax
        jbe     L_contiguous_in_window_mmx

        add     esi, [esp+wsize_stk]
        add     esi, eax
        sub     esi, ecx                ; esi = window + wsize + write - ecx
        sub     ecx, eax                ; ecx = bytes up to the end of the window

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx                ; copy the tail of the window ...
        rep movsb
        mov     esi, [esp+window_stk]   ; ... then continue at its start
        mov     ecx, [esp+write_stk]
        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx                ; ... and finish from the output
        rep movsb
        mov     esi, edi
        sub     esi, ebx
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi, eax
        sub     esi, ecx                ; esi = window + write - ecx

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx

L_do_copy1_mmx:
        mov     ecx, edx                ; edx = bytes still to copy
        rep movsb

        mov     esi, [esp+in_stk]
        mov     ebx, [esp+lcode_stk]
        jmp     L_while_test_mmx

;=============================================================================
; Loop exits: ecx = message pointer (0 for none), edx = new state->mode
;=============================================================================
L_invalid_distance_code:
        mov     ecx, offset invalid_distance_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al, 32
        jz      L_invalid_literal_length_code

        xor     ecx, ecx                ; end of block: no message
        mov     edx, INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, offset invalid_literal_length_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+in_stk]       ; esi held the window pointer; restore in
        mov     ecx, offset invalid_distance_too_far_msg
        mov     edx, INFLATE_MODE_BAD

L_update_stream_state:
        mov     eax, [esp+strm_stk]
        test    ecx, ecx
        jz      L_skip_msg
        mov     [eax+msg_strm], ecx
L_skip_msg:
        mov     eax, [eax+state_strm]
        mov     [eax+mode_state], edx
        jmp     L_break_loop

;=============================================================================
; Write the decoder position back to the stream and the state
;=============================================================================
ALIGN 4
L_break_loop:
        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_next_in

        mov     ebx, ebp                ; MMX loop kept the bit count in ebp

L_update_next_in:
        mov     eax, [esp+strm_stk]
        mov     ecx, ebx
        mov     edx, [eax+state_strm]
        shr     ecx, 3                  ; ecx = whole bytes still held in hold
        sub     esi, ecx                ; give them back to the input
        shl     ecx, 3
        sub     ebx, ecx                ; bits &= 7
        mov     [eax+next_out_strm], edi
        mov     [edx+bits_state], ebx
        mov     ecx, ebx

        lea     ebx, [esp+buf_stk]
        cmp     [esp+last_stk], ebx
        jne     L_buf_not_used

        ; Input was decoded from the bounce buffer: translate esi back into
        ; the caller's buffer and recompute last from the stream fields.
        sub     esi, ebx
        mov     ebx, [eax+next_in_strm]
        mov     [esp+last_stk], ebx
        add     esi, ebx
        mov     ebx, [eax+avail_in_strm]
        sub     ebx, 11
        add     [esp+last_stk], ebx

L_buf_not_used:
        mov     [eax+next_in_strm], esi

        mov     ebx, 1
        shl     ebx, cl
        dec     ebx                     ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_hold

        psrlq   mm0, mm1                ; apply the pending shift
        movd    ebp, mm0                ; ebp = low 32 bits of hold

        emms

L_update_hold:
        and     ebp, ebx                ; keep only the valid bits
        mov     [edx+hold_state], ebp

        ; input count = 11 + (last - in), evaluated without going negative
        mov     ebx, [esp+last_stk]
        cmp     ebx, esi
        jbe     L_last_is_smaller

        sub     ebx, esi
        add     ebx, 11
        mov     [eax+avail_in_strm], ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi, ebx
        neg     esi
        add     esi, 11
        mov     [eax+avail_in_strm], esi

L_fixup_out:
        ; output count = 257 + (end - out), evaluated without going negative
        mov     ebx, [esp+end_stk]
        cmp     ebx, edi
        jbe     L_end_is_smaller

        sub     ebx, edi
        add     ebx, 257
        mov     [eax+avail_out_strm], ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi, ebx
        neg     edi
        add     edi, 257
        mov     [eax+avail_out_strm], edi

L_done:
        add     esp, local_var_size
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast   endp

_TEXT   ends
end