Chris@4
|
1 /* inffas86.c is a hand tuned assembler version of
|
Chris@4
|
2 *
|
Chris@4
|
3 * inffast.c -- fast decoding
|
Chris@4
|
4 * Copyright (C) 1995-2003 Mark Adler
|
Chris@4
|
5 * For conditions of distribution and use, see copyright notice in zlib.h
|
Chris@4
|
6 *
|
Chris@4
|
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
|
Chris@4
|
8 * Please use the copyright conditions above.
|
Chris@4
|
9 *
|
Chris@4
|
10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
|
Chris@4
|
11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
|
Chris@4
|
12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
|
Chris@4
|
13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
|
Chris@4
|
14 * from http://fedora.linux.duke.edu/fc1_x86_64
|
Chris@4
|
15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
|
Chris@4
|
16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
|
Chris@4
|
17 * when decompressing mozilla-source-1.3.tar.gz.
|
Chris@4
|
18 *
|
Chris@4
|
19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
|
Chris@4
|
20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
|
Chris@4
|
21 * the moment. I have successfully compiled and tested this code with gcc2.96,
|
Chris@4
|
22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
|
Chris@4
|
23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
|
Chris@4
|
24 * enabled. I will attempt to merge the MMX code into this version. Newer
|
Chris@4
|
25 * versions of this and inffast.S can be found at
|
Chris@4
|
26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
|
Chris@4
|
27 */
|
Chris@4
|
28
|
Chris@4
|
29 #include "zutil.h"
|
Chris@4
|
30 #include "inftrees.h"
|
Chris@4
|
31 #include "inflate.h"
|
Chris@4
|
32 #include "inffast.h"
|
Chris@4
|
33
|
Chris@4
|
34 /* Mark Adler's comments from inffast.c: */
|
Chris@4
|
35
|
Chris@4
|
36 /*
|
Chris@4
|
37 Decode literal, length, and distance codes and write out the resulting
|
Chris@4
|
38 literal and match bytes until either not enough input or output is
|
Chris@4
|
39 available, an end-of-block is encountered, or a data error is encountered.
|
Chris@4
|
40 When large enough input and output buffers are supplied to inflate(), for
|
Chris@4
|
41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
|
Chris@4
|
42 inflate execution time is spent in this routine.
|
Chris@4
|
43
|
Chris@4
|
44 Entry assumptions:
|
Chris@4
|
45
|
Chris@4
|
46 state->mode == LEN
|
Chris@4
|
47 strm->avail_in >= 6
|
Chris@4
|
48 strm->avail_out >= 258
|
Chris@4
|
49 start >= strm->avail_out
|
Chris@4
|
50 state->bits < 8
|
Chris@4
|
51
|
Chris@4
|
52 On return, state->mode is one of:
|
Chris@4
|
53
|
Chris@4
|
54 LEN -- not enough output space or not enough available input remains
|
Chris@4
|
55 TYPE -- reached end of block code, inflate() to interpret next block
|
Chris@4
|
56 BAD -- error in block data
|
Chris@4
|
57
|
Chris@4
|
58 Notes:
|
Chris@4
|
59
|
Chris@4
|
60 - The maximum input bits used by a length/distance pair is 15 bits for the
|
Chris@4
|
61 length code, 5 bits for the length extra, 15 bits for the distance code,
|
Chris@4
|
62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
|
Chris@4
|
63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
|
Chris@4
|
64 checking for available input while decoding.
|
Chris@4
|
65
|
Chris@4
|
66 - The maximum bytes that a single length/distance pair can output is 258
|
Chris@4
|
67 bytes, which is the maximum length that can be coded. inflate_fast()
|
Chris@4
|
68 requires strm->avail_out >= 258 for each loop to avoid checking for
|
Chris@4
|
69 output space.
|
Chris@4
|
70 */
|
Chris@4
|
71 void inflate_fast(strm, start)
|
Chris@4
|
72 z_streamp strm;
|
Chris@4
|
73 unsigned start; /* inflate()'s starting value for strm->avail_out */
|
Chris@4
|
74 {
|
Chris@4
|
75 struct inflate_state FAR *state;
|
Chris@4
|
/* Working state shared between the C prologue and the inline asm below.
   The asm points the stack pointer at &ar and then addresses every field by
   a hard-coded byte offset, so field order, types and sizes must stay in
   step with the offset columns (first column: 64-bit build, second column:
   32-bit build). */
76 struct inffast_ar {
|
Chris@4
|
77 /* 64 32 x86 x86_64 */
|
Chris@4
|
78 /* ar offset register */
|
Chris@4
|
79 /* 0 0 */ void *esp; /* saved stack pointer (esp / rsp), restored on exit */
|
Chris@4
|
80 /* 8 4 */ void *ebp; /* saved frame pointer (ebp / rbp), restored on exit */
|
Chris@4
|
81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
|
Chris@4
|
82 /* 24 12 */ unsigned char FAR *last; /* r9 loop runs while in < last */
|
Chris@4
|
83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
|
Chris@4
|
84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s initial next_out (out - bytes already written) */
|
Chris@4
|
85 /* 48 24 */ unsigned char FAR *end; /* r10 loop runs while out < end */
|
Chris@4
|
86 /* 56 28 */ unsigned char FAR *window;/* local state->window: base of the sliding window (copy source) */
|
Chris@4
|
87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local state->lencode */
|
Chris@4
|
88 /* 72 36 */ code const FAR *dcode; /* r11 local state->distcode */
|
Chris@4
|
89 /* 80 40 */ unsigned long hold; /* edx rdx local state->hold (bit accumulator) */
|
Chris@4
|
90 /* 88 44 */ unsigned bits; /* ebx rbx local state->bits (bit count in hold) */
|
Chris@4
|
91 /* 92 48 */ unsigned wsize; /* local state->wsize (window size) */
|
Chris@4
|
92 /* 96 52 */ unsigned write; /* local state->wnext (window write index) */
|
Chris@4
|
93 /*100 56 */ unsigned lmask; /* r12 mask for lcode: (1 << state->lenbits) - 1 */
|
Chris@4
|
94 /*104 60 */ unsigned dmask; /* r13 mask for dcode: (1 << state->distbits) - 1 */
|
Chris@4
|
95 /*108 64 */ unsigned len; /* r14 match length (kept in this slot on x86) */
|
Chris@4
|
96 /*112 68 */ unsigned dist; /* r15 match distance */
|
Chris@4
|
97 /*116 72 */ unsigned status; /* exit code stored by the asm: 0 = loop limit reached, 1 = end of block, 2 = invalid literal/length code, 3 = invalid distance code, 4 = distance too far back */
|
Chris@4
|
98 } ar;
|
Chris@4
|
99
|
Chris@4
|
100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
|
Chris@4
|
101 #define PAD_AVAIL_IN 6
|
Chris@4
|
102 #define PAD_AVAIL_OUT 258
|
Chris@4
|
103 #else
|
Chris@4
|
104 #define PAD_AVAIL_IN 5
|
Chris@4
|
105 #define PAD_AVAIL_OUT 257
|
Chris@4
|
106 #endif
|
Chris@4
|
107
|
Chris@4
|
108 /* copy state to local variables */
|
Chris@4
|
109 state = (struct inflate_state FAR *)strm->state;
|
Chris@4
|
110 ar.in = strm->next_in;
|
Chris@4
|
111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
|
Chris@4
|
112 ar.out = strm->next_out;
|
Chris@4
|
113 ar.beg = ar.out - (start - strm->avail_out);
|
Chris@4
|
114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
|
Chris@4
|
115 ar.wsize = state->wsize;
|
Chris@4
|
116 ar.write = state->wnext;
|
Chris@4
|
117 ar.window = state->window;
|
Chris@4
|
118 ar.hold = state->hold;
|
Chris@4
|
119 ar.bits = state->bits;
|
Chris@4
|
120 ar.lcode = state->lencode;
|
Chris@4
|
121 ar.dcode = state->distcode;
|
Chris@4
|
122 ar.lmask = (1U << state->lenbits) - 1;
|
Chris@4
|
123 ar.dmask = (1U << state->distbits) - 1;
|
Chris@4
|
124
|
Chris@4
|
125 /* decode literals and length/distances until end-of-block or not enough
|
Chris@4
|
126 input data or output space */
|
Chris@4
|
127
|
Chris@4
|
128 /* align in on 1/2 hold size boundary */
|
Chris@4
|
129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
|
Chris@4
|
130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
|
Chris@4
|
131 ar.bits += 8;
|
Chris@4
|
132 }
|
Chris@4
|
133
|
Chris@4
|
134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
|
Chris@4
|
135 __asm__ __volatile__ (
|
Chris@4
|
136 " leaq %0, %%rax\n"
|
Chris@4
|
137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
|
Chris@4
|
138 " movq %%rsp, (%%rax)\n"
|
Chris@4
|
139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
|
Chris@4
|
140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
|
Chris@4
|
141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
|
Chris@4
|
142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
|
Chris@4
|
143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
|
Chris@4
|
144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
|
Chris@4
|
145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
|
Chris@4
|
146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
|
Chris@4
|
147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
|
Chris@4
|
148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
|
Chris@4
|
149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
|
Chris@4
|
150 /* r14d = len */
|
Chris@4
|
151 /* r15d = dist */
|
Chris@4
|
152 " cld\n"
|
Chris@4
|
153 " cmpq %%rdi, %%r10\n"
|
Chris@4
|
154 " je .L_one_time\n" /* if only one decode left */
|
Chris@4
|
155 " cmpq %%rsi, %%r9\n"
|
Chris@4
|
156 " je .L_one_time\n"
|
Chris@4
|
157 " jmp .L_do_loop\n"
|
Chris@4
|
158
|
Chris@4
|
159 ".L_one_time:\n"
|
Chris@4
|
160 " movq %%r12, %%r8\n" /* r8 = lmask */
|
Chris@4
|
161 " cmpb $32, %%bl\n"
|
Chris@4
|
162 " ja .L_get_length_code_one_time\n"
|
Chris@4
|
163
|
Chris@4
|
164 " lodsl\n" /* eax = *(uint *)in++ */
|
Chris@4
|
165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
166 " addb $32, %%bl\n" /* bits += 32 */
|
Chris@4
|
167 " shlq %%cl, %%rax\n"
|
Chris@4
|
168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
|
Chris@4
|
169 " jmp .L_get_length_code_one_time\n"
|
Chris@4
|
170
|
Chris@4
|
171 ".align 32,0x90\n"
|
Chris@4
|
172 ".L_while_test:\n"
|
Chris@4
|
173 " cmpq %%rdi, %%r10\n"
|
Chris@4
|
174 " jbe .L_break_loop\n"
|
Chris@4
|
175 " cmpq %%rsi, %%r9\n"
|
Chris@4
|
176 " jbe .L_break_loop\n"
|
Chris@4
|
177
|
Chris@4
|
178 ".L_do_loop:\n"
|
Chris@4
|
179 " movq %%r12, %%r8\n" /* r8 = lmask */
|
Chris@4
|
180 " cmpb $32, %%bl\n"
|
Chris@4
|
181 " ja .L_get_length_code\n" /* if (32 < bits) */
|
Chris@4
|
182
|
Chris@4
|
183 " lodsl\n" /* eax = *(uint *)in++ */
|
Chris@4
|
184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
185 " addb $32, %%bl\n" /* bits += 32 */
|
Chris@4
|
186 " shlq %%cl, %%rax\n"
|
Chris@4
|
187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
|
Chris@4
|
188
|
Chris@4
|
189 ".L_get_length_code:\n"
|
Chris@4
|
190 " andq %%rdx, %%r8\n" /* r8 &= hold */
|
Chris@4
|
191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
|
Chris@4
|
192
|
Chris@4
|
193 " movb %%ah, %%cl\n" /* cl = this.bits */
|
Chris@4
|
194 " subb %%ah, %%bl\n" /* bits -= this.bits */
|
Chris@4
|
195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
|
Chris@4
|
196
|
Chris@4
|
197 " testb %%al, %%al\n"
|
Chris@4
|
198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
|
Chris@4
|
199
|
Chris@4
|
200 " movq %%r12, %%r8\n" /* r8 = lmask */
|
Chris@4
|
201 " shrl $16, %%eax\n" /* output this.val char */
|
Chris@4
|
202 " stosb\n"
|
Chris@4
|
203
|
Chris@4
|
204 ".L_get_length_code_one_time:\n"
|
Chris@4
|
205 " andq %%rdx, %%r8\n" /* r8 &= hold */
|
Chris@4
|
206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
|
Chris@4
|
207
|
Chris@4
|
208 ".L_dolen:\n"
|
Chris@4
|
209 " movb %%ah, %%cl\n" /* cl = this.bits */
|
Chris@4
|
210 " subb %%ah, %%bl\n" /* bits -= this.bits */
|
Chris@4
|
211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
|
Chris@4
|
212
|
Chris@4
|
213 " testb %%al, %%al\n"
|
Chris@4
|
214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
|
Chris@4
|
215
|
Chris@4
|
216 " shrl $16, %%eax\n" /* output this.val char */
|
Chris@4
|
217 " stosb\n"
|
Chris@4
|
218 " jmp .L_while_test\n"
|
Chris@4
|
219
|
Chris@4
|
220 ".align 32,0x90\n"
|
Chris@4
|
221 ".L_test_for_length_base:\n"
|
Chris@4
|
222 " movl %%eax, %%r14d\n" /* len = this */
|
Chris@4
|
223 " shrl $16, %%r14d\n" /* len = this.val */
|
Chris@4
|
224 " movb %%al, %%cl\n"
|
Chris@4
|
225
|
Chris@4
|
226 " testb $16, %%al\n"
|
Chris@4
|
227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
|
Chris@4
|
228 " andb $15, %%cl\n" /* op &= 15 */
|
Chris@4
|
229 " jz .L_decode_distance\n" /* if (!op) */
|
Chris@4
|
230
|
Chris@4
|
231 ".L_add_bits_to_len:\n"
|
Chris@4
|
232 " subb %%cl, %%bl\n"
|
Chris@4
|
233 " xorl %%eax, %%eax\n"
|
Chris@4
|
234 " incl %%eax\n"
|
Chris@4
|
235 " shll %%cl, %%eax\n"
|
Chris@4
|
236 " decl %%eax\n"
|
Chris@4
|
237 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
238 " shrq %%cl, %%rdx\n"
|
Chris@4
|
239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
|
Chris@4
|
240
|
Chris@4
|
241 ".L_decode_distance:\n"
|
Chris@4
|
242 " movq %%r13, %%r8\n" /* r8 = dmask */
|
Chris@4
|
243 " cmpb $32, %%bl\n"
|
Chris@4
|
244 " ja .L_get_distance_code\n" /* if (32 < bits) */
|
Chris@4
|
245
|
Chris@4
|
246 " lodsl\n" /* eax = *(uint *)in++ */
|
Chris@4
|
247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
248 " addb $32, %%bl\n" /* bits += 32 */
|
Chris@4
|
249 " shlq %%cl, %%rax\n"
|
Chris@4
|
250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
|
Chris@4
|
251
|
Chris@4
|
252 ".L_get_distance_code:\n"
|
Chris@4
|
253 " andq %%rdx, %%r8\n" /* r8 &= hold */
|
Chris@4
|
254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
|
Chris@4
|
255
|
Chris@4
|
256 ".L_dodist:\n"
|
Chris@4
|
257 " movl %%eax, %%r15d\n" /* dist = this */
|
Chris@4
|
258 " shrl $16, %%r15d\n" /* dist = this.val */
|
Chris@4
|
259 " movb %%ah, %%cl\n"
|
Chris@4
|
260 " subb %%ah, %%bl\n" /* bits -= this.bits */
|
Chris@4
|
261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
|
Chris@4
|
262 " movb %%al, %%cl\n" /* cl = this.op */
|
Chris@4
|
263
|
Chris@4
|
264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
|
Chris@4
|
265 " jz .L_test_for_second_level_dist\n"
|
Chris@4
|
266 " andb $15, %%cl\n" /* op &= 15 */
|
Chris@4
|
267 " jz .L_check_dist_one\n"
|
Chris@4
|
268
|
Chris@4
|
269 ".L_add_bits_to_dist:\n"
|
Chris@4
|
270 " subb %%cl, %%bl\n"
|
Chris@4
|
271 " xorl %%eax, %%eax\n"
|
Chris@4
|
272 " incl %%eax\n"
|
Chris@4
|
273 " shll %%cl, %%eax\n"
|
Chris@4
|
274 " decl %%eax\n" /* (1 << op) - 1 */
|
Chris@4
|
275 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
276 " shrq %%cl, %%rdx\n"
|
Chris@4
|
277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
|
Chris@4
|
278
|
Chris@4
|
279 ".L_check_window:\n"
|
Chris@4
|
280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
|
Chris@4
|
281 " movq %%rdi, %%rax\n"
|
Chris@4
|
282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
|
Chris@4
|
283
|
Chris@4
|
284 " cmpl %%r15d, %%eax\n"
|
Chris@4
|
285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
|
Chris@4
|
286
|
Chris@4
|
287 " movl %%r14d, %%ecx\n" /* ecx = len */
|
Chris@4
|
288 " movq %%rdi, %%rsi\n"
|
Chris@4
|
289 " subq %%r15, %%rsi\n" /* from = out - dist */
|
Chris@4
|
290
|
Chris@4
|
291 " sarl %%ecx\n"
|
Chris@4
|
292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
|
Chris@4
|
293
|
Chris@4
|
294 " rep movsw\n"
|
Chris@4
|
295 " movb (%%rsi), %%al\n"
|
Chris@4
|
296 " movb %%al, (%%rdi)\n"
|
Chris@4
|
297 " incq %%rdi\n"
|
Chris@4
|
298
|
Chris@4
|
299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
|
Chris@4
|
300 " jmp .L_while_test\n"
|
Chris@4
|
301
|
Chris@4
|
302 ".L_copy_two:\n"
|
Chris@4
|
303 " rep movsw\n"
|
Chris@4
|
304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
|
Chris@4
|
305 " jmp .L_while_test\n"
|
Chris@4
|
306
|
Chris@4
|
307 ".align 32,0x90\n"
|
Chris@4
|
308 ".L_check_dist_one:\n"
|
Chris@4
|
309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
|
Chris@4
|
310 " jne .L_check_window\n"
|
Chris@4
|
311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
|
Chris@4
|
312 " je .L_check_window\n"
|
Chris@4
|
313
|
Chris@4
|
314 " movl %%r14d, %%ecx\n" /* ecx = len */
|
Chris@4
|
315 " movb -1(%%rdi), %%al\n"
|
Chris@4
|
316 " movb %%al, %%ah\n"
|
Chris@4
|
317
|
Chris@4
|
318 " sarl %%ecx\n"
|
Chris@4
|
319 " jnc .L_set_two\n"
|
Chris@4
|
320 " movb %%al, (%%rdi)\n"
|
Chris@4
|
321 " incq %%rdi\n"
|
Chris@4
|
322
|
Chris@4
|
323 ".L_set_two:\n"
|
Chris@4
|
324 " rep stosw\n"
|
Chris@4
|
325 " jmp .L_while_test\n"
|
Chris@4
|
326
|
Chris@4
|
327 ".align 32,0x90\n"
|
Chris@4
|
328 ".L_test_for_second_level_length:\n"
|
Chris@4
|
329 " testb $64, %%al\n"
|
Chris@4
|
330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
|
Chris@4
|
331
|
Chris@4
|
332 " xorl %%eax, %%eax\n"
|
Chris@4
|
333 " incl %%eax\n"
|
Chris@4
|
334 " shll %%cl, %%eax\n"
|
Chris@4
|
335 " decl %%eax\n"
|
Chris@4
|
336 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
337 " addl %%r14d, %%eax\n" /* eax += len */
|
Chris@4
|
338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
|
Chris@4
|
339 " jmp .L_dolen\n"
|
Chris@4
|
340
|
Chris@4
|
341 ".align 32,0x90\n"
|
Chris@4
|
342 ".L_test_for_second_level_dist:\n"
|
Chris@4
|
343 " testb $64, %%al\n"
|
Chris@4
|
344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
|
Chris@4
|
345
|
Chris@4
|
346 " xorl %%eax, %%eax\n"
|
Chris@4
|
347 " incl %%eax\n"
|
Chris@4
|
348 " shll %%cl, %%eax\n"
|
Chris@4
|
349 " decl %%eax\n"
|
Chris@4
|
350 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
351 " addl %%r15d, %%eax\n" /* eax += dist */
|
Chris@4
|
352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
|
Chris@4
|
353 " jmp .L_dodist\n"
|
Chris@4
|
354
|
Chris@4
|
355 ".align 32,0x90\n"
|
Chris@4
|
356 ".L_clip_window:\n"
|
Chris@4
|
357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
|
Chris@4
|
358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
|
Chris@4
|
359 " negl %%ecx\n" /* nbytes = -nbytes */
|
Chris@4
|
360
|
Chris@4
|
361 " cmpl %%r15d, %%eax\n"
|
Chris@4
|
362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
|
Chris@4
|
363
|
Chris@4
|
364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
|
Chris@4
|
365 " cmpl $0, 96(%%rsp)\n"
|
Chris@4
|
366 " jne .L_wrap_around_window\n" /* if (write != 0) */
|
Chris@4
|
367
|
Chris@4
|
368 " movq 56(%%rsp), %%rsi\n" /* from = window */
|
Chris@4
|
369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
|
Chris@4
|
370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
|
Chris@4
|
371
|
Chris@4
|
372 " movl %%r14d, %%eax\n" /* eax = len */
|
Chris@4
|
373 " cmpl %%ecx, %%r14d\n"
|
Chris@4
|
374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
375
|
Chris@4
|
376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
|
Chris@4
|
377 " rep movsb\n"
|
Chris@4
|
378 " movq %%rdi, %%rsi\n"
|
Chris@4
|
379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
|
Chris@4
|
380 " jmp .L_do_copy\n"
|
Chris@4
|
381
|
Chris@4
|
382 ".align 32,0x90\n"
|
Chris@4
|
383 ".L_wrap_around_window:\n"
|
Chris@4
|
384 " movl 96(%%rsp), %%eax\n" /* eax = write */
|
Chris@4
|
385 " cmpl %%eax, %%ecx\n"
|
Chris@4
|
386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
|
Chris@4
|
387
|
Chris@4
|
388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
|
Chris@4
|
389 " addq 56(%%rsp), %%rsi\n" /* from += window */
|
Chris@4
|
390 " addq %%rax, %%rsi\n" /* from += write */
|
Chris@4
|
391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
|
Chris@4
|
392 " subl %%eax, %%ecx\n" /* nbytes -= write */
|
Chris@4
|
393
|
Chris@4
|
394 " movl %%r14d, %%eax\n" /* eax = len */
|
Chris@4
|
395 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
397
|
Chris@4
|
398 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
399 " rep movsb\n"
|
Chris@4
|
400 " movq 56(%%rsp), %%rsi\n" /* from = window */
|
Chris@4
|
401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
|
Chris@4
|
402 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
404
|
Chris@4
|
405 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
406 " rep movsb\n"
|
Chris@4
|
407 " movq %%rdi, %%rsi\n"
|
Chris@4
|
408 " subq %%r15, %%rsi\n" /* from = out - dist */
|
Chris@4
|
409 " jmp .L_do_copy\n"
|
Chris@4
|
410
|
Chris@4
|
411 ".align 32,0x90\n"
|
Chris@4
|
412 ".L_contiguous_in_window:\n"
|
Chris@4
|
413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
|
Chris@4
|
414 " addq %%rax, %%rsi\n"
|
Chris@4
|
415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
|
Chris@4
|
416
|
Chris@4
|
417 " movl %%r14d, %%eax\n" /* eax = len */
|
Chris@4
|
418 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
420
|
Chris@4
|
421 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
422 " rep movsb\n"
|
Chris@4
|
423 " movq %%rdi, %%rsi\n"
|
Chris@4
|
424 " subq %%r15, %%rsi\n" /* from = out - dist */
|
Chris@4
|
425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
426
|
Chris@4
|
427 ".align 32,0x90\n"
|
Chris@4
|
428 ".L_do_copy:\n"
|
Chris@4
|
429 " movl %%eax, %%ecx\n" /* ecx = len */
|
Chris@4
|
430 " rep movsb\n"
|
Chris@4
|
431
|
Chris@4
|
432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
|
Chris@4
|
433 " jmp .L_while_test\n"
|
Chris@4
|
434
|
Chris@4
|
435 ".L_test_for_end_of_block:\n"
|
Chris@4
|
436 " testb $32, %%al\n"
|
Chris@4
|
437 " jz .L_invalid_literal_length_code\n"
|
Chris@4
|
438 " movl $1, 116(%%rsp)\n"
|
Chris@4
|
439 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
440
|
Chris@4
|
441 ".L_invalid_literal_length_code:\n"
|
Chris@4
|
442 " movl $2, 116(%%rsp)\n"
|
Chris@4
|
443 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
444
|
Chris@4
|
445 ".L_invalid_distance_code:\n"
|
Chris@4
|
446 " movl $3, 116(%%rsp)\n"
|
Chris@4
|
447 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
448
|
Chris@4
|
449 ".L_invalid_distance_too_far:\n"
|
Chris@4
|
450 " movl $4, 116(%%rsp)\n"
|
Chris@4
|
451 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
452
|
Chris@4
|
453 ".L_break_loop:\n"
|
Chris@4
|
454 " movl $0, 116(%%rsp)\n"
|
Chris@4
|
455
|
Chris@4
|
456 ".L_break_loop_with_status:\n"
|
Chris@4
|
457 /* put in, out, bits, and hold back into ar and pop esp */
|
Chris@4
|
458 " movq %%rsi, 16(%%rsp)\n" /* in */
|
Chris@4
|
459 " movq %%rdi, 32(%%rsp)\n" /* out */
|
Chris@4
|
460 " movl %%ebx, 88(%%rsp)\n" /* bits */
|
Chris@4
|
461 " movq %%rdx, 80(%%rsp)\n" /* hold */
|
Chris@4
|
462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
|
Chris@4
|
463 " movq 8(%%rsp), %%rbp\n"
|
Chris@4
|
464 " movq %%rax, %%rsp\n"
|
Chris@4
|
465 :
|
Chris@4
|
466 : "m" (ar)
|
Chris@4
|
467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
|
Chris@4
|
468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
|
Chris@4
|
469 );
|
Chris@4
|
470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
|
Chris@4
|
471 __asm__ __volatile__ (
|
Chris@4
|
472 " leal %0, %%eax\n"
|
Chris@4
|
473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
|
Chris@4
|
474 " movl %%ebp, 4(%%eax)\n"
|
Chris@4
|
475 " movl %%eax, %%esp\n"
|
Chris@4
|
476 " movl 8(%%esp), %%esi\n" /* esi = in */
|
Chris@4
|
477 " movl 16(%%esp), %%edi\n" /* edi = out */
|
Chris@4
|
478 " movl 40(%%esp), %%edx\n" /* edx = hold */
|
Chris@4
|
479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
|
Chris@4
|
480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
|
Chris@4
|
481
|
Chris@4
|
482 " cld\n"
|
Chris@4
|
483 " jmp .L_do_loop\n"
|
Chris@4
|
484
|
Chris@4
|
485 ".align 32,0x90\n"
|
Chris@4
|
486 ".L_while_test:\n"
|
Chris@4
|
487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
|
Chris@4
|
488 " jbe .L_break_loop\n"
|
Chris@4
|
489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
|
Chris@4
|
490 " jbe .L_break_loop\n"
|
Chris@4
|
491
|
Chris@4
|
492 ".L_do_loop:\n"
|
Chris@4
|
493 " cmpb $15, %%bl\n"
|
Chris@4
|
494 " ja .L_get_length_code\n" /* if (15 < bits) */
|
Chris@4
|
495
|
Chris@4
|
496 " xorl %%eax, %%eax\n"
|
Chris@4
|
497 " lodsw\n" /* al = *(ushort *)in++ */
|
Chris@4
|
498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
499 " addb $16, %%bl\n" /* bits += 16 */
|
Chris@4
|
500 " shll %%cl, %%eax\n"
|
Chris@4
|
501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
502
|
Chris@4
|
503 ".L_get_length_code:\n"
|
Chris@4
|
504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
|
Chris@4
|
505 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
|
Chris@4
|
507
|
Chris@4
|
508 ".L_dolen:\n"
|
Chris@4
|
509 " movb %%ah, %%cl\n" /* cl = this.bits */
|
Chris@4
|
510 " subb %%ah, %%bl\n" /* bits -= this.bits */
|
Chris@4
|
511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
|
Chris@4
|
512
|
Chris@4
|
513 " testb %%al, %%al\n"
|
Chris@4
|
514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
|
Chris@4
|
515
|
Chris@4
|
516 " shrl $16, %%eax\n" /* output this.val char */
|
Chris@4
|
517 " stosb\n"
|
Chris@4
|
518 " jmp .L_while_test\n"
|
Chris@4
|
519
|
Chris@4
|
520 ".align 32,0x90\n"
|
Chris@4
|
521 ".L_test_for_length_base:\n"
|
Chris@4
|
522 " movl %%eax, %%ecx\n" /* len = this */
|
Chris@4
|
523 " shrl $16, %%ecx\n" /* len = this.val */
|
Chris@4
|
524 " movl %%ecx, 64(%%esp)\n" /* save len */
|
Chris@4
|
525 " movb %%al, %%cl\n"
|
Chris@4
|
526
|
Chris@4
|
527 " testb $16, %%al\n"
|
Chris@4
|
528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
|
Chris@4
|
529 " andb $15, %%cl\n" /* op &= 15 */
|
Chris@4
|
530 " jz .L_decode_distance\n" /* if (!op) */
|
Chris@4
|
531 " cmpb %%cl, %%bl\n"
|
Chris@4
|
532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
|
Chris@4
|
533
|
Chris@4
|
534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
|
Chris@4
|
535 " xorl %%eax, %%eax\n"
|
Chris@4
|
536 " lodsw\n" /* al = *(ushort *)in++ */
|
Chris@4
|
537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
538 " addb $16, %%bl\n" /* bits += 16 */
|
Chris@4
|
539 " shll %%cl, %%eax\n"
|
Chris@4
|
540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
541 " movb %%ch, %%cl\n" /* move op back to ecx */
|
Chris@4
|
542
|
Chris@4
|
543 ".L_add_bits_to_len:\n"
|
Chris@4
|
544 " subb %%cl, %%bl\n"
|
Chris@4
|
545 " xorl %%eax, %%eax\n"
|
Chris@4
|
546 " incl %%eax\n"
|
Chris@4
|
547 " shll %%cl, %%eax\n"
|
Chris@4
|
548 " decl %%eax\n"
|
Chris@4
|
549 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
550 " shrl %%cl, %%edx\n"
|
Chris@4
|
551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
|
Chris@4
|
552
|
Chris@4
|
553 ".L_decode_distance:\n"
|
Chris@4
|
554 " cmpb $15, %%bl\n"
|
Chris@4
|
555 " ja .L_get_distance_code\n" /* if (15 < bits) */
|
Chris@4
|
556
|
Chris@4
|
557 " xorl %%eax, %%eax\n"
|
Chris@4
|
558 " lodsw\n" /* al = *(ushort *)in++ */
|
Chris@4
|
559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
560 " addb $16, %%bl\n" /* bits += 16 */
|
Chris@4
|
561 " shll %%cl, %%eax\n"
|
Chris@4
|
562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
563
|
Chris@4
|
564 ".L_get_distance_code:\n"
|
Chris@4
|
565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
|
Chris@4
|
566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
|
Chris@4
|
567 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
|
Chris@4
|
569
|
Chris@4
|
570 ".L_dodist:\n"
|
Chris@4
|
571 " movl %%eax, %%ebp\n" /* dist = this */
|
Chris@4
|
572 " shrl $16, %%ebp\n" /* dist = this.val */
|
Chris@4
|
573 " movb %%ah, %%cl\n"
|
Chris@4
|
574 " subb %%ah, %%bl\n" /* bits -= this.bits */
|
Chris@4
|
575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
|
Chris@4
|
576 " movb %%al, %%cl\n" /* cl = this.op */
|
Chris@4
|
577
|
Chris@4
|
578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
|
Chris@4
|
579 " jz .L_test_for_second_level_dist\n"
|
Chris@4
|
580 " andb $15, %%cl\n" /* op &= 15 */
|
Chris@4
|
581 " jz .L_check_dist_one\n"
|
Chris@4
|
582 " cmpb %%cl, %%bl\n"
|
Chris@4
|
583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
|
Chris@4
|
584
|
Chris@4
|
585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
|
Chris@4
|
586 " xorl %%eax, %%eax\n"
|
Chris@4
|
587 " lodsw\n" /* al = *(ushort *)in++ */
|
Chris@4
|
588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
|
Chris@4
|
589 " addb $16, %%bl\n" /* bits += 16 */
|
Chris@4
|
590 " shll %%cl, %%eax\n"
|
Chris@4
|
591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
592 " movb %%ch, %%cl\n" /* move op back to ecx */
|
Chris@4
|
593
|
Chris@4
|
594 ".L_add_bits_to_dist:\n"
|
Chris@4
|
595 " subb %%cl, %%bl\n"
|
Chris@4
|
596 " xorl %%eax, %%eax\n"
|
Chris@4
|
597 " incl %%eax\n"
|
Chris@4
|
598 " shll %%cl, %%eax\n"
|
Chris@4
|
599 " decl %%eax\n" /* (1 << op) - 1 */
|
Chris@4
|
600 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
601 " shrl %%cl, %%edx\n"
|
Chris@4
|
602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
|
Chris@4
|
603
|
Chris@4
|
604 ".L_check_window:\n"
|
Chris@4
|
605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
|
Chris@4
|
606 " movl %%edi, %%eax\n"
|
Chris@4
|
607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
|
Chris@4
|
608
|
Chris@4
|
609 " cmpl %%ebp, %%eax\n"
|
Chris@4
|
610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
|
Chris@4
|
611
|
Chris@4
|
612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
|
Chris@4
|
613 " movl %%edi, %%esi\n"
|
Chris@4
|
614 " subl %%ebp, %%esi\n" /* from = out - dist */
|
Chris@4
|
615
|
Chris@4
|
616 " sarl %%ecx\n"
|
Chris@4
|
617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
|
Chris@4
|
618
|
Chris@4
|
619 " rep movsw\n"
|
Chris@4
|
620 " movb (%%esi), %%al\n"
|
Chris@4
|
621 " movb %%al, (%%edi)\n"
|
Chris@4
|
622 " incl %%edi\n"
|
Chris@4
|
623
|
Chris@4
|
624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
|
Chris@4
|
625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
|
Chris@4
|
626 " jmp .L_while_test\n"
|
Chris@4
|
627
|
Chris@4
|
628 ".L_copy_two:\n"
|
Chris@4
|
629 " rep movsw\n"
|
Chris@4
|
630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
|
Chris@4
|
631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
|
Chris@4
|
632 " jmp .L_while_test\n"
|
Chris@4
|
633
|
Chris@4
|
634 ".align 32,0x90\n"
|
Chris@4
|
635 ".L_check_dist_one:\n"
|
Chris@4
|
636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
|
Chris@4
|
637 " jne .L_check_window\n"
|
Chris@4
|
638 " cmpl %%edi, 20(%%esp)\n"
|
Chris@4
|
639 " je .L_check_window\n" /* out == beg, if outside window */
|
Chris@4
|
640
|
Chris@4
|
641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
|
Chris@4
|
642 " movb -1(%%edi), %%al\n"
|
Chris@4
|
643 " movb %%al, %%ah\n"
|
Chris@4
|
644
|
Chris@4
|
645 " sarl %%ecx\n"
|
Chris@4
|
646 " jnc .L_set_two\n"
|
Chris@4
|
647 " movb %%al, (%%edi)\n"
|
Chris@4
|
648 " incl %%edi\n"
|
Chris@4
|
649
|
Chris@4
|
650 ".L_set_two:\n"
|
Chris@4
|
651 " rep stosw\n"
|
Chris@4
|
652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
|
Chris@4
|
653 " jmp .L_while_test\n"
|
Chris@4
|
654
|
Chris@4
|
655 ".align 32,0x90\n"
|
Chris@4
|
656 ".L_test_for_second_level_length:\n"
|
Chris@4
|
657 " testb $64, %%al\n"
|
Chris@4
|
658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
|
Chris@4
|
659
|
Chris@4
|
660 " xorl %%eax, %%eax\n"
|
Chris@4
|
661 " incl %%eax\n"
|
Chris@4
|
662 " shll %%cl, %%eax\n"
|
Chris@4
|
663 " decl %%eax\n"
|
Chris@4
|
664 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
665 " addl 64(%%esp), %%eax\n" /* eax += len */
|
Chris@4
|
666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
|
Chris@4
|
667 " jmp .L_dolen\n"
|
Chris@4
|
668
|
Chris@4
|
669 ".align 32,0x90\n"
|
Chris@4
|
670 ".L_test_for_second_level_dist:\n"
|
Chris@4
|
671 " testb $64, %%al\n"
|
Chris@4
|
672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
|
Chris@4
|
673
|
Chris@4
|
674 " xorl %%eax, %%eax\n"
|
Chris@4
|
675 " incl %%eax\n"
|
Chris@4
|
676 " shll %%cl, %%eax\n"
|
Chris@4
|
677 " decl %%eax\n"
|
Chris@4
|
678 " andl %%edx, %%eax\n" /* eax &= hold */
|
Chris@4
|
679 " addl %%ebp, %%eax\n" /* eax += dist */
|
Chris@4
|
680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
|
Chris@4
|
681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
|
Chris@4
|
682 " jmp .L_dodist\n"
|
Chris@4
|
683
|
Chris@4
|
684 ".align 32,0x90\n"
|
Chris@4
|
685 ".L_clip_window:\n"
|
Chris@4
|
686 " movl %%eax, %%ecx\n"
|
Chris@4
|
687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
|
Chris@4
|
688 " negl %%ecx\n" /* nbytes = -nbytes */
|
Chris@4
|
689 " movl 28(%%esp), %%esi\n" /* from = window */
|
Chris@4
|
690
|
Chris@4
|
691 " cmpl %%ebp, %%eax\n"
|
Chris@4
|
692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
|
Chris@4
|
693
|
Chris@4
|
694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
|
Chris@4
|
695 " cmpl $0, 52(%%esp)\n"
|
Chris@4
|
696 " jne .L_wrap_around_window\n" /* if (write != 0) */
|
Chris@4
|
697
|
Chris@4
|
698 " subl %%ecx, %%eax\n"
|
Chris@4
|
699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
|
Chris@4
|
700
|
Chris@4
|
701 " movl 64(%%esp), %%eax\n" /* eax = len */
|
Chris@4
|
702 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
704
|
Chris@4
|
705 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
706 " rep movsb\n"
|
Chris@4
|
707 " movl %%edi, %%esi\n"
|
Chris@4
|
708 " subl %%ebp, %%esi\n" /* from = out - dist */
|
Chris@4
|
709 " jmp .L_do_copy\n"
|
Chris@4
|
710
|
Chris@4
|
711 ".align 32,0x90\n"
|
Chris@4
|
712 ".L_wrap_around_window:\n"
|
Chris@4
|
713 " movl 52(%%esp), %%eax\n" /* eax = write */
|
Chris@4
|
714 " cmpl %%eax, %%ecx\n"
|
Chris@4
|
715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
|
Chris@4
|
716
|
Chris@4
|
717 " addl 48(%%esp), %%esi\n" /* from += wsize */
|
Chris@4
|
718 " addl %%eax, %%esi\n" /* from += write */
|
Chris@4
|
719 " subl %%ecx, %%esi\n" /* from -= nbytes */
|
Chris@4
|
720 " subl %%eax, %%ecx\n" /* nbytes -= write */
|
Chris@4
|
721
|
Chris@4
|
722 " movl 64(%%esp), %%eax\n" /* eax = len */
|
Chris@4
|
723 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
725
|
Chris@4
|
726 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
727 " rep movsb\n"
|
Chris@4
|
728 " movl 28(%%esp), %%esi\n" /* from = window */
|
Chris@4
|
729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
|
Chris@4
|
730 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
732
|
Chris@4
|
733 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
734 " rep movsb\n"
|
Chris@4
|
735 " movl %%edi, %%esi\n"
|
Chris@4
|
736 " subl %%ebp, %%esi\n" /* from = out - dist */
|
Chris@4
|
737 " jmp .L_do_copy\n"
|
Chris@4
|
738
|
Chris@4
|
739 ".align 32,0x90\n"
|
Chris@4
|
740 ".L_contiguous_in_window:\n"
|
Chris@4
|
741 " addl %%eax, %%esi\n"
|
Chris@4
|
742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
|
Chris@4
|
743
|
Chris@4
|
744 " movl 64(%%esp), %%eax\n" /* eax = len */
|
Chris@4
|
745 " cmpl %%ecx, %%eax\n"
|
Chris@4
|
746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
747
|
Chris@4
|
748 " subl %%ecx, %%eax\n" /* len -= nbytes */
|
Chris@4
|
749 " rep movsb\n"
|
Chris@4
|
750 " movl %%edi, %%esi\n"
|
Chris@4
|
751 " subl %%ebp, %%esi\n" /* from = out - dist */
|
Chris@4
|
752 " jmp .L_do_copy\n" /* if (nbytes >= len) */
|
Chris@4
|
753
|
Chris@4
|
754 ".align 32,0x90\n"
|
Chris@4
|
755 ".L_do_copy:\n"
|
Chris@4
|
756 " movl %%eax, %%ecx\n"
|
Chris@4
|
757 " rep movsb\n"
|
Chris@4
|
758
|
Chris@4
|
759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
|
Chris@4
|
760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
|
Chris@4
|
761 " jmp .L_while_test\n"
|
Chris@4
|
762
|
Chris@4
|
763 ".L_test_for_end_of_block:\n"
|
Chris@4
|
764 " testb $32, %%al\n"
|
Chris@4
|
765 " jz .L_invalid_literal_length_code\n"
|
Chris@4
|
766 " movl $1, 72(%%esp)\n"
|
Chris@4
|
767 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
768
|
Chris@4
|
769 ".L_invalid_literal_length_code:\n"
|
Chris@4
|
770 " movl $2, 72(%%esp)\n"
|
Chris@4
|
771 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
772
|
Chris@4
|
773 ".L_invalid_distance_code:\n"
|
Chris@4
|
774 " movl $3, 72(%%esp)\n"
|
Chris@4
|
775 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
776
|
Chris@4
|
777 ".L_invalid_distance_too_far:\n"
|
Chris@4
|
778 " movl 8(%%esp), %%esi\n"
|
Chris@4
|
779 " movl $4, 72(%%esp)\n"
|
Chris@4
|
780 " jmp .L_break_loop_with_status\n"
|
Chris@4
|
781
|
Chris@4
|
782 ".L_break_loop:\n"
|
Chris@4
|
783 " movl $0, 72(%%esp)\n"
|
Chris@4
|
784
|
Chris@4
|
785 ".L_break_loop_with_status:\n"
|
Chris@4
|
786 /* put in, out, bits, and hold back into ar and pop esp */
|
Chris@4
|
787 " movl %%esi, 8(%%esp)\n" /* save in */
|
Chris@4
|
788 " movl %%edi, 16(%%esp)\n" /* save out */
|
Chris@4
|
789 " movl %%ebx, 44(%%esp)\n" /* save bits */
|
Chris@4
|
790 " movl %%edx, 40(%%esp)\n" /* save hold */
|
Chris@4
|
791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
|
Chris@4
|
792 " movl (%%esp), %%esp\n"
|
Chris@4
|
793 :
|
Chris@4
|
794 : "m" (ar)
|
Chris@4
|
795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
|
Chris@4
|
796 );
|
Chris@4
|
797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
|
Chris@4
|
798 __asm {
|
Chris@4
|
799 lea eax, ar
|
Chris@4
|
800 mov [eax], esp /* save esp, ebp */
|
Chris@4
|
801 mov [eax+4], ebp
|
Chris@4
|
802 mov esp, eax
|
Chris@4
|
803 mov esi, [esp+8] /* esi = in */
|
Chris@4
|
804 mov edi, [esp+16] /* edi = out */
|
Chris@4
|
805 mov edx, [esp+40] /* edx = hold */
|
Chris@4
|
806 mov ebx, [esp+44] /* ebx = bits */
|
Chris@4
|
807 mov ebp, [esp+32] /* ebp = lcode */
|
Chris@4
|
808
|
Chris@4
|
809 cld
|
Chris@4
|
810 jmp L_do_loop
|
Chris@4
|
811
|
Chris@4
|
812 ALIGN 4
|
Chris@4
|
813 L_while_test:
|
Chris@4
|
814 cmp [esp+24], edi
|
Chris@4
|
815 jbe L_break_loop
|
Chris@4
|
816 cmp [esp+12], esi
|
Chris@4
|
817 jbe L_break_loop
|
Chris@4
|
818
|
Chris@4
|
819 L_do_loop:
|
Chris@4
|
820 cmp bl, 15
|
Chris@4
|
821 ja L_get_length_code /* if (15 < bits) */
|
Chris@4
|
822
|
Chris@4
|
823 xor eax, eax
|
Chris@4
|
824 lodsw /* al = *(ushort *)in++ */
|
Chris@4
|
825 mov cl, bl /* cl = bits, needs it for shifting */
|
Chris@4
|
826 add bl, 16 /* bits += 16 */
|
Chris@4
|
827 shl eax, cl
|
Chris@4
|
828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
829
|
Chris@4
|
830 L_get_length_code:
|
Chris@4
|
831 mov eax, [esp+56] /* eax = lmask */
|
Chris@4
|
832 and eax, edx /* eax &= hold */
|
Chris@4
|
833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
|
Chris@4
|
834
|
Chris@4
|
835 L_dolen:
|
Chris@4
|
836 mov cl, ah /* cl = this.bits */
|
Chris@4
|
837 sub bl, ah /* bits -= this.bits */
|
Chris@4
|
838 shr edx, cl /* hold >>= this.bits */
|
Chris@4
|
839
|
Chris@4
|
840 test al, al
|
Chris@4
|
841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
|
Chris@4
|
842
|
Chris@4
|
843 shr eax, 16 /* output this.val char */
|
Chris@4
|
844 stosb
|
Chris@4
|
845 jmp L_while_test
|
Chris@4
|
846
|
Chris@4
|
847 ALIGN 4
|
Chris@4
|
848 L_test_for_length_base:
|
Chris@4
|
849 mov ecx, eax /* len = this */
|
Chris@4
|
850 shr ecx, 16 /* len = this.val */
|
Chris@4
|
851 mov [esp+64], ecx /* save len */
|
Chris@4
|
852 mov cl, al
|
Chris@4
|
853
|
Chris@4
|
854 test al, 16
|
Chris@4
|
855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
|
Chris@4
|
856 and cl, 15 /* op &= 15 */
|
Chris@4
|
857 jz L_decode_distance /* if (!op) */
|
Chris@4
|
858 cmp bl, cl
|
Chris@4
|
859 jae L_add_bits_to_len /* if (op <= bits) */
|
Chris@4
|
860
|
Chris@4
|
861 mov ch, cl /* stash op in ch, freeing cl */
|
Chris@4
|
862 xor eax, eax
|
Chris@4
|
863 lodsw /* al = *(ushort *)in++ */
|
Chris@4
|
864 mov cl, bl /* cl = bits, needs it for shifting */
|
Chris@4
|
865 add bl, 16 /* bits += 16 */
|
Chris@4
|
866 shl eax, cl
|
Chris@4
|
867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
868 mov cl, ch /* move op back to ecx */
|
Chris@4
|
869
|
Chris@4
|
870 L_add_bits_to_len:
|
Chris@4
|
871 sub bl, cl
|
Chris@4
|
872 xor eax, eax
|
Chris@4
|
873 inc eax
|
Chris@4
|
874 shl eax, cl
|
Chris@4
|
875 dec eax
|
Chris@4
|
876 and eax, edx /* eax &= hold */
|
Chris@4
|
877 shr edx, cl
|
Chris@4
|
878 add [esp+64], eax /* len += hold & mask[op] */
|
Chris@4
|
879
|
Chris@4
|
880 L_decode_distance:
|
Chris@4
|
881 cmp bl, 15
|
Chris@4
|
882 ja L_get_distance_code /* if (15 < bits) */
|
Chris@4
|
883
|
Chris@4
|
884 xor eax, eax
|
Chris@4
|
885 lodsw /* al = *(ushort *)in++ */
|
Chris@4
|
886 mov cl, bl /* cl = bits, needs it for shifting */
|
Chris@4
|
887 add bl, 16 /* bits += 16 */
|
Chris@4
|
888 shl eax, cl
|
Chris@4
|
889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
890
|
Chris@4
|
891 L_get_distance_code:
|
Chris@4
|
892 mov eax, [esp+60] /* eax = dmask */
|
Chris@4
|
893 mov ecx, [esp+36] /* ecx = dcode */
|
Chris@4
|
894 and eax, edx /* eax &= hold */
|
Chris@4
|
895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
|
Chris@4
|
896
|
Chris@4
|
897 L_dodist:
|
Chris@4
|
898 mov ebp, eax /* dist = this */
|
Chris@4
|
899 shr ebp, 16 /* dist = this.val */
|
Chris@4
|
900 mov cl, ah
|
Chris@4
|
901 sub bl, ah /* bits -= this.bits */
|
Chris@4
|
902 shr edx, cl /* hold >>= this.bits */
|
Chris@4
|
903 mov cl, al /* cl = this.op */
|
Chris@4
|
904
|
Chris@4
|
905 test al, 16 /* if ((op & 16) == 0) */
|
Chris@4
|
906 jz L_test_for_second_level_dist
|
Chris@4
|
907 and cl, 15 /* op &= 15 */
|
Chris@4
|
908 jz L_check_dist_one
|
Chris@4
|
909 cmp bl, cl
|
Chris@4
|
910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
|
Chris@4
|
911
|
Chris@4
|
912 mov ch, cl /* stash op in ch, freeing cl */
|
Chris@4
|
913 xor eax, eax
|
Chris@4
|
914 lodsw /* al = *(ushort *)in++ */
|
Chris@4
|
915 mov cl, bl /* cl = bits, needs it for shifting */
|
Chris@4
|
916 add bl, 16 /* bits += 16 */
|
Chris@4
|
917 shl eax, cl
|
Chris@4
|
918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
|
Chris@4
|
919 mov cl, ch /* move op back to ecx */
|
Chris@4
|
920
|
Chris@4
|
921 L_add_bits_to_dist:
|
Chris@4
|
922 sub bl, cl
|
Chris@4
|
923 xor eax, eax
|
Chris@4
|
924 inc eax
|
Chris@4
|
925 shl eax, cl
|
Chris@4
|
926 dec eax /* (1 << op) - 1 */
|
Chris@4
|
927 and eax, edx /* eax &= hold */
|
Chris@4
|
928 shr edx, cl
|
Chris@4
|
929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
|
Chris@4
|
930
|
Chris@4
|
931 L_check_window:
|
Chris@4
|
932 mov [esp+8], esi /* save in so from can use it's reg */
|
Chris@4
|
933 mov eax, edi
|
Chris@4
|
934 sub eax, [esp+20] /* nbytes = out - beg */
|
Chris@4
|
935
|
Chris@4
|
936 cmp eax, ebp
|
Chris@4
|
937 jb L_clip_window /* if (dist > nbytes) 4.2% */
|
Chris@4
|
938
|
Chris@4
|
939 mov ecx, [esp+64] /* ecx = len */
|
Chris@4
|
940 mov esi, edi
|
Chris@4
|
941 sub esi, ebp /* from = out - dist */
|
Chris@4
|
942
|
Chris@4
|
943 sar ecx, 1
|
Chris@4
|
944 jnc L_copy_two
|
Chris@4
|
945
|
Chris@4
|
946 rep movsw
|
Chris@4
|
947 mov al, [esi]
|
Chris@4
|
948 mov [edi], al
|
Chris@4
|
949 inc edi
|
Chris@4
|
950
|
Chris@4
|
951 mov esi, [esp+8] /* move in back to %esi, toss from */
|
Chris@4
|
952 mov ebp, [esp+32] /* ebp = lcode */
|
Chris@4
|
953 jmp L_while_test
|
Chris@4
|
954
|
Chris@4
|
955 L_copy_two:
|
Chris@4
|
956 rep movsw
|
Chris@4
|
957 mov esi, [esp+8] /* move in back to %esi, toss from */
|
Chris@4
|
958 mov ebp, [esp+32] /* ebp = lcode */
|
Chris@4
|
959 jmp L_while_test
|
Chris@4
|
960
|
Chris@4
|
961 ALIGN 4
|
Chris@4
|
962 L_check_dist_one:
|
Chris@4
|
963 cmp ebp, 1 /* if dist 1, is a memset */
|
Chris@4
|
964 jne L_check_window
|
Chris@4
|
965 cmp [esp+20], edi
|
Chris@4
|
966 je L_check_window /* out == beg, if outside window */
|
Chris@4
|
967
|
Chris@4
|
968 mov ecx, [esp+64] /* ecx = len */
|
Chris@4
|
969 mov al, [edi-1]
|
Chris@4
|
970 mov ah, al
|
Chris@4
|
971
|
Chris@4
|
972 sar ecx, 1
|
Chris@4
|
973 jnc L_set_two
|
Chris@4
|
974 mov [edi], al /* memset out with from[-1] */
|
Chris@4
|
975 inc edi
|
Chris@4
|
976
|
Chris@4
|
977 L_set_two:
|
Chris@4
|
978 rep stosw
|
Chris@4
|
979 mov ebp, [esp+32] /* ebp = lcode */
|
Chris@4
|
980 jmp L_while_test
|
Chris@4
|
981
|
Chris@4
|
982 ALIGN 4
|
Chris@4
|
983 L_test_for_second_level_length:
|
Chris@4
|
984 test al, 64
|
Chris@4
|
985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
|
Chris@4
|
986
|
Chris@4
|
987 xor eax, eax
|
Chris@4
|
988 inc eax
|
Chris@4
|
989 shl eax, cl
|
Chris@4
|
990 dec eax
|
Chris@4
|
991 and eax, edx /* eax &= hold */
|
Chris@4
|
992 add eax, [esp+64] /* eax += len */
|
Chris@4
|
993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
|
Chris@4
|
994 jmp L_dolen
|
Chris@4
|
995
|
Chris@4
|
996 ALIGN 4
|
Chris@4
|
997 L_test_for_second_level_dist:
|
Chris@4
|
998 test al, 64
|
Chris@4
|
999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
|
Chris@4
|
1000
|
Chris@4
|
1001 xor eax, eax
|
Chris@4
|
1002 inc eax
|
Chris@4
|
1003 shl eax, cl
|
Chris@4
|
1004 dec eax
|
Chris@4
|
1005 and eax, edx /* eax &= hold */
|
Chris@4
|
1006 add eax, ebp /* eax += dist */
|
Chris@4
|
1007 mov ecx, [esp+36] /* ecx = dcode */
|
Chris@4
|
1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
|
Chris@4
|
1009 jmp L_dodist
|
Chris@4
|
1010
|
Chris@4
|
1011 ALIGN 4
|
Chris@4
|
1012 L_clip_window:
|
Chris@4
|
1013 mov ecx, eax
|
Chris@4
|
1014 mov eax, [esp+48] /* eax = wsize */
|
Chris@4
|
1015 neg ecx /* nbytes = -nbytes */
|
Chris@4
|
1016 mov esi, [esp+28] /* from = window */
|
Chris@4
|
1017
|
Chris@4
|
1018 cmp eax, ebp
|
Chris@4
|
1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
|
Chris@4
|
1020
|
Chris@4
|
1021 add ecx, ebp /* nbytes = dist - nbytes */
|
Chris@4
|
1022 cmp dword ptr [esp+52], 0
|
Chris@4
|
1023 jne L_wrap_around_window /* if (write != 0) */
|
Chris@4
|
1024
|
Chris@4
|
1025 sub eax, ecx
|
Chris@4
|
1026 add esi, eax /* from += wsize - nbytes */
|
Chris@4
|
1027
|
Chris@4
|
1028 mov eax, [esp+64] /* eax = len */
|
Chris@4
|
1029 cmp eax, ecx
|
Chris@4
|
1030 jbe L_do_copy /* if (nbytes >= len) */
|
Chris@4
|
1031
|
Chris@4
|
1032 sub eax, ecx /* len -= nbytes */
|
Chris@4
|
1033 rep movsb
|
Chris@4
|
1034 mov esi, edi
|
Chris@4
|
1035 sub esi, ebp /* from = out - dist */
|
Chris@4
|
1036 jmp L_do_copy
|
Chris@4
|
1037
|
Chris@4
|
1038 ALIGN 4
|
Chris@4
|
1039 L_wrap_around_window:
|
Chris@4
|
1040 mov eax, [esp+52] /* eax = write */
|
Chris@4
|
1041 cmp ecx, eax
|
Chris@4
|
1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
|
Chris@4
|
1043
|
Chris@4
|
1044 add esi, [esp+48] /* from += wsize */
|
Chris@4
|
1045 add esi, eax /* from += write */
|
Chris@4
|
1046 sub esi, ecx /* from -= nbytes */
|
Chris@4
|
1047 sub ecx, eax /* nbytes -= write */
|
Chris@4
|
1048
|
Chris@4
|
1049 mov eax, [esp+64] /* eax = len */
|
Chris@4
|
1050 cmp eax, ecx
|
Chris@4
|
1051 jbe L_do_copy /* if (nbytes >= len) */
|
Chris@4
|
1052
|
Chris@4
|
1053 sub eax, ecx /* len -= nbytes */
|
Chris@4
|
1054 rep movsb
|
Chris@4
|
1055 mov esi, [esp+28] /* from = window */
|
Chris@4
|
1056 mov ecx, [esp+52] /* nbytes = write */
|
Chris@4
|
1057 cmp eax, ecx
|
Chris@4
|
1058 jbe L_do_copy /* if (nbytes >= len) */
|
Chris@4
|
1059
|
Chris@4
|
1060 sub eax, ecx /* len -= nbytes */
|
Chris@4
|
1061 rep movsb
|
Chris@4
|
1062 mov esi, edi
|
Chris@4
|
1063 sub esi, ebp /* from = out - dist */
|
Chris@4
|
1064 jmp L_do_copy
|
Chris@4
|
1065
|
Chris@4
|
1066 ALIGN 4
|
Chris@4
|
1067 L_contiguous_in_window:
|
Chris@4
|
1068 add esi, eax
|
Chris@4
|
1069 sub esi, ecx /* from += write - nbytes */
|
Chris@4
|
1070
|
Chris@4
|
1071 mov eax, [esp+64] /* eax = len */
|
Chris@4
|
1072 cmp eax, ecx
|
Chris@4
|
1073 jbe L_do_copy /* if (nbytes >= len) */
|
Chris@4
|
1074
|
Chris@4
|
1075 sub eax, ecx /* len -= nbytes */
|
Chris@4
|
1076 rep movsb
|
Chris@4
|
1077 mov esi, edi
|
Chris@4
|
1078 sub esi, ebp /* from = out - dist */
|
Chris@4
|
1079 jmp L_do_copy
|
Chris@4
|
1080
|
Chris@4
|
1081 ALIGN 4
|
Chris@4
|
1082 L_do_copy:
|
Chris@4
|
1083 mov ecx, eax
|
Chris@4
|
1084 rep movsb
|
Chris@4
|
1085
|
Chris@4
|
1086 mov esi, [esp+8] /* move in back to %esi, toss from */
|
Chris@4
|
1087 mov ebp, [esp+32] /* ebp = lcode */
|
Chris@4
|
1088 jmp L_while_test
|
Chris@4
|
1089
|
Chris@4
|
1090 L_test_for_end_of_block:
|
Chris@4
|
1091 test al, 32
|
Chris@4
|
1092 jz L_invalid_literal_length_code
|
Chris@4
|
1093 mov dword ptr [esp+72], 1
|
Chris@4
|
1094 jmp L_break_loop_with_status
|
Chris@4
|
1095
|
Chris@4
|
1096 L_invalid_literal_length_code:
|
Chris@4
|
1097 mov dword ptr [esp+72], 2
|
Chris@4
|
1098 jmp L_break_loop_with_status
|
Chris@4
|
1099
|
Chris@4
|
1100 L_invalid_distance_code:
|
Chris@4
|
1101 mov dword ptr [esp+72], 3
|
Chris@4
|
1102 jmp L_break_loop_with_status
|
Chris@4
|
1103
|
Chris@4
|
1104 L_invalid_distance_too_far:
|
Chris@4
|
1105 mov esi, [esp+4]
|
Chris@4
|
1106 mov dword ptr [esp+72], 4
|
Chris@4
|
1107 jmp L_break_loop_with_status
|
Chris@4
|
1108
|
Chris@4
|
1109 L_break_loop:
|
Chris@4
|
1110 mov dword ptr [esp+72], 0
|
Chris@4
|
1111
|
Chris@4
|
1112 L_break_loop_with_status:
|
Chris@4
|
1113 /* put in, out, bits, and hold back into ar and pop esp */
|
Chris@4
|
1114 mov [esp+8], esi /* save in */
|
Chris@4
|
1115 mov [esp+16], edi /* save out */
|
Chris@4
|
1116 mov [esp+44], ebx /* save bits */
|
Chris@4
|
1117 mov [esp+40], edx /* save hold */
|
Chris@4
|
1118 mov ebp, [esp+4] /* restore esp, ebp */
|
Chris@4
|
1119 mov esp, [esp]
|
Chris@4
|
1120 }
|
Chris@4
|
1121 #else
|
Chris@4
|
1122 #error "x86 architecture not defined"
|
Chris@4
|
1123 #endif
|
Chris@4
|
1124
|
Chris@4
|
1125 if (ar.status > 1) {
|
Chris@4
|
1126 if (ar.status == 2)
|
Chris@4
|
1127 strm->msg = "invalid literal/length code";
|
Chris@4
|
1128 else if (ar.status == 3)
|
Chris@4
|
1129 strm->msg = "invalid distance code";
|
Chris@4
|
1130 else
|
Chris@4
|
1131 strm->msg = "invalid distance too far back";
|
Chris@4
|
1132 state->mode = BAD;
|
Chris@4
|
1133 }
|
Chris@4
|
1134 else if ( ar.status == 1 ) {
|
Chris@4
|
1135 state->mode = TYPE;
|
Chris@4
|
1136 }
|
Chris@4
|
1137
|
Chris@4
|
1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
|
Chris@4
|
1139 ar.len = ar.bits >> 3;
|
Chris@4
|
1140 ar.in -= ar.len;
|
Chris@4
|
1141 ar.bits -= ar.len << 3;
|
Chris@4
|
1142 ar.hold &= (1U << ar.bits) - 1;
|
Chris@4
|
1143
|
Chris@4
|
1144 /* update state and return */
|
Chris@4
|
1145 strm->next_in = ar.in;
|
Chris@4
|
1146 strm->next_out = ar.out;
|
Chris@4
|
1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
|
Chris@4
|
1148 PAD_AVAIL_IN + (ar.last - ar.in) :
|
Chris@4
|
1149 PAD_AVAIL_IN - (ar.in - ar.last));
|
Chris@4
|
1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
|
Chris@4
|
1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
|
Chris@4
|
1152 PAD_AVAIL_OUT - (ar.out - ar.end));
|
Chris@4
|
1153 state->hold = ar.hold;
|
Chris@4
|
1154 state->bits = ar.bits;
|
Chris@4
|
1155 return;
|
Chris@4
|
1156 }
|
Chris@4
|
1157
|