annotate src/zlib-1.2.7/contrib/inflate86/inffas86.c @ 23:619f715526df sv_v2.1

Update Vamp plugin SDK to 2.5
author Chris Cannam
date Thu, 09 May 2013 10:52:46 +0100
parents e13257ea84a4
children
rev   line source
Chris@4 1 /* inffas86.c is a hand-tuned assembler version of
Chris@4 2 *
Chris@4 3 * inffast.c -- fast decoding
Chris@4 4 * Copyright (C) 1995-2003 Mark Adler
Chris@4 5 * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 6 *
Chris@4 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@4 8 * Please use the copyright conditions above.
Chris@4 9 *
Chris@4 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
Chris@4 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
Chris@4 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
Chris@4 13 * bytes. I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
Chris@4 14 * from http://fedora.linux.duke.edu/fc1_x86_64
Chris@4 15 * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
Chris@4 16 * 1GB RAM. The 64-bit version is about 4% faster than the 32-bit version,
Chris@4 17 * when decompressing mozilla-source-1.3.tar.gz.
Chris@4 18 *
Chris@4 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@4 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@4 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@4 22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
Chris@4 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@4 24 * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@4 25 * versions of this and inffast.S can be found at
Chris@4 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@4 27 */
Chris@4 28
Chris@4 29 #include "zutil.h"
Chris@4 30 #include "inftrees.h"
Chris@4 31 #include "inflate.h"
Chris@4 32 #include "inffast.h"
Chris@4 33
Chris@4 34 /* Mark Adler's comments from inffast.c: */
Chris@4 35
Chris@4 36 /*
Chris@4 37 Decode literal, length, and distance codes and write out the resulting
Chris@4 38 literal and match bytes until either not enough input or output is
Chris@4 39 available, an end-of-block is encountered, or a data error is encountered.
Chris@4 40 When large enough input and output buffers are supplied to inflate(), for
Chris@4 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
Chris@4 42 inflate execution time is spent in this routine.
Chris@4 43
Chris@4 44 Entry assumptions:
Chris@4 45
Chris@4 46 state->mode == LEN
Chris@4 47 strm->avail_in >= 6
Chris@4 48 strm->avail_out >= 258
Chris@4 49 start >= strm->avail_out
Chris@4 50 state->bits < 8
Chris@4 51
Chris@4 52 On return, state->mode is one of:
Chris@4 53
Chris@4 54 LEN -- ran out of enough output space or enough available input
Chris@4 55 TYPE -- reached end of block code, inflate() to interpret next block
Chris@4 56 BAD -- error in block data
Chris@4 57
Chris@4 58 Notes:
Chris@4 59
Chris@4 60 - The maximum input bits used by a length/distance pair is 15 bits for the
Chris@4 61 length code, 5 bits for the length extra, 15 bits for the distance code,
Chris@4 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
Chris@4 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
Chris@4 64 checking for available input while decoding.
Chris@4 65
Chris@4 66 - The maximum bytes that a single length/distance pair can output is 258
Chris@4 67 bytes, which is the maximum length that can be coded. inflate_fast()
Chris@4 68 requires strm->avail_out >= 258 for each loop to avoid checking for
Chris@4 69 output space.
Chris@4 70 */
Chris@4 71 void inflate_fast(strm, start)
Chris@4 72 z_streamp strm;
Chris@4 73 unsigned start; /* inflate()'s starting value for strm->avail_out */
Chris@4 74 {
Chris@4 75 struct inflate_state FAR *state;
Chris@4 76 struct inffast_ar {
Chris@4 77 /* 64 32 x86 x86_64 */
Chris@4 78 /* ar offset register (64 = byte offset on x86_64, 32 = byte offset on x86) */
Chris@4 79 /* 0 0 */ void *esp; /* saved stack pointer; the asm points esp/rsp at &ar */
Chris@4 80 /* 8 4 */ void *ebp; /* saved frame pointer; ebp/rbp is reused for lcode */
Chris@4 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
Chris@4 82 /* 24 12 */ unsigned char FAR *last; /* r9 loop runs while in < last */
Chris@4 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
Chris@4 84 /* 40 20 */ unsigned char FAR *beg; /* next_out when inflate() started */
Chris@4 85 /* 48 24 */ unsigned char FAR *end; /* r10 loop runs while out < end */
Chris@4 86 /* 56 28 */ unsigned char FAR *window;/* window buffer (state->window); copy source when dist > out - beg */
Chris@4 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local state->lencode */
Chris@4 88 /* 72 36 */ code const FAR *dcode; /* r11 local state->distcode */
Chris@4 89 /* 80 40 */ unsigned long hold; /* edx rdx local state->hold */
Chris@4 90 /* 88 44 */ unsigned bits; /* ebx rbx local state->bits */
Chris@4 91 /* 92 48 */ unsigned wsize; /* window size (state->wsize) */
Chris@4 92 /* 96 52 */ unsigned write; /* window write index (state->wnext) */
Chris@4 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode: (1 << lenbits) - 1 */
Chris@4 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode: (1 << distbits) - 1 */
Chris@4 95 /*108 64 */ unsigned len; /* r14 match length */
Chris@4 96 /*112 68 */ unsigned dist; /* r15 match distance */
Chris@4 97 /*116 72 */ unsigned status; /* asm exit code: 0 loop done, 1 end of block, 2 bad lit/len code, 3 bad dist code, 4 dist too far */
Chris@4 98 } ar; /* the asm addresses these fields by the byte offsets listed above; do not reorder or retype */
Chris@4 99
Chris@4 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@4 101 #define PAD_AVAIL_IN 6
Chris@4 102 #define PAD_AVAIL_OUT 258
Chris@4 103 #else
Chris@4 104 #define PAD_AVAIL_IN 5
Chris@4 105 #define PAD_AVAIL_OUT 257
Chris@4 106 #endif
Chris@4 107
Chris@4 108 /* copy state to local variables */
Chris@4 109 state = (struct inflate_state FAR *)strm->state;
Chris@4 110 ar.in = strm->next_in;
Chris@4 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
Chris@4 112 ar.out = strm->next_out;
Chris@4 113 ar.beg = ar.out - (start - strm->avail_out);
Chris@4 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
Chris@4 115 ar.wsize = state->wsize;
Chris@4 116 ar.write = state->wnext;
Chris@4 117 ar.window = state->window;
Chris@4 118 ar.hold = state->hold;
Chris@4 119 ar.bits = state->bits;
Chris@4 120 ar.lcode = state->lencode;
Chris@4 121 ar.dcode = state->distcode;
Chris@4 122 ar.lmask = (1U << state->lenbits) - 1;
Chris@4 123 ar.dmask = (1U << state->distbits) - 1;
Chris@4 124
Chris@4 125 /* decode literals and length/distances until end-of-block or not enough
Chris@4 126 input data or output space */
Chris@4 127
Chris@4 128 /* align in on 1/2 hold size boundary */
Chris@4 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
Chris@4 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
Chris@4 131 ar.bits += 8;
Chris@4 132 }
Chris@4 133
Chris@4 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@4 135 __asm__ __volatile__ (
Chris@4 136 " leaq %0, %%rax\n"
Chris@4 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
Chris@4 138 " movq %%rsp, (%%rax)\n"
Chris@4 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
Chris@4 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
Chris@4 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
Chris@4 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
Chris@4 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
Chris@4 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
Chris@4 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
Chris@4 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
Chris@4 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
Chris@4 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
Chris@4 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
Chris@4 150 /* r14d = len */
Chris@4 151 /* r15d = dist */
Chris@4 152 " cld\n"
Chris@4 153 " cmpq %%rdi, %%r10\n"
Chris@4 154 " je .L_one_time\n" /* if only one decode left */
Chris@4 155 " cmpq %%rsi, %%r9\n"
Chris@4 156 " je .L_one_time\n"
Chris@4 157 " jmp .L_do_loop\n"
Chris@4 158
Chris@4 159 ".L_one_time:\n"
Chris@4 160 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 161 " cmpb $32, %%bl\n"
Chris@4 162 " ja .L_get_length_code_one_time\n"
Chris@4 163
Chris@4 164 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 166 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 167 " shlq %%cl, %%rax\n"
Chris@4 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 169 " jmp .L_get_length_code_one_time\n"
Chris@4 170
Chris@4 171 ".align 32,0x90\n"
Chris@4 172 ".L_while_test:\n"
Chris@4 173 " cmpq %%rdi, %%r10\n"
Chris@4 174 " jbe .L_break_loop\n"
Chris@4 175 " cmpq %%rsi, %%r9\n"
Chris@4 176 " jbe .L_break_loop\n"
Chris@4 177
Chris@4 178 ".L_do_loop:\n"
Chris@4 179 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 180 " cmpb $32, %%bl\n"
Chris@4 181 " ja .L_get_length_code\n" /* if (32 < bits) */
Chris@4 182
Chris@4 183 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 185 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 186 " shlq %%cl, %%rax\n"
Chris@4 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 188
Chris@4 189 ".L_get_length_code:\n"
Chris@4 190 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 192
Chris@4 193 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 194 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 196
Chris@4 197 " testb %%al, %%al\n"
Chris@4 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 199
Chris@4 200 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 201 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 202 " stosb\n"
Chris@4 203
Chris@4 204 ".L_get_length_code_one_time:\n"
Chris@4 205 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 207
Chris@4 208 ".L_dolen:\n"
Chris@4 209 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 210 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 212
Chris@4 213 " testb %%al, %%al\n"
Chris@4 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 215
Chris@4 216 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 217 " stosb\n"
Chris@4 218 " jmp .L_while_test\n"
Chris@4 219
Chris@4 220 ".align 32,0x90\n"
Chris@4 221 ".L_test_for_length_base:\n"
Chris@4 222 " movl %%eax, %%r14d\n" /* len = this */
Chris@4 223 " shrl $16, %%r14d\n" /* len = this.val */
Chris@4 224 " movb %%al, %%cl\n"
Chris@4 225
Chris@4 226 " testb $16, %%al\n"
Chris@4 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@4 228 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 229 " jz .L_decode_distance\n" /* if (!op) */
Chris@4 230
Chris@4 231 ".L_add_bits_to_len:\n"
Chris@4 232 " subb %%cl, %%bl\n"
Chris@4 233 " xorl %%eax, %%eax\n"
Chris@4 234 " incl %%eax\n"
Chris@4 235 " shll %%cl, %%eax\n"
Chris@4 236 " decl %%eax\n"
Chris@4 237 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 238 " shrq %%cl, %%rdx\n"
Chris@4 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
Chris@4 240
Chris@4 241 ".L_decode_distance:\n"
Chris@4 242 " movq %%r13, %%r8\n" /* r8 = dmask */
Chris@4 243 " cmpb $32, %%bl\n"
Chris@4 244 " ja .L_get_distance_code\n" /* if (32 < bits) */
Chris@4 245
Chris@4 246 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 248 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 249 " shlq %%cl, %%rax\n"
Chris@4 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 251
Chris@4 252 ".L_get_distance_code:\n"
Chris@4 253 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
Chris@4 255
Chris@4 256 ".L_dodist:\n"
Chris@4 257 " movl %%eax, %%r15d\n" /* dist = this */
Chris@4 258 " shrl $16, %%r15d\n" /* dist = this.val */
Chris@4 259 " movb %%ah, %%cl\n"
Chris@4 260 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 262 " movb %%al, %%cl\n" /* cl = this.op */
Chris@4 263
Chris@4 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@4 265 " jz .L_test_for_second_level_dist\n"
Chris@4 266 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 267 " jz .L_check_dist_one\n"
Chris@4 268
Chris@4 269 ".L_add_bits_to_dist:\n"
Chris@4 270 " subb %%cl, %%bl\n"
Chris@4 271 " xorl %%eax, %%eax\n"
Chris@4 272 " incl %%eax\n"
Chris@4 273 " shll %%cl, %%eax\n"
Chris@4 274 " decl %%eax\n" /* (1 << op) - 1 */
Chris@4 275 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 276 " shrq %%cl, %%rdx\n"
Chris@4 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
Chris@4 278
Chris@4 279 ".L_check_window:\n"
Chris@4 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
Chris@4 281 " movq %%rdi, %%rax\n"
Chris@4 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
Chris@4 283
Chris@4 284 " cmpl %%r15d, %%eax\n"
Chris@4 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@4 286
Chris@4 287 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@4 288 " movq %%rdi, %%rsi\n"
Chris@4 289 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 290
Chris@4 291 " sarl %%ecx\n"
Chris@4 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@4 293
Chris@4 294 " rep movsw\n"
Chris@4 295 " movb (%%rsi), %%al\n"
Chris@4 296 " movb %%al, (%%rdi)\n"
Chris@4 297 " incq %%rdi\n"
Chris@4 298
Chris@4 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@4 300 " jmp .L_while_test\n"
Chris@4 301
Chris@4 302 ".L_copy_two:\n"
Chris@4 303 " rep movsw\n"
Chris@4 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@4 305 " jmp .L_while_test\n"
Chris@4 306
Chris@4 307 ".align 32,0x90\n"
Chris@4 308 ".L_check_dist_one:\n"
Chris@4 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
Chris@4 310 " jne .L_check_window\n"
Chris@4 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
Chris@4 312 " je .L_check_window\n"
Chris@4 313
Chris@4 314 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@4 315 " movb -1(%%rdi), %%al\n"
Chris@4 316 " movb %%al, %%ah\n"
Chris@4 317
Chris@4 318 " sarl %%ecx\n"
Chris@4 319 " jnc .L_set_two\n"
Chris@4 320 " movb %%al, (%%rdi)\n"
Chris@4 321 " incq %%rdi\n"
Chris@4 322
Chris@4 323 ".L_set_two:\n"
Chris@4 324 " rep stosw\n"
Chris@4 325 " jmp .L_while_test\n"
Chris@4 326
Chris@4 327 ".align 32,0x90\n"
Chris@4 328 ".L_test_for_second_level_length:\n"
Chris@4 329 " testb $64, %%al\n"
Chris@4 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@4 331
Chris@4 332 " xorl %%eax, %%eax\n"
Chris@4 333 " incl %%eax\n"
Chris@4 334 " shll %%cl, %%eax\n"
Chris@4 335 " decl %%eax\n"
Chris@4 336 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 337 " addl %%r14d, %%eax\n" /* eax += len */
Chris@4 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 339 " jmp .L_dolen\n"
Chris@4 340
Chris@4 341 ".align 32,0x90\n"
Chris@4 342 ".L_test_for_second_level_dist:\n"
Chris@4 343 " testb $64, %%al\n"
Chris@4 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@4 345
Chris@4 346 " xorl %%eax, %%eax\n"
Chris@4 347 " incl %%eax\n"
Chris@4 348 " shll %%cl, %%eax\n"
Chris@4 349 " decl %%eax\n"
Chris@4 350 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 351 " addl %%r15d, %%eax\n" /* eax += dist */
Chris@4 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 353 " jmp .L_dodist\n"
Chris@4 354
Chris@4 355 ".align 32,0x90\n"
Chris@4 356 ".L_clip_window:\n"
Chris@4 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
Chris@4 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
Chris@4 359 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@4 360
Chris@4 361 " cmpl %%r15d, %%eax\n"
Chris@4 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@4 363
Chris@4 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
Chris@4 365 " cmpl $0, 96(%%rsp)\n"
Chris@4 366 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@4 367
Chris@4 368 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@4 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@4 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
Chris@4 371
Chris@4 372 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 373 " cmpl %%ecx, %%r14d\n"
Chris@4 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 375
Chris@4 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@4 377 " rep movsb\n"
Chris@4 378 " movq %%rdi, %%rsi\n"
Chris@4 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
Chris@4 380 " jmp .L_do_copy\n"
Chris@4 381
Chris@4 382 ".align 32,0x90\n"
Chris@4 383 ".L_wrap_around_window:\n"
Chris@4 384 " movl 96(%%rsp), %%eax\n" /* eax = write */
Chris@4 385 " cmpl %%eax, %%ecx\n"
Chris@4 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@4 387
Chris@4 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
Chris@4 389 " addq 56(%%rsp), %%rsi\n" /* from += window */
Chris@4 390 " addq %%rax, %%rsi\n" /* from += write */
Chris@4 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
Chris@4 392 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@4 393
Chris@4 394 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 395 " cmpl %%ecx, %%eax\n"
Chris@4 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 397
Chris@4 398 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 399 " rep movsb\n"
Chris@4 400 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@4 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
Chris@4 402 " cmpl %%ecx, %%eax\n"
Chris@4 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 404
Chris@4 405 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 406 " rep movsb\n"
Chris@4 407 " movq %%rdi, %%rsi\n"
Chris@4 408 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 409 " jmp .L_do_copy\n"
Chris@4 410
Chris@4 411 ".align 32,0x90\n"
Chris@4 412 ".L_contiguous_in_window:\n"
Chris@4 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
Chris@4 414 " addq %%rax, %%rsi\n"
Chris@4 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
Chris@4 416
Chris@4 417 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 418 " cmpl %%ecx, %%eax\n"
Chris@4 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 420
Chris@4 421 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 422 " rep movsb\n"
Chris@4 423 " movq %%rdi, %%rsi\n"
Chris@4 424 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 426
Chris@4 427 ".align 32,0x90\n"
Chris@4 428 ".L_do_copy:\n"
Chris@4 429 " movl %%eax, %%ecx\n" /* ecx = len */
Chris@4 430 " rep movsb\n"
Chris@4 431
Chris@4 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
Chris@4 433 " jmp .L_while_test\n"
Chris@4 434
Chris@4 435 ".L_test_for_end_of_block:\n"
Chris@4 436 " testb $32, %%al\n"
Chris@4 437 " jz .L_invalid_literal_length_code\n"
Chris@4 438 " movl $1, 116(%%rsp)\n"
Chris@4 439 " jmp .L_break_loop_with_status\n"
Chris@4 440
Chris@4 441 ".L_invalid_literal_length_code:\n"
Chris@4 442 " movl $2, 116(%%rsp)\n"
Chris@4 443 " jmp .L_break_loop_with_status\n"
Chris@4 444
Chris@4 445 ".L_invalid_distance_code:\n"
Chris@4 446 " movl $3, 116(%%rsp)\n"
Chris@4 447 " jmp .L_break_loop_with_status\n"
Chris@4 448
Chris@4 449 ".L_invalid_distance_too_far:\n"
Chris@4 450 " movl $4, 116(%%rsp)\n"
Chris@4 451 " jmp .L_break_loop_with_status\n"
Chris@4 452
Chris@4 453 ".L_break_loop:\n"
Chris@4 454 " movl $0, 116(%%rsp)\n"
Chris@4 455
Chris@4 456 ".L_break_loop_with_status:\n"
Chris@4 457 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 458 " movq %%rsi, 16(%%rsp)\n" /* in */
Chris@4 459 " movq %%rdi, 32(%%rsp)\n" /* out */
Chris@4 460 " movl %%ebx, 88(%%rsp)\n" /* bits */
Chris@4 461 " movq %%rdx, 80(%%rsp)\n" /* hold */
Chris@4 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
Chris@4 463 " movq 8(%%rsp), %%rbp\n"
Chris@4 464 " movq %%rax, %%rsp\n"
Chris@4 465 :
Chris@4 466 : "m" (ar)
Chris@4 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
Chris@4 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
Chris@4 469 );
Chris@4 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
Chris@4 471 __asm__ __volatile__ (
Chris@4 472 " leal %0, %%eax\n"
Chris@4 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
Chris@4 474 " movl %%ebp, 4(%%eax)\n"
Chris@4 475 " movl %%eax, %%esp\n"
Chris@4 476 " movl 8(%%esp), %%esi\n" /* esi = in */
Chris@4 477 " movl 16(%%esp), %%edi\n" /* edi = out */
Chris@4 478 " movl 40(%%esp), %%edx\n" /* edx = hold */
Chris@4 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
Chris@4 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 481
Chris@4 482 " cld\n"
Chris@4 483 " jmp .L_do_loop\n"
Chris@4 484
Chris@4 485 ".align 32,0x90\n"
Chris@4 486 ".L_while_test:\n"
Chris@4 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
Chris@4 488 " jbe .L_break_loop\n"
Chris@4 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
Chris@4 490 " jbe .L_break_loop\n"
Chris@4 491
Chris@4 492 ".L_do_loop:\n"
Chris@4 493 " cmpb $15, %%bl\n"
Chris@4 494 " ja .L_get_length_code\n" /* if (15 < bits) */
Chris@4 495
Chris@4 496 " xorl %%eax, %%eax\n"
Chris@4 497 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 499 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 500 " shll %%cl, %%eax\n"
Chris@4 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 502
Chris@4 503 ".L_get_length_code:\n"
Chris@4 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
Chris@4 505 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 507
Chris@4 508 ".L_dolen:\n"
Chris@4 509 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 510 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@4 512
Chris@4 513 " testb %%al, %%al\n"
Chris@4 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 515
Chris@4 516 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 517 " stosb\n"
Chris@4 518 " jmp .L_while_test\n"
Chris@4 519
Chris@4 520 ".align 32,0x90\n"
Chris@4 521 ".L_test_for_length_base:\n"
Chris@4 522 " movl %%eax, %%ecx\n" /* len = this */
Chris@4 523 " shrl $16, %%ecx\n" /* len = this.val */
Chris@4 524 " movl %%ecx, 64(%%esp)\n" /* save len */
Chris@4 525 " movb %%al, %%cl\n"
Chris@4 526
Chris@4 527 " testb $16, %%al\n"
Chris@4 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@4 529 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 530 " jz .L_decode_distance\n" /* if (!op) */
Chris@4 531 " cmpb %%cl, %%bl\n"
Chris@4 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
Chris@4 533
Chris@4 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@4 535 " xorl %%eax, %%eax\n"
Chris@4 536 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 538 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 539 " shll %%cl, %%eax\n"
Chris@4 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 541 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@4 542
Chris@4 543 ".L_add_bits_to_len:\n"
Chris@4 544 " subb %%cl, %%bl\n"
Chris@4 545 " xorl %%eax, %%eax\n"
Chris@4 546 " incl %%eax\n"
Chris@4 547 " shll %%cl, %%eax\n"
Chris@4 548 " decl %%eax\n"
Chris@4 549 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 550 " shrl %%cl, %%edx\n"
Chris@4 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
Chris@4 552
Chris@4 553 ".L_decode_distance:\n"
Chris@4 554 " cmpb $15, %%bl\n"
Chris@4 555 " ja .L_get_distance_code\n" /* if (15 < bits) */
Chris@4 556
Chris@4 557 " xorl %%eax, %%eax\n"
Chris@4 558 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 560 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 561 " shll %%cl, %%eax\n"
Chris@4 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 563
Chris@4 564 ".L_get_distance_code:\n"
Chris@4 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
Chris@4 566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@4 567 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
Chris@4 569
Chris@4 570 ".L_dodist:\n"
Chris@4 571 " movl %%eax, %%ebp\n" /* dist = this */
Chris@4 572 " shrl $16, %%ebp\n" /* dist = this.val */
Chris@4 573 " movb %%ah, %%cl\n"
Chris@4 574 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@4 576 " movb %%al, %%cl\n" /* cl = this.op */
Chris@4 577
Chris@4 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@4 579 " jz .L_test_for_second_level_dist\n"
Chris@4 580 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 581 " jz .L_check_dist_one\n"
Chris@4 582 " cmpb %%cl, %%bl\n"
Chris@4 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
Chris@4 584
Chris@4 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@4 586 " xorl %%eax, %%eax\n"
Chris@4 587 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 589 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 590 " shll %%cl, %%eax\n"
Chris@4 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 592 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@4 593
Chris@4 594 ".L_add_bits_to_dist:\n"
Chris@4 595 " subb %%cl, %%bl\n"
Chris@4 596 " xorl %%eax, %%eax\n"
Chris@4 597 " incl %%eax\n"
Chris@4 598 " shll %%cl, %%eax\n"
Chris@4 599 " decl %%eax\n" /* (1 << op) - 1 */
Chris@4 600 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 601 " shrl %%cl, %%edx\n"
Chris@4 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
Chris@4 603
Chris@4 604 ".L_check_window:\n"
Chris@4 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
Chris@4 606 " movl %%edi, %%eax\n"
Chris@4 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
Chris@4 608
Chris@4 609 " cmpl %%ebp, %%eax\n"
Chris@4 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@4 611
Chris@4 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@4 613 " movl %%edi, %%esi\n"
Chris@4 614 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 615
Chris@4 616 " sarl %%ecx\n"
Chris@4 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@4 618
Chris@4 619 " rep movsw\n"
Chris@4 620 " movb (%%esi), %%al\n"
Chris@4 621 " movb %%al, (%%edi)\n"
Chris@4 622 " incl %%edi\n"
Chris@4 623
Chris@4 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 626 " jmp .L_while_test\n"
Chris@4 627
Chris@4 628 ".L_copy_two:\n"
Chris@4 629 " rep movsw\n"
Chris@4 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 632 " jmp .L_while_test\n"
Chris@4 633
Chris@4 634 ".align 32,0x90\n"
Chris@4 635 ".L_check_dist_one:\n"
Chris@4 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
Chris@4 637 " jne .L_check_window\n"
Chris@4 638 " cmpl %%edi, 20(%%esp)\n"
Chris@4 639 " je .L_check_window\n" /* out == beg, if outside window */
Chris@4 640
Chris@4 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@4 642 " movb -1(%%edi), %%al\n"
Chris@4 643 " movb %%al, %%ah\n"
Chris@4 644
Chris@4 645 " sarl %%ecx\n"
Chris@4 646 " jnc .L_set_two\n"
Chris@4 647 " movb %%al, (%%edi)\n"
Chris@4 648 " incl %%edi\n"
Chris@4 649
Chris@4 650 ".L_set_two:\n"
Chris@4 651 " rep stosw\n"
Chris@4 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 653 " jmp .L_while_test\n"
Chris@4 654
Chris@4 655 ".align 32,0x90\n"
Chris@4 656 ".L_test_for_second_level_length:\n"
Chris@4 657 " testb $64, %%al\n"
Chris@4 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@4 659
Chris@4 660 " xorl %%eax, %%eax\n"
Chris@4 661 " incl %%eax\n"
Chris@4 662 " shll %%cl, %%eax\n"
Chris@4 663 " decl %%eax\n"
Chris@4 664 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 665 " addl 64(%%esp), %%eax\n" /* eax += len */
Chris@4 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 667 " jmp .L_dolen\n"
Chris@4 668
Chris@4 669 ".align 32,0x90\n"
Chris@4 670 ".L_test_for_second_level_dist:\n"
Chris@4 671 " testb $64, %%al\n"
Chris@4 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@4 673
Chris@4 674 " xorl %%eax, %%eax\n"
Chris@4 675 " incl %%eax\n"
Chris@4 676 " shll %%cl, %%eax\n"
Chris@4 677 " decl %%eax\n"
Chris@4 678 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 679 " addl %%ebp, %%eax\n" /* eax += dist */
Chris@4 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@4 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 682 " jmp .L_dodist\n"
Chris@4 683
Chris@4 684 ".align 32,0x90\n"
Chris@4 685 ".L_clip_window:\n"
Chris@4 686 " movl %%eax, %%ecx\n"
Chris@4 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
Chris@4 688 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@4 689 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@4 690
Chris@4 691 " cmpl %%ebp, %%eax\n"
Chris@4 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@4 693
Chris@4 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
Chris@4 695 " cmpl $0, 52(%%esp)\n"
Chris@4 696 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@4 697
Chris@4 698 " subl %%ecx, %%eax\n"
Chris@4 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
Chris@4 700
Chris@4 701 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 702 " cmpl %%ecx, %%eax\n"
Chris@4 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 704
Chris@4 705 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 706 " rep movsb\n"
Chris@4 707 " movl %%edi, %%esi\n"
Chris@4 708 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 709 " jmp .L_do_copy\n"
Chris@4 710
Chris@4 711 ".align 32,0x90\n"
Chris@4 712 ".L_wrap_around_window:\n"
Chris@4 713 " movl 52(%%esp), %%eax\n" /* eax = write */
Chris@4 714 " cmpl %%eax, %%ecx\n"
Chris@4 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@4 716
Chris@4 717 " addl 48(%%esp), %%esi\n" /* from += wsize */
Chris@4 718 " addl %%eax, %%esi\n" /* from += write */
Chris@4 719 " subl %%ecx, %%esi\n" /* from -= nbytes */
Chris@4 720 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@4 721
Chris@4 722 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 723 " cmpl %%ecx, %%eax\n"
Chris@4 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 725
Chris@4 726 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 727 " rep movsb\n"
Chris@4 728 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@4 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
Chris@4 730 " cmpl %%ecx, %%eax\n"
Chris@4 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 732
Chris@4 733 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 734 " rep movsb\n"
Chris@4 735 " movl %%edi, %%esi\n"
Chris@4 736 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 737 " jmp .L_do_copy\n"
Chris@4 738
Chris@4 739 ".align 32,0x90\n"
Chris@4 740 ".L_contiguous_in_window:\n"
Chris@4 741 " addl %%eax, %%esi\n"
Chris@4 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
Chris@4 743
Chris@4 744 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 745 " cmpl %%ecx, %%eax\n"
Chris@4 746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 747
Chris@4 748 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 749 " rep movsb\n"
Chris@4 750 " movl %%edi, %%esi\n"
Chris@4 751 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 752 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 753
Chris@4 754 ".align 32,0x90\n"
Chris@4 755 ".L_do_copy:\n"
Chris@4 756 " movl %%eax, %%ecx\n"
Chris@4 757 " rep movsb\n"
Chris@4 758
Chris@4 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 761 " jmp .L_while_test\n"
Chris@4 762
Chris@4 763 ".L_test_for_end_of_block:\n"
Chris@4 764 " testb $32, %%al\n"
Chris@4 765 " jz .L_invalid_literal_length_code\n"
Chris@4 766 " movl $1, 72(%%esp)\n"
Chris@4 767 " jmp .L_break_loop_with_status\n"
Chris@4 768
Chris@4 769 ".L_invalid_literal_length_code:\n"
Chris@4 770 " movl $2, 72(%%esp)\n"
Chris@4 771 " jmp .L_break_loop_with_status\n"
Chris@4 772
Chris@4 773 ".L_invalid_distance_code:\n"
Chris@4 774 " movl $3, 72(%%esp)\n"
Chris@4 775 " jmp .L_break_loop_with_status\n"
Chris@4 776
Chris@4 777 ".L_invalid_distance_too_far:\n"
Chris@4 778 " movl 8(%%esp), %%esi\n"
Chris@4 779 " movl $4, 72(%%esp)\n"
Chris@4 780 " jmp .L_break_loop_with_status\n"
Chris@4 781
Chris@4 782 ".L_break_loop:\n"
Chris@4 783 " movl $0, 72(%%esp)\n"
Chris@4 784
Chris@4 785 ".L_break_loop_with_status:\n"
Chris@4 786 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 787 " movl %%esi, 8(%%esp)\n" /* save in */
Chris@4 788 " movl %%edi, 16(%%esp)\n" /* save out */
Chris@4 789 " movl %%ebx, 44(%%esp)\n" /* save bits */
Chris@4 790 " movl %%edx, 40(%%esp)\n" /* save hold */
Chris@4 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
Chris@4 792 " movl (%%esp), %%esp\n"
Chris@4 793 :
Chris@4 794 : "m" (ar)
Chris@4 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
Chris@4 796 );
Chris@4 797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
Chris@4 798 __asm {
Chris@4 799 lea eax, ar
Chris@4 800 mov [eax], esp /* save esp, ebp */
Chris@4 801 mov [eax+4], ebp
Chris@4 802 mov esp, eax
Chris@4 803 mov esi, [esp+8] /* esi = in */
Chris@4 804 mov edi, [esp+16] /* edi = out */
Chris@4 805 mov edx, [esp+40] /* edx = hold */
Chris@4 806 mov ebx, [esp+44] /* ebx = bits */
Chris@4 807 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 808
Chris@4 809 cld
Chris@4 810 jmp L_do_loop
Chris@4 811
Chris@4 812 ALIGN 4
Chris@4 813 L_while_test:
Chris@4 814 cmp [esp+24], edi
Chris@4 815 jbe L_break_loop
Chris@4 816 cmp [esp+12], esi
Chris@4 817 jbe L_break_loop
Chris@4 818
Chris@4 819 L_do_loop:
Chris@4 820 cmp bl, 15
Chris@4 821 ja L_get_length_code /* if (15 < bits) */
Chris@4 822
Chris@4 823 xor eax, eax
Chris@4 824 lodsw /* al = *(ushort *)in++ */
Chris@4 825 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 826 add bl, 16 /* bits += 16 */
Chris@4 827 shl eax, cl
Chris@4 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 829
Chris@4 830 L_get_length_code:
Chris@4 831 mov eax, [esp+56] /* eax = lmask */
Chris@4 832 and eax, edx /* eax &= hold */
Chris@4 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
Chris@4 834
Chris@4 835 L_dolen:
Chris@4 836 mov cl, ah /* cl = this.bits */
Chris@4 837 sub bl, ah /* bits -= this.bits */
Chris@4 838 shr edx, cl /* hold >>= this.bits */
Chris@4 839
Chris@4 840 test al, al
Chris@4 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
Chris@4 842
Chris@4 843 shr eax, 16 /* output this.val char */
Chris@4 844 stosb
Chris@4 845 jmp L_while_test
Chris@4 846
Chris@4 847 ALIGN 4
Chris@4 848 L_test_for_length_base:
Chris@4 849 mov ecx, eax /* len = this */
Chris@4 850 shr ecx, 16 /* len = this.val */
Chris@4 851 mov [esp+64], ecx /* save len */
Chris@4 852 mov cl, al
Chris@4 853
Chris@4 854 test al, 16
Chris@4 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
Chris@4 856 and cl, 15 /* op &= 15 */
Chris@4 857 jz L_decode_distance /* if (!op) */
Chris@4 858 cmp bl, cl
Chris@4 859 jae L_add_bits_to_len /* if (op <= bits) */
Chris@4 860
Chris@4 861 mov ch, cl /* stash op in ch, freeing cl */
Chris@4 862 xor eax, eax
Chris@4 863 lodsw /* al = *(ushort *)in++ */
Chris@4 864 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 865 add bl, 16 /* bits += 16 */
Chris@4 866 shl eax, cl
Chris@4 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 868 mov cl, ch /* move op back to ecx */
Chris@4 869
Chris@4 870 L_add_bits_to_len:
Chris@4 871 sub bl, cl
Chris@4 872 xor eax, eax
Chris@4 873 inc eax
Chris@4 874 shl eax, cl
Chris@4 875 dec eax
Chris@4 876 and eax, edx /* eax &= hold */
Chris@4 877 shr edx, cl
Chris@4 878 add [esp+64], eax /* len += hold & mask[op] */
Chris@4 879
Chris@4 880 L_decode_distance:
Chris@4 881 cmp bl, 15
Chris@4 882 ja L_get_distance_code /* if (15 < bits) */
Chris@4 883
Chris@4 884 xor eax, eax
Chris@4 885 lodsw /* al = *(ushort *)in++ */
Chris@4 886 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 887 add bl, 16 /* bits += 16 */
Chris@4 888 shl eax, cl
Chris@4 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 890
Chris@4 891 L_get_distance_code:
Chris@4 892 mov eax, [esp+60] /* eax = dmask */
Chris@4 893 mov ecx, [esp+36] /* ecx = dcode */
Chris@4 894 and eax, edx /* eax &= hold */
Chris@4 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
Chris@4 896
Chris@4 897 L_dodist:
Chris@4 898 mov ebp, eax /* dist = this */
Chris@4 899 shr ebp, 16 /* dist = this.val */
Chris@4 900 mov cl, ah
Chris@4 901 sub bl, ah /* bits -= this.bits */
Chris@4 902 shr edx, cl /* hold >>= this.bits */
Chris@4 903 mov cl, al /* cl = this.op */
Chris@4 904
Chris@4 905 test al, 16 /* if ((op & 16) == 0) */
Chris@4 906 jz L_test_for_second_level_dist
Chris@4 907 and cl, 15 /* op &= 15 */
Chris@4 908 jz L_check_dist_one
Chris@4 909 cmp bl, cl
Chris@4 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
Chris@4 911
Chris@4 912 mov ch, cl /* stash op in ch, freeing cl */
Chris@4 913 xor eax, eax
Chris@4 914 lodsw /* al = *(ushort *)in++ */
Chris@4 915 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 916 add bl, 16 /* bits += 16 */
Chris@4 917 shl eax, cl
Chris@4 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 919 mov cl, ch /* move op back to ecx */
Chris@4 920
Chris@4 921 L_add_bits_to_dist:
Chris@4 922 sub bl, cl
Chris@4 923 xor eax, eax
Chris@4 924 inc eax
Chris@4 925 shl eax, cl
Chris@4 926 dec eax /* (1 << op) - 1 */
Chris@4 927 and eax, edx /* eax &= hold */
Chris@4 928 shr edx, cl
Chris@4 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
Chris@4 930
Chris@4 931 L_check_window:
Chris@4 932 mov [esp+8], esi /* save in so from can use its reg */
Chris@4 933 mov eax, edi
Chris@4 934 sub eax, [esp+20] /* nbytes = out - beg */
Chris@4 935
Chris@4 936 cmp eax, ebp
Chris@4 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
Chris@4 938
Chris@4 939 mov ecx, [esp+64] /* ecx = len */
Chris@4 940 mov esi, edi
Chris@4 941 sub esi, ebp /* from = out - dist */
Chris@4 942
Chris@4 943 sar ecx, 1
Chris@4 944 jnc L_copy_two
Chris@4 945
Chris@4 946 rep movsw
Chris@4 947 mov al, [esi]
Chris@4 948 mov [edi], al
Chris@4 949 inc edi
Chris@4 950
Chris@4 951 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@4 952 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 953 jmp L_while_test
Chris@4 954
Chris@4 955 L_copy_two:
Chris@4 956 rep movsw
Chris@4 957 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@4 958 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 959 jmp L_while_test
Chris@4 960
Chris@4 961 ALIGN 4
Chris@4 962 L_check_dist_one:
Chris@4 963 cmp ebp, 1 /* if dist 1, is a memset */
Chris@4 964 jne L_check_window
Chris@4 965 cmp [esp+20], edi
Chris@4 966 je L_check_window /* out == beg, if outside window */
Chris@4 967
Chris@4 968 mov ecx, [esp+64] /* ecx = len */
Chris@4 969 mov al, [edi-1]
Chris@4 970 mov ah, al
Chris@4 971
Chris@4 972 sar ecx, 1
Chris@4 973 jnc L_set_two
Chris@4 974 mov [edi], al /* memset out with from[-1] */
Chris@4 975 inc edi
Chris@4 976
Chris@4 977 L_set_two:
Chris@4 978 rep stosw
Chris@4 979 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 980 jmp L_while_test
Chris@4 981
Chris@4 982 ALIGN 4
Chris@4 983 L_test_for_second_level_length:
Chris@4 984 test al, 64
Chris@4 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
Chris@4 986
Chris@4 987 xor eax, eax
Chris@4 988 inc eax
Chris@4 989 shl eax, cl
Chris@4 990 dec eax
Chris@4 991 and eax, edx /* eax &= hold */
Chris@4 992 add eax, [esp+64] /* eax += len */
Chris@4 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 994 jmp L_dolen
Chris@4 995
Chris@4 996 ALIGN 4
Chris@4 997 L_test_for_second_level_dist:
Chris@4 998 test al, 64
Chris@4 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
Chris@4 1000
Chris@4 1001 xor eax, eax
Chris@4 1002 inc eax
Chris@4 1003 shl eax, cl
Chris@4 1004 dec eax
Chris@4 1005 and eax, edx /* eax &= hold */
Chris@4 1006 add eax, ebp /* eax += dist */
Chris@4 1007 mov ecx, [esp+36] /* ecx = dcode */
Chris@4 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 1009 jmp L_dodist
Chris@4 1010
Chris@4 1011 ALIGN 4
Chris@4 1012 L_clip_window:
Chris@4 1013 mov ecx, eax
Chris@4 1014 mov eax, [esp+48] /* eax = wsize */
Chris@4 1015 neg ecx /* nbytes = -nbytes */
Chris@4 1016 mov esi, [esp+28] /* from = window */
Chris@4 1017
Chris@4 1018 cmp eax, ebp
Chris@4 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
Chris@4 1020
Chris@4 1021 add ecx, ebp /* nbytes = dist - nbytes */
Chris@4 1022 cmp dword ptr [esp+52], 0
Chris@4 1023 jne L_wrap_around_window /* if (write != 0) */
Chris@4 1024
Chris@4 1025 sub eax, ecx
Chris@4 1026 add esi, eax /* from += wsize - nbytes */
Chris@4 1027
Chris@4 1028 mov eax, [esp+64] /* eax = len */
Chris@4 1029 cmp eax, ecx
Chris@4 1030 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1031
Chris@4 1032 sub eax, ecx /* len -= nbytes */
Chris@4 1033 rep movsb
Chris@4 1034 mov esi, edi
Chris@4 1035 sub esi, ebp /* from = out - dist */
Chris@4 1036 jmp L_do_copy
Chris@4 1037
Chris@4 1038 ALIGN 4
Chris@4 1039 L_wrap_around_window:
Chris@4 1040 mov eax, [esp+52] /* eax = write */
Chris@4 1041 cmp ecx, eax
Chris@4 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
Chris@4 1043
Chris@4 1044 add esi, [esp+48] /* from += wsize */
Chris@4 1045 add esi, eax /* from += write */
Chris@4 1046 sub esi, ecx /* from -= nbytes */
Chris@4 1047 sub ecx, eax /* nbytes -= write */
Chris@4 1048
Chris@4 1049 mov eax, [esp+64] /* eax = len */
Chris@4 1050 cmp eax, ecx
Chris@4 1051 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1052
Chris@4 1053 sub eax, ecx /* len -= nbytes */
Chris@4 1054 rep movsb
Chris@4 1055 mov esi, [esp+28] /* from = window */
Chris@4 1056 mov ecx, [esp+52] /* nbytes = write */
Chris@4 1057 cmp eax, ecx
Chris@4 1058 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1059
Chris@4 1060 sub eax, ecx /* len -= nbytes */
Chris@4 1061 rep movsb
Chris@4 1062 mov esi, edi
Chris@4 1063 sub esi, ebp /* from = out - dist */
Chris@4 1064 jmp L_do_copy
Chris@4 1065
Chris@4 1066 ALIGN 4
Chris@4 1067 L_contiguous_in_window:
Chris@4 1068 add esi, eax
Chris@4 1069 sub esi, ecx /* from += write - nbytes */
Chris@4 1070
Chris@4 1071 mov eax, [esp+64] /* eax = len */
Chris@4 1072 cmp eax, ecx
Chris@4 1073 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1074
Chris@4 1075 sub eax, ecx /* len -= nbytes */
Chris@4 1076 rep movsb
Chris@4 1077 mov esi, edi
Chris@4 1078 sub esi, ebp /* from = out - dist */
Chris@4 1079 jmp L_do_copy
Chris@4 1080
Chris@4 1081 ALIGN 4
Chris@4 1082 L_do_copy:
Chris@4 1083 mov ecx, eax
Chris@4 1084 rep movsb
Chris@4 1085
Chris@4 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@4 1087 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 1088 jmp L_while_test
Chris@4 1089
Chris@4 1090 L_test_for_end_of_block:
Chris@4 1091 test al, 32
Chris@4 1092 jz L_invalid_literal_length_code
Chris@4 1093 mov dword ptr [esp+72], 1
Chris@4 1094 jmp L_break_loop_with_status
Chris@4 1095
Chris@4 1096 L_invalid_literal_length_code:
Chris@4 1097 mov dword ptr [esp+72], 2
Chris@4 1098 jmp L_break_loop_with_status
Chris@4 1099
Chris@4 1100 L_invalid_distance_code:
Chris@4 1101 mov dword ptr [esp+72], 3
Chris@4 1102 jmp L_break_loop_with_status
Chris@4 1103
Chris@4 1104 L_invalid_distance_too_far:
Chris@4 1105 mov esi, [esp+8] /* restore in; esi was reused for 'from', and [esp+4] is the saved ebp */
Chris@4 1106 mov dword ptr [esp+72], 4 /* status 4: reported as "invalid distance too far back" */
Chris@4 1107 jmp L_break_loop_with_status
Chris@4 1108
Chris@4 1109 L_break_loop:
Chris@4 1110 mov dword ptr [esp+72], 0
Chris@4 1111
Chris@4 1112 L_break_loop_with_status:
Chris@4 1113 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 1114 mov [esp+8], esi /* save in */
Chris@4 1115 mov [esp+16], edi /* save out */
Chris@4 1116 mov [esp+44], ebx /* save bits */
Chris@4 1117 mov [esp+40], edx /* save hold */
Chris@4 1118 mov ebp, [esp+4] /* restore esp, ebp */
Chris@4 1119 mov esp, [esp]
Chris@4 1120 }
Chris@4 1121 #else
Chris@4 1122 #error "x86 architecture not defined"
Chris@4 1123 #endif
Chris@4 1124
Chris@4 1125 if (ar.status > 1) {
Chris@4 1126 if (ar.status == 2)
Chris@4 1127 strm->msg = "invalid literal/length code";
Chris@4 1128 else if (ar.status == 3)
Chris@4 1129 strm->msg = "invalid distance code";
Chris@4 1130 else
Chris@4 1131 strm->msg = "invalid distance too far back";
Chris@4 1132 state->mode = BAD;
Chris@4 1133 }
Chris@4 1134 else if ( ar.status == 1 ) {
Chris@4 1135 state->mode = TYPE;
Chris@4 1136 }
Chris@4 1137
Chris@4 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
Chris@4 1139 ar.len = ar.bits >> 3;
Chris@4 1140 ar.in -= ar.len;
Chris@4 1141 ar.bits -= ar.len << 3;
Chris@4 1142 ar.hold &= (1U << ar.bits) - 1;
Chris@4 1143
Chris@4 1144 /* update state and return */
Chris@4 1145 strm->next_in = ar.in;
Chris@4 1146 strm->next_out = ar.out;
Chris@4 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
Chris@4 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
Chris@4 1149 PAD_AVAIL_IN - (ar.in - ar.last));
Chris@4 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
Chris@4 1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
Chris@4 1152 PAD_AVAIL_OUT - (ar.out - ar.end));
Chris@4 1153 state->hold = ar.hold;
Chris@4 1154 state->bits = ar.bits;
Chris@4 1155 return;
Chris@4 1156 }
Chris@4 1157