annotate src/zlib-1.2.7/contrib/inflate86/inffas86.c @ 148:b4bfdf10c4b3

Update Win64 capnp builds to v0.6
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 22 May 2017 18:56:49 +0100
parents 8a15ff55d9af
children
rev   line source
cannam@89 1 /* inffas86.c is a hand-tuned assembler version of
cannam@89 2 *
cannam@89 3 * inffast.c -- fast decoding
cannam@89 4 * Copyright (C) 1995-2003 Mark Adler
cannam@89 5 * For conditions of distribution and use, see copyright notice in zlib.h
cannam@89 6 *
cannam@89 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
cannam@89 8 * Please use the copyright conditions above.
cannam@89 9 *
cannam@89 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
cannam@89 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
cannam@89 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
cannam@89 13 * bytes. I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
cannam@89 14 * from http://fedora.linux.duke.edu/fc1_x86_64
cannam@89 15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
cannam@89 16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
cannam@89 17 * when decompressing mozilla-source-1.3.tar.gz.
cannam@89 18 *
cannam@89 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
cannam@89 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
cannam@89 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
cannam@89 22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
cannam@89 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
cannam@89 24 * enabled. I will attempt to merge the MMX code into this version. Newer
cannam@89 25 * versions of this and inffast.S can be found at
cannam@89 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
cannam@89 27 */
cannam@89 28
cannam@89 29 #include "zutil.h"
cannam@89 30 #include "inftrees.h"
cannam@89 31 #include "inflate.h"
cannam@89 32 #include "inffast.h"
cannam@89 33
cannam@89 34 /* Mark Adler's comments from inffast.c: */
cannam@89 35
cannam@89 36 /*
cannam@89 37 Decode literal, length, and distance codes and write out the resulting
cannam@89 38 literal and match bytes until either not enough input or output is
cannam@89 39 available, an end-of-block is encountered, or a data error is encountered.
cannam@89 40 When large enough input and output buffers are supplied to inflate(), for
cannam@89 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
cannam@89 42 inflate execution time is spent in this routine.
cannam@89 43
cannam@89 44 Entry assumptions:
cannam@89 45
cannam@89 46 state->mode == LEN
cannam@89 47 strm->avail_in >= 6
cannam@89 48 strm->avail_out >= 258
cannam@89 49 start >= strm->avail_out
cannam@89 50 state->bits < 8
cannam@89 51
cannam@89 52 On return, state->mode is one of:
cannam@89 53
cannam@89 54 LEN -- ran out of enough output space or enough available input
cannam@89 55 TYPE -- reached end of block code, inflate() to interpret next block
cannam@89 56 BAD -- error in block data
cannam@89 57
cannam@89 58 Notes:
cannam@89 59
cannam@89 60 - The maximum input bits used by a length/distance pair is 15 bits for the
cannam@89 61 length code, 5 bits for the length extra, 15 bits for the distance code,
cannam@89 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
cannam@89 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
cannam@89 64 checking for available input while decoding.
cannam@89 65
cannam@89 66 - The maximum bytes that a single length/distance pair can output is 258
cannam@89 67 bytes, which is the maximum length that can be coded. inflate_fast()
cannam@89 68 requires strm->avail_out >= 258 for each loop to avoid checking for
cannam@89 69 output space.
cannam@89 70 */
cannam@89 71 void inflate_fast(strm, start)
cannam@89 72 z_streamp strm;
cannam@89 73 unsigned start; /* inflate()'s starting value for strm->avail_out */
cannam@89 74 {
cannam@89 75 struct inflate_state FAR *state;
cannam@89 76 struct inffast_ar {
cannam@89 77 /* 64 32 x86 x86_64 */
cannam@89 78 /* ar offset register */
cannam@89 79 /* 0 0 */ void *esp; /* esp save */
cannam@89 80 /* 8 4 */ void *ebp; /* ebp save */
cannam@89 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
cannam@89 82 /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
cannam@89 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
cannam@89 84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
cannam@89 85 /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
cannam@89 86 /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
cannam@89 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
cannam@89 88 /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
cannam@89 89 /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
cannam@89 90 /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
cannam@89 91 /* 92 48 */ unsigned wsize; /* window size */
cannam@89 92 /* 96 52 */ unsigned write; /* window write index */
cannam@89 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode */
cannam@89 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode */
cannam@89 95 /*108 64 */ unsigned len; /* r14 match length */
cannam@89 96 /*112 68 */ unsigned dist; /* r15 match distance */
cannam@89 97 /*116 72 */ unsigned status; /* set when state chng*/
cannam@89 98 } ar;
cannam@89 99
cannam@89 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
cannam@89 101 #define PAD_AVAIL_IN 6
cannam@89 102 #define PAD_AVAIL_OUT 258
cannam@89 103 #else
cannam@89 104 #define PAD_AVAIL_IN 5
cannam@89 105 #define PAD_AVAIL_OUT 257
cannam@89 106 #endif
cannam@89 107
cannam@89 108 /* copy state to local variables */
cannam@89 109 state = (struct inflate_state FAR *)strm->state;
cannam@89 110 ar.in = strm->next_in;
cannam@89 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
cannam@89 112 ar.out = strm->next_out;
cannam@89 113 ar.beg = ar.out - (start - strm->avail_out);
cannam@89 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
cannam@89 115 ar.wsize = state->wsize;
cannam@89 116 ar.write = state->wnext;
cannam@89 117 ar.window = state->window;
cannam@89 118 ar.hold = state->hold;
cannam@89 119 ar.bits = state->bits;
cannam@89 120 ar.lcode = state->lencode;
cannam@89 121 ar.dcode = state->distcode;
cannam@89 122 ar.lmask = (1U << state->lenbits) - 1;
cannam@89 123 ar.dmask = (1U << state->distbits) - 1;
cannam@89 124
cannam@89 125 /* decode literals and length/distances until end-of-block or not enough
cannam@89 126 input data or output space */
cannam@89 127
cannam@89 128 /* align in on 1/2 hold size boundary */
cannam@89 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
cannam@89 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
cannam@89 131 ar.bits += 8;
cannam@89 132 }
cannam@89 133
cannam@89 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
cannam@89 135 __asm__ __volatile__ (
cannam@89 136 " leaq %0, %%rax\n"
cannam@89 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
cannam@89 138 " movq %%rsp, (%%rax)\n"
cannam@89 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
cannam@89 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
cannam@89 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
cannam@89 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
cannam@89 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
cannam@89 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
cannam@89 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
cannam@89 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
cannam@89 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
cannam@89 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
cannam@89 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
cannam@89 150 /* r14d = len */
cannam@89 151 /* r15d = dist */
cannam@89 152 " cld\n"
cannam@89 153 " cmpq %%rdi, %%r10\n"
cannam@89 154 " je .L_one_time\n" /* if only one decode left */
cannam@89 155 " cmpq %%rsi, %%r9\n"
cannam@89 156 " je .L_one_time\n"
cannam@89 157 " jmp .L_do_loop\n"
cannam@89 158
cannam@89 159 ".L_one_time:\n"
cannam@89 160 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@89 161 " cmpb $32, %%bl\n"
cannam@89 162 " ja .L_get_length_code_one_time\n"
cannam@89 163
cannam@89 164 " lodsl\n" /* eax = *(uint *)in++ */
cannam@89 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 166 " addb $32, %%bl\n" /* bits += 32 */
cannam@89 167 " shlq %%cl, %%rax\n"
cannam@89 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@89 169 " jmp .L_get_length_code_one_time\n"
cannam@89 170
cannam@89 171 ".align 32,0x90\n"
cannam@89 172 ".L_while_test:\n"
cannam@89 173 " cmpq %%rdi, %%r10\n"
cannam@89 174 " jbe .L_break_loop\n"
cannam@89 175 " cmpq %%rsi, %%r9\n"
cannam@89 176 " jbe .L_break_loop\n"
cannam@89 177
cannam@89 178 ".L_do_loop:\n"
cannam@89 179 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@89 180 " cmpb $32, %%bl\n"
cannam@89 181 " ja .L_get_length_code\n" /* if (32 < bits) */
cannam@89 182
cannam@89 183 " lodsl\n" /* eax = *(uint *)in++ */
cannam@89 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 185 " addb $32, %%bl\n" /* bits += 32 */
cannam@89 186 " shlq %%cl, %%rax\n"
cannam@89 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@89 188
cannam@89 189 ".L_get_length_code:\n"
cannam@89 190 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@89 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@89 192
cannam@89 193 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@89 194 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@89 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@89 196
cannam@89 197 " testb %%al, %%al\n"
cannam@89 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@89 199
cannam@89 200 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@89 201 " shrl $16, %%eax\n" /* output this.val char */
cannam@89 202 " stosb\n"
cannam@89 203
cannam@89 204 ".L_get_length_code_one_time:\n"
cannam@89 205 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@89 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@89 207
cannam@89 208 ".L_dolen:\n"
cannam@89 209 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@89 210 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@89 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@89 212
cannam@89 213 " testb %%al, %%al\n"
cannam@89 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@89 215
cannam@89 216 " shrl $16, %%eax\n" /* output this.val char */
cannam@89 217 " stosb\n"
cannam@89 218 " jmp .L_while_test\n"
cannam@89 219
cannam@89 220 ".align 32,0x90\n"
cannam@89 221 ".L_test_for_length_base:\n"
cannam@89 222 " movl %%eax, %%r14d\n" /* len = this */
cannam@89 223 " shrl $16, %%r14d\n" /* len = this.val */
cannam@89 224 " movb %%al, %%cl\n"
cannam@89 225
cannam@89 226 " testb $16, %%al\n"
cannam@89 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
cannam@89 228 " andb $15, %%cl\n" /* op &= 15 */
cannam@89 229 " jz .L_decode_distance\n" /* if (!op) */
cannam@89 230
cannam@89 231 ".L_add_bits_to_len:\n"
cannam@89 232 " subb %%cl, %%bl\n"
cannam@89 233 " xorl %%eax, %%eax\n"
cannam@89 234 " incl %%eax\n"
cannam@89 235 " shll %%cl, %%eax\n"
cannam@89 236 " decl %%eax\n"
cannam@89 237 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 238 " shrq %%cl, %%rdx\n"
cannam@89 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
cannam@89 240
cannam@89 241 ".L_decode_distance:\n"
cannam@89 242 " movq %%r13, %%r8\n" /* r8 = dmask */
cannam@89 243 " cmpb $32, %%bl\n"
cannam@89 244 " ja .L_get_distance_code\n" /* if (32 < bits) */
cannam@89 245
cannam@89 246 " lodsl\n" /* eax = *(uint *)in++ */
cannam@89 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 248 " addb $32, %%bl\n" /* bits += 32 */
cannam@89 249 " shlq %%cl, %%rax\n"
cannam@89 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@89 251
cannam@89 252 ".L_get_distance_code:\n"
cannam@89 253 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@89 254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
cannam@89 255
cannam@89 256 ".L_dodist:\n"
cannam@89 257 " movl %%eax, %%r15d\n" /* dist = this */
cannam@89 258 " shrl $16, %%r15d\n" /* dist = this.val */
cannam@89 259 " movb %%ah, %%cl\n"
cannam@89 260 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@89 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@89 262 " movb %%al, %%cl\n" /* cl = this.op */
cannam@89 263
cannam@89 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
cannam@89 265 " jz .L_test_for_second_level_dist\n"
cannam@89 266 " andb $15, %%cl\n" /* op &= 15 */
cannam@89 267 " jz .L_check_dist_one\n"
cannam@89 268
cannam@89 269 ".L_add_bits_to_dist:\n"
cannam@89 270 " subb %%cl, %%bl\n"
cannam@89 271 " xorl %%eax, %%eax\n"
cannam@89 272 " incl %%eax\n"
cannam@89 273 " shll %%cl, %%eax\n"
cannam@89 274 " decl %%eax\n" /* (1 << op) - 1 */
cannam@89 275 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 276 " shrq %%cl, %%rdx\n"
cannam@89 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
cannam@89 278
cannam@89 279 ".L_check_window:\n"
cannam@89 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
cannam@89 281 " movq %%rdi, %%rax\n"
cannam@89 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
cannam@89 283
cannam@89 284 " cmpl %%r15d, %%eax\n"
cannam@89 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
cannam@89 286
cannam@89 287 " movl %%r14d, %%ecx\n" /* ecx = len */
cannam@89 288 " movq %%rdi, %%rsi\n"
cannam@89 289 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@89 290
cannam@89 291 " sarl %%ecx\n"
cannam@89 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
cannam@89 293
cannam@89 294 " rep movsw\n"
cannam@89 295 " movb (%%rsi), %%al\n"
cannam@89 296 " movb %%al, (%%rdi)\n"
cannam@89 297 " incq %%rdi\n"
cannam@89 298
cannam@89 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
cannam@89 300 " jmp .L_while_test\n"
cannam@89 301
cannam@89 302 ".L_copy_two:\n"
cannam@89 303 " rep movsw\n"
cannam@89 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
cannam@89 305 " jmp .L_while_test\n"
cannam@89 306
cannam@89 307 ".align 32,0x90\n"
cannam@89 308 ".L_check_dist_one:\n"
cannam@89 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
cannam@89 310 " jne .L_check_window\n"
cannam@89 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
cannam@89 312 " je .L_check_window\n"
cannam@89 313
cannam@89 314 " movl %%r14d, %%ecx\n" /* ecx = len */
cannam@89 315 " movb -1(%%rdi), %%al\n"
cannam@89 316 " movb %%al, %%ah\n"
cannam@89 317
cannam@89 318 " sarl %%ecx\n"
cannam@89 319 " jnc .L_set_two\n"
cannam@89 320 " movb %%al, (%%rdi)\n"
cannam@89 321 " incq %%rdi\n"
cannam@89 322
cannam@89 323 ".L_set_two:\n"
cannam@89 324 " rep stosw\n"
cannam@89 325 " jmp .L_while_test\n"
cannam@89 326
cannam@89 327 ".align 32,0x90\n"
cannam@89 328 ".L_test_for_second_level_length:\n"
cannam@89 329 " testb $64, %%al\n"
cannam@89 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
cannam@89 331
cannam@89 332 " xorl %%eax, %%eax\n"
cannam@89 333 " incl %%eax\n"
cannam@89 334 " shll %%cl, %%eax\n"
cannam@89 335 " decl %%eax\n"
cannam@89 336 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 337 " addl %%r14d, %%eax\n" /* eax += len */
cannam@89 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
cannam@89 339 " jmp .L_dolen\n"
cannam@89 340
cannam@89 341 ".align 32,0x90\n"
cannam@89 342 ".L_test_for_second_level_dist:\n"
cannam@89 343 " testb $64, %%al\n"
cannam@89 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
cannam@89 345
cannam@89 346 " xorl %%eax, %%eax\n"
cannam@89 347 " incl %%eax\n"
cannam@89 348 " shll %%cl, %%eax\n"
cannam@89 349 " decl %%eax\n"
cannam@89 350 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 351 " addl %%r15d, %%eax\n" /* eax += dist */
cannam@89 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
cannam@89 353 " jmp .L_dodist\n"
cannam@89 354
cannam@89 355 ".align 32,0x90\n"
cannam@89 356 ".L_clip_window:\n"
cannam@89 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
cannam@89 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
cannam@89 359 " negl %%ecx\n" /* nbytes = -nbytes */
cannam@89 360
cannam@89 361 " cmpl %%r15d, %%eax\n"
cannam@89 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
cannam@89 363
cannam@89 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
cannam@89 365 " cmpl $0, 96(%%rsp)\n"
cannam@89 366 " jne .L_wrap_around_window\n" /* if (write != 0) */
cannam@89 367
cannam@89 368 " movq 56(%%rsp), %%rsi\n" /* from = window */
cannam@89 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
cannam@89 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
cannam@89 371
cannam@89 372 " movl %%r14d, %%eax\n" /* eax = len */
cannam@89 373 " cmpl %%ecx, %%r14d\n"
cannam@89 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 375
cannam@89 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
cannam@89 377 " rep movsb\n"
cannam@89 378 " movq %%rdi, %%rsi\n"
cannam@89 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
cannam@89 380 " jmp .L_do_copy\n"
cannam@89 381
cannam@89 382 ".align 32,0x90\n"
cannam@89 383 ".L_wrap_around_window:\n"
cannam@89 384 " movl 96(%%rsp), %%eax\n" /* eax = write */
cannam@89 385 " cmpl %%eax, %%ecx\n"
cannam@89 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
cannam@89 387
cannam@89 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
cannam@89 389 " addq 56(%%rsp), %%rsi\n" /* from += window */
cannam@89 390 " addq %%rax, %%rsi\n" /* from += write */
cannam@89 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
cannam@89 392 " subl %%eax, %%ecx\n" /* nbytes -= write */
cannam@89 393
cannam@89 394 " movl %%r14d, %%eax\n" /* eax = len */
cannam@89 395 " cmpl %%ecx, %%eax\n"
cannam@89 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 397
cannam@89 398 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 399 " rep movsb\n"
cannam@89 400 " movq 56(%%rsp), %%rsi\n" /* from = window */
cannam@89 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
cannam@89 402 " cmpl %%ecx, %%eax\n"
cannam@89 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 404
cannam@89 405 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 406 " rep movsb\n"
cannam@89 407 " movq %%rdi, %%rsi\n"
cannam@89 408 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@89 409 " jmp .L_do_copy\n"
cannam@89 410
cannam@89 411 ".align 32,0x90\n"
cannam@89 412 ".L_contiguous_in_window:\n"
cannam@89 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
cannam@89 414 " addq %%rax, %%rsi\n"
cannam@89 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
cannam@89 416
cannam@89 417 " movl %%r14d, %%eax\n" /* eax = len */
cannam@89 418 " cmpl %%ecx, %%eax\n"
cannam@89 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 420
cannam@89 421 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 422 " rep movsb\n"
cannam@89 423 " movq %%rdi, %%rsi\n"
cannam@89 424 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@89 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 426
cannam@89 427 ".align 32,0x90\n"
cannam@89 428 ".L_do_copy:\n"
cannam@89 429 " movl %%eax, %%ecx\n" /* ecx = len */
cannam@89 430 " rep movsb\n"
cannam@89 431
cannam@89 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
cannam@89 433 " jmp .L_while_test\n"
cannam@89 434
cannam@89 435 ".L_test_for_end_of_block:\n"
cannam@89 436 " testb $32, %%al\n"
cannam@89 437 " jz .L_invalid_literal_length_code\n"
cannam@89 438 " movl $1, 116(%%rsp)\n"
cannam@89 439 " jmp .L_break_loop_with_status\n"
cannam@89 440
cannam@89 441 ".L_invalid_literal_length_code:\n"
cannam@89 442 " movl $2, 116(%%rsp)\n"
cannam@89 443 " jmp .L_break_loop_with_status\n"
cannam@89 444
cannam@89 445 ".L_invalid_distance_code:\n"
cannam@89 446 " movl $3, 116(%%rsp)\n"
cannam@89 447 " jmp .L_break_loop_with_status\n"
cannam@89 448
cannam@89 449 ".L_invalid_distance_too_far:\n"
cannam@89 450 " movl $4, 116(%%rsp)\n"
cannam@89 451 " jmp .L_break_loop_with_status\n"
cannam@89 452
cannam@89 453 ".L_break_loop:\n"
cannam@89 454 " movl $0, 116(%%rsp)\n"
cannam@89 455
cannam@89 456 ".L_break_loop_with_status:\n"
cannam@89 457 /* put in, out, bits, and hold back into ar and pop esp */
cannam@89 458 " movq %%rsi, 16(%%rsp)\n" /* in */
cannam@89 459 " movq %%rdi, 32(%%rsp)\n" /* out */
cannam@89 460 " movl %%ebx, 88(%%rsp)\n" /* bits */
cannam@89 461 " movq %%rdx, 80(%%rsp)\n" /* hold */
cannam@89 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
cannam@89 463 " movq 8(%%rsp), %%rbp\n"
cannam@89 464 " movq %%rax, %%rsp\n"
cannam@89 465 :
cannam@89 466 : "m" (ar)
cannam@89 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
cannam@89 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
cannam@89 469 );
cannam@89 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
cannam@89 471 __asm__ __volatile__ (
cannam@89 472 " leal %0, %%eax\n"
cannam@89 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
cannam@89 474 " movl %%ebp, 4(%%eax)\n"
cannam@89 475 " movl %%eax, %%esp\n"
cannam@89 476 " movl 8(%%esp), %%esi\n" /* esi = in */
cannam@89 477 " movl 16(%%esp), %%edi\n" /* edi = out */
cannam@89 478 " movl 40(%%esp), %%edx\n" /* edx = hold */
cannam@89 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
cannam@89 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@89 481
cannam@89 482 " cld\n"
cannam@89 483 " jmp .L_do_loop\n"
cannam@89 484
cannam@89 485 ".align 32,0x90\n"
cannam@89 486 ".L_while_test:\n"
cannam@89 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
cannam@89 488 " jbe .L_break_loop\n"
cannam@89 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
cannam@89 490 " jbe .L_break_loop\n"
cannam@89 491
cannam@89 492 ".L_do_loop:\n"
cannam@89 493 " cmpb $15, %%bl\n"
cannam@89 494 " ja .L_get_length_code\n" /* if (15 < bits) */
cannam@89 495
cannam@89 496 " xorl %%eax, %%eax\n"
cannam@89 497 " lodsw\n" /* al = *(ushort *)in++ */
cannam@89 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 499 " addb $16, %%bl\n" /* bits += 16 */
cannam@89 500 " shll %%cl, %%eax\n"
cannam@89 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@89 502
cannam@89 503 ".L_get_length_code:\n"
cannam@89 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
cannam@89 505 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@89 507
cannam@89 508 ".L_dolen:\n"
cannam@89 509 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@89 510 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@89 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
cannam@89 512
cannam@89 513 " testb %%al, %%al\n"
cannam@89 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@89 515
cannam@89 516 " shrl $16, %%eax\n" /* output this.val char */
cannam@89 517 " stosb\n"
cannam@89 518 " jmp .L_while_test\n"
cannam@89 519
cannam@89 520 ".align 32,0x90\n"
cannam@89 521 ".L_test_for_length_base:\n"
cannam@89 522 " movl %%eax, %%ecx\n" /* len = this */
cannam@89 523 " shrl $16, %%ecx\n" /* len = this.val */
cannam@89 524 " movl %%ecx, 64(%%esp)\n" /* save len */
cannam@89 525 " movb %%al, %%cl\n"
cannam@89 526
cannam@89 527 " testb $16, %%al\n"
cannam@89 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
cannam@89 529 " andb $15, %%cl\n" /* op &= 15 */
cannam@89 530 " jz .L_decode_distance\n" /* if (!op) */
cannam@89 531 " cmpb %%cl, %%bl\n"
cannam@89 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
cannam@89 533
cannam@89 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
cannam@89 535 " xorl %%eax, %%eax\n"
cannam@89 536 " lodsw\n" /* al = *(ushort *)in++ */
cannam@89 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 538 " addb $16, %%bl\n" /* bits += 16 */
cannam@89 539 " shll %%cl, %%eax\n"
cannam@89 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@89 541 " movb %%ch, %%cl\n" /* move op back to ecx */
cannam@89 542
cannam@89 543 ".L_add_bits_to_len:\n"
cannam@89 544 " subb %%cl, %%bl\n"
cannam@89 545 " xorl %%eax, %%eax\n"
cannam@89 546 " incl %%eax\n"
cannam@89 547 " shll %%cl, %%eax\n"
cannam@89 548 " decl %%eax\n"
cannam@89 549 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 550 " shrl %%cl, %%edx\n"
cannam@89 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
cannam@89 552
cannam@89 553 ".L_decode_distance:\n"
cannam@89 554 " cmpb $15, %%bl\n"
cannam@89 555 " ja .L_get_distance_code\n" /* if (15 < bits) */
cannam@89 556
cannam@89 557 " xorl %%eax, %%eax\n"
cannam@89 558 " lodsw\n" /* al = *(ushort *)in++ */
cannam@89 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 560 " addb $16, %%bl\n" /* bits += 16 */
cannam@89 561 " shll %%cl, %%eax\n"
cannam@89 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@89 563
cannam@89 564 ".L_get_distance_code:\n"
cannam@89 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
cannam@89 566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
cannam@89 567 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
cannam@89 569
cannam@89 570 ".L_dodist:\n"
cannam@89 571 " movl %%eax, %%ebp\n" /* dist = this */
cannam@89 572 " shrl $16, %%ebp\n" /* dist = this.val */
cannam@89 573 " movb %%ah, %%cl\n"
cannam@89 574 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@89 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
cannam@89 576 " movb %%al, %%cl\n" /* cl = this.op */
cannam@89 577
cannam@89 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
cannam@89 579 " jz .L_test_for_second_level_dist\n"
cannam@89 580 " andb $15, %%cl\n" /* op &= 15 */
cannam@89 581 " jz .L_check_dist_one\n"
cannam@89 582 " cmpb %%cl, %%bl\n"
cannam@89 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
cannam@89 584
cannam@89 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
cannam@89 586 " xorl %%eax, %%eax\n"
cannam@89 587 " lodsw\n" /* al = *(ushort *)in++ */
cannam@89 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@89 589 " addb $16, %%bl\n" /* bits += 16 */
cannam@89 590 " shll %%cl, %%eax\n"
cannam@89 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@89 592 " movb %%ch, %%cl\n" /* move op back to ecx */
cannam@89 593
cannam@89 594 ".L_add_bits_to_dist:\n"
cannam@89 595 " subb %%cl, %%bl\n"
cannam@89 596 " xorl %%eax, %%eax\n"
cannam@89 597 " incl %%eax\n"
cannam@89 598 " shll %%cl, %%eax\n"
cannam@89 599 " decl %%eax\n" /* (1 << op) - 1 */
cannam@89 600 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 601 " shrl %%cl, %%edx\n"
cannam@89 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
cannam@89 603
cannam@89 604 ".L_check_window:\n"
cannam@89 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
cannam@89 606 " movl %%edi, %%eax\n"
cannam@89 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
cannam@89 608
cannam@89 609 " cmpl %%ebp, %%eax\n"
cannam@89 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
cannam@89 611
cannam@89 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
cannam@89 613 " movl %%edi, %%esi\n"
cannam@89 614 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@89 615
cannam@89 616 " sarl %%ecx\n"
cannam@89 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
cannam@89 618
cannam@89 619 " rep movsw\n"
cannam@89 620 " movb (%%esi), %%al\n"
cannam@89 621 " movb %%al, (%%edi)\n"
cannam@89 622 " incl %%edi\n"
cannam@89 623
cannam@89 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@89 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@89 626 " jmp .L_while_test\n"
cannam@89 627
cannam@89 628 ".L_copy_two:\n"
cannam@89 629 " rep movsw\n"
cannam@89 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@89 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@89 632 " jmp .L_while_test\n"
cannam@89 633
cannam@89 634 ".align 32,0x90\n"
cannam@89 635 ".L_check_dist_one:\n"
cannam@89 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
cannam@89 637 " jne .L_check_window\n"
cannam@89 638 " cmpl %%edi, 20(%%esp)\n"
cannam@89 639 " je .L_check_window\n" /* out == beg, if outside window */
cannam@89 640
cannam@89 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
cannam@89 642 " movb -1(%%edi), %%al\n"
cannam@89 643 " movb %%al, %%ah\n"
cannam@89 644
cannam@89 645 " sarl %%ecx\n"
cannam@89 646 " jnc .L_set_two\n"
cannam@89 647 " movb %%al, (%%edi)\n"
cannam@89 648 " incl %%edi\n"
cannam@89 649
cannam@89 650 ".L_set_two:\n"
cannam@89 651 " rep stosw\n"
cannam@89 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@89 653 " jmp .L_while_test\n"
cannam@89 654
cannam@89 655 ".align 32,0x90\n"
cannam@89 656 ".L_test_for_second_level_length:\n"
cannam@89 657 " testb $64, %%al\n"
cannam@89 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
cannam@89 659
cannam@89 660 " xorl %%eax, %%eax\n"
cannam@89 661 " incl %%eax\n"
cannam@89 662 " shll %%cl, %%eax\n"
cannam@89 663 " decl %%eax\n"
cannam@89 664 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 665 " addl 64(%%esp), %%eax\n" /* eax += len */
cannam@89 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
cannam@89 667 " jmp .L_dolen\n"
cannam@89 668
cannam@89 669 ".align 32,0x90\n"
cannam@89 670 ".L_test_for_second_level_dist:\n"
cannam@89 671 " testb $64, %%al\n"
cannam@89 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
cannam@89 673
cannam@89 674 " xorl %%eax, %%eax\n"
cannam@89 675 " incl %%eax\n"
cannam@89 676 " shll %%cl, %%eax\n"
cannam@89 677 " decl %%eax\n"
cannam@89 678 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@89 679 " addl %%ebp, %%eax\n" /* eax += dist */
cannam@89 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
cannam@89 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
cannam@89 682 " jmp .L_dodist\n"
cannam@89 683
cannam@89 684 ".align 32,0x90\n"
cannam@89 685 ".L_clip_window:\n"
cannam@89 686 " movl %%eax, %%ecx\n"
cannam@89 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
cannam@89 688 " negl %%ecx\n" /* nbytes = -nbytes */
cannam@89 689 " movl 28(%%esp), %%esi\n" /* from = window */
cannam@89 690
cannam@89 691 " cmpl %%ebp, %%eax\n"
cannam@89 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
cannam@89 693
cannam@89 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
cannam@89 695 " cmpl $0, 52(%%esp)\n"
cannam@89 696 " jne .L_wrap_around_window\n" /* if (write != 0) */
cannam@89 697
cannam@89 698 " subl %%ecx, %%eax\n"
cannam@89 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
cannam@89 700
cannam@89 701 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@89 702 " cmpl %%ecx, %%eax\n"
cannam@89 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 704
cannam@89 705 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 706 " rep movsb\n"
cannam@89 707 " movl %%edi, %%esi\n"
cannam@89 708 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@89 709 " jmp .L_do_copy\n"
cannam@89 710
cannam@89 711 ".align 32,0x90\n"
cannam@89 712 ".L_wrap_around_window:\n"
cannam@89 713 " movl 52(%%esp), %%eax\n" /* eax = write */
cannam@89 714 " cmpl %%eax, %%ecx\n"
cannam@89 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
cannam@89 716
cannam@89 717 " addl 48(%%esp), %%esi\n" /* from += wsize */
cannam@89 718 " addl %%eax, %%esi\n" /* from += write */
cannam@89 719 " subl %%ecx, %%esi\n" /* from -= nbytes */
cannam@89 720 " subl %%eax, %%ecx\n" /* nbytes -= write */
cannam@89 721
cannam@89 722 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@89 723 " cmpl %%ecx, %%eax\n"
cannam@89 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 725
cannam@89 726 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 727 " rep movsb\n"
cannam@89 728 " movl 28(%%esp), %%esi\n" /* from = window */
cannam@89 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
cannam@89 730 " cmpl %%ecx, %%eax\n"
cannam@89 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 732
cannam@89 733 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 734 " rep movsb\n"
cannam@89 735 " movl %%edi, %%esi\n"
cannam@89 736 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@89 737 " jmp .L_do_copy\n"
cannam@89 738
cannam@89 739 ".align 32,0x90\n"
cannam@89 740 ".L_contiguous_in_window:\n"
cannam@89 741 " addl %%eax, %%esi\n"
cannam@89 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
cannam@89 743
cannam@89 744 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@89 745 " cmpl %%ecx, %%eax\n"
cannam@89 746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@89 747
cannam@89 748 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@89 749 " rep movsb\n"
cannam@89 750 " movl %%edi, %%esi\n"
cannam@89 751 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@89 752 " jmp .L_do_copy\n" /* copy the remaining len from the output */
cannam@89 753
cannam@89 754 ".align 32,0x90\n"
cannam@89 755 ".L_do_copy:\n"
cannam@89 756 " movl %%eax, %%ecx\n"
cannam@89 757 " rep movsb\n"
cannam@89 758
cannam@89 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@89 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@89 761 " jmp .L_while_test\n"
cannam@89 762
cannam@89 763 ".L_test_for_end_of_block:\n"
cannam@89 764 " testb $32, %%al\n"
cannam@89 765 " jz .L_invalid_literal_length_code\n"
cannam@89 766 " movl $1, 72(%%esp)\n"
cannam@89 767 " jmp .L_break_loop_with_status\n"
cannam@89 768
cannam@89 769 ".L_invalid_literal_length_code:\n"
cannam@89 770 " movl $2, 72(%%esp)\n"
cannam@89 771 " jmp .L_break_loop_with_status\n"
cannam@89 772
cannam@89 773 ".L_invalid_distance_code:\n"
cannam@89 774 " movl $3, 72(%%esp)\n"
cannam@89 775 " jmp .L_break_loop_with_status\n"
cannam@89 776
cannam@89 777 ".L_invalid_distance_too_far:\n"
cannam@89 778 " movl 8(%%esp), %%esi\n"
cannam@89 779 " movl $4, 72(%%esp)\n"
cannam@89 780 " jmp .L_break_loop_with_status\n"
cannam@89 781
cannam@89 782 ".L_break_loop:\n"
cannam@89 783 " movl $0, 72(%%esp)\n"
cannam@89 784
cannam@89 785 ".L_break_loop_with_status:\n"
cannam@89 786 /* put in, out, bits, and hold back into ar and pop esp */
cannam@89 787 " movl %%esi, 8(%%esp)\n" /* save in */
cannam@89 788 " movl %%edi, 16(%%esp)\n" /* save out */
cannam@89 789 " movl %%ebx, 44(%%esp)\n" /* save bits */
cannam@89 790 " movl %%edx, 40(%%esp)\n" /* save hold */
cannam@89 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
cannam@89 792 " movl (%%esp), %%esp\n"
cannam@89 793 :
cannam@89 794 : "m" (ar)
cannam@89 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
cannam@89 796 );
cannam@89 797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
cannam@89 798 __asm {
cannam@89 799 lea eax, ar
cannam@89 800 mov [eax], esp /* save esp, ebp */
cannam@89 801 mov [eax+4], ebp
cannam@89 802 mov esp, eax
cannam@89 803 mov esi, [esp+8] /* esi = in */
cannam@89 804 mov edi, [esp+16] /* edi = out */
cannam@89 805 mov edx, [esp+40] /* edx = hold */
cannam@89 806 mov ebx, [esp+44] /* ebx = bits */
cannam@89 807 mov ebp, [esp+32] /* ebp = lcode */
cannam@89 808
cannam@89 809 cld
cannam@89 810 jmp L_do_loop
cannam@89 811
cannam@89 812 ALIGN 4
cannam@89 813 L_while_test:
cannam@89 814 cmp [esp+24], edi
cannam@89 815 jbe L_break_loop
cannam@89 816 cmp [esp+12], esi
cannam@89 817 jbe L_break_loop
cannam@89 818
cannam@89 819 L_do_loop:
cannam@89 820 cmp bl, 15
cannam@89 821 ja L_get_length_code /* if (15 < bits) */
cannam@89 822
cannam@89 823 xor eax, eax
cannam@89 824 lodsw /* al = *(ushort *)in++ */
cannam@89 825 mov cl, bl /* cl = bits, needs it for shifting */
cannam@89 826 add bl, 16 /* bits += 16 */
cannam@89 827 shl eax, cl
cannam@89 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@89 829
cannam@89 830 L_get_length_code:
cannam@89 831 mov eax, [esp+56] /* eax = lmask */
cannam@89 832 and eax, edx /* eax &= hold */
cannam@89 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
cannam@89 834
cannam@89 835 L_dolen:
cannam@89 836 mov cl, ah /* cl = this.bits */
cannam@89 837 sub bl, ah /* bits -= this.bits */
cannam@89 838 shr edx, cl /* hold >>= this.bits */
cannam@89 839
cannam@89 840 test al, al
cannam@89 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
cannam@89 842
cannam@89 843 shr eax, 16 /* output this.val char */
cannam@89 844 stosb
cannam@89 845 jmp L_while_test
cannam@89 846
cannam@89 847 ALIGN 4
cannam@89 848 L_test_for_length_base:
cannam@89 849 mov ecx, eax /* len = this */
cannam@89 850 shr ecx, 16 /* len = this.val */
cannam@89 851 mov [esp+64], ecx /* save len */
cannam@89 852 mov cl, al
cannam@89 853
cannam@89 854 test al, 16
cannam@89 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
cannam@89 856 and cl, 15 /* op &= 15 */
cannam@89 857 jz L_decode_distance /* if (!op) */
cannam@89 858 cmp bl, cl
cannam@89 859 jae L_add_bits_to_len /* if (op <= bits) */
cannam@89 860
cannam@89 861 mov ch, cl /* stash op in ch, freeing cl */
cannam@89 862 xor eax, eax
cannam@89 863 lodsw /* al = *(ushort *)in++ */
cannam@89 864 mov cl, bl /* cl = bits, needs it for shifting */
cannam@89 865 add bl, 16 /* bits += 16 */
cannam@89 866 shl eax, cl
cannam@89 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@89 868 mov cl, ch /* move op back to ecx */
cannam@89 869
cannam@89 870 L_add_bits_to_len:
cannam@89 871 sub bl, cl
cannam@89 872 xor eax, eax
cannam@89 873 inc eax
cannam@89 874 shl eax, cl
cannam@89 875 dec eax
cannam@89 876 and eax, edx /* eax &= hold */
cannam@89 877 shr edx, cl
cannam@89 878 add [esp+64], eax /* len += hold & mask[op] */
cannam@89 879
cannam@89 880 L_decode_distance:
cannam@89 881 cmp bl, 15
cannam@89 882 ja L_get_distance_code /* if (15 < bits) */
cannam@89 883
cannam@89 884 xor eax, eax
cannam@89 885 lodsw /* al = *(ushort *)in++ */
cannam@89 886 mov cl, bl /* cl = bits, needs it for shifting */
cannam@89 887 add bl, 16 /* bits += 16 */
cannam@89 888 shl eax, cl
cannam@89 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@89 890
cannam@89 891 L_get_distance_code:
cannam@89 892 mov eax, [esp+60] /* eax = dmask */
cannam@89 893 mov ecx, [esp+36] /* ecx = dcode */
cannam@89 894 and eax, edx /* eax &= hold */
cannam@89 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
cannam@89 896
cannam@89 897 L_dodist:
cannam@89 898 mov ebp, eax /* dist = this */
cannam@89 899 shr ebp, 16 /* dist = this.val */
cannam@89 900 mov cl, ah
cannam@89 901 sub bl, ah /* bits -= this.bits */
cannam@89 902 shr edx, cl /* hold >>= this.bits */
cannam@89 903 mov cl, al /* cl = this.op */
cannam@89 904
cannam@89 905 test al, 16 /* if ((op & 16) == 0) */
cannam@89 906 jz L_test_for_second_level_dist
cannam@89 907 and cl, 15 /* op &= 15 */
cannam@89 908 jz L_check_dist_one
cannam@89 909 cmp bl, cl
cannam@89 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
cannam@89 911
cannam@89 912 mov ch, cl /* stash op in ch, freeing cl */
cannam@89 913 xor eax, eax
cannam@89 914 lodsw /* al = *(ushort *)in++ */
cannam@89 915 mov cl, bl /* cl = bits, needs it for shifting */
cannam@89 916 add bl, 16 /* bits += 16 */
cannam@89 917 shl eax, cl
cannam@89 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@89 919 mov cl, ch /* move op back to ecx */
cannam@89 920
cannam@89 921 L_add_bits_to_dist:
cannam@89 922 sub bl, cl
cannam@89 923 xor eax, eax
cannam@89 924 inc eax
cannam@89 925 shl eax, cl
cannam@89 926 dec eax /* (1 << op) - 1 */
cannam@89 927 and eax, edx /* eax &= hold */
cannam@89 928 shr edx, cl
cannam@89 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
cannam@89 930
cannam@89 931 L_check_window:
cannam@89 932 mov [esp+8], esi /* save in so from can use its reg */
cannam@89 933 mov eax, edi
cannam@89 934 sub eax, [esp+20] /* nbytes = out - beg */
cannam@89 935
cannam@89 936 cmp eax, ebp
cannam@89 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
cannam@89 938
cannam@89 939 mov ecx, [esp+64] /* ecx = len */
cannam@89 940 mov esi, edi
cannam@89 941 sub esi, ebp /* from = out - dist */
cannam@89 942
cannam@89 943 sar ecx, 1
cannam@89 944 jnc L_copy_two
cannam@89 945
cannam@89 946 rep movsw
cannam@89 947 mov al, [esi]
cannam@89 948 mov [edi], al
cannam@89 949 inc edi
cannam@89 950
cannam@89 951 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@89 952 mov ebp, [esp+32] /* ebp = lcode */
cannam@89 953 jmp L_while_test
cannam@89 954
cannam@89 955 L_copy_two:
cannam@89 956 rep movsw
cannam@89 957 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@89 958 mov ebp, [esp+32] /* ebp = lcode */
cannam@89 959 jmp L_while_test
cannam@89 960
cannam@89 961 ALIGN 4
cannam@89 962 L_check_dist_one:
cannam@89 963 cmp ebp, 1 /* if dist 1, is a memset */
cannam@89 964 jne L_check_window
cannam@89 965 cmp [esp+20], edi
cannam@89 966 je L_check_window /* out == beg, if outside window */
cannam@89 967
cannam@89 968 mov ecx, [esp+64] /* ecx = len */
cannam@89 969 mov al, [edi-1]
cannam@89 970 mov ah, al
cannam@89 971
cannam@89 972 sar ecx, 1
cannam@89 973 jnc L_set_two
cannam@89 974 mov [edi], al /* memset out with from[-1] */
cannam@89 975 inc edi
cannam@89 976
cannam@89 977 L_set_two:
cannam@89 978 rep stosw
cannam@89 979 mov ebp, [esp+32] /* ebp = lcode */
cannam@89 980 jmp L_while_test
cannam@89 981
cannam@89 982 ALIGN 4
cannam@89 983 L_test_for_second_level_length:
cannam@89 984 test al, 64
cannam@89 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
cannam@89 986
cannam@89 987 xor eax, eax
cannam@89 988 inc eax
cannam@89 989 shl eax, cl
cannam@89 990 dec eax
cannam@89 991 and eax, edx /* eax &= hold */
cannam@89 992 add eax, [esp+64] /* eax += len */
cannam@89 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
cannam@89 994 jmp L_dolen
cannam@89 995
cannam@89 996 ALIGN 4
cannam@89 997 L_test_for_second_level_dist:
cannam@89 998 test al, 64
cannam@89 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
cannam@89 1000
cannam@89 1001 xor eax, eax
cannam@89 1002 inc eax
cannam@89 1003 shl eax, cl
cannam@89 1004 dec eax
cannam@89 1005 and eax, edx /* eax &= hold */
cannam@89 1006 add eax, ebp /* eax += dist */
cannam@89 1007 mov ecx, [esp+36] /* ecx = dcode */
cannam@89 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
cannam@89 1009 jmp L_dodist
cannam@89 1010
cannam@89 1011 ALIGN 4
cannam@89 1012 L_clip_window:
cannam@89 1013 mov ecx, eax
cannam@89 1014 mov eax, [esp+48] /* eax = wsize */
cannam@89 1015 neg ecx /* nbytes = -nbytes */
cannam@89 1016 mov esi, [esp+28] /* from = window */
cannam@89 1017
cannam@89 1018 cmp eax, ebp
cannam@89 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
cannam@89 1020
cannam@89 1021 add ecx, ebp /* nbytes = dist - nbytes */
cannam@89 1022 cmp dword ptr [esp+52], 0
cannam@89 1023 jne L_wrap_around_window /* if (write != 0) */
cannam@89 1024
cannam@89 1025 sub eax, ecx
cannam@89 1026 add esi, eax /* from += wsize - nbytes */
cannam@89 1027
cannam@89 1028 mov eax, [esp+64] /* eax = len */
cannam@89 1029 cmp eax, ecx
cannam@89 1030 jbe L_do_copy /* if (nbytes >= len) */
cannam@89 1031
cannam@89 1032 sub eax, ecx /* len -= nbytes */
cannam@89 1033 rep movsb
cannam@89 1034 mov esi, edi
cannam@89 1035 sub esi, ebp /* from = out - dist */
cannam@89 1036 jmp L_do_copy
cannam@89 1037
cannam@89 1038 ALIGN 4
cannam@89 1039 L_wrap_around_window:
cannam@89 1040 mov eax, [esp+52] /* eax = write */
cannam@89 1041 cmp ecx, eax
cannam@89 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
cannam@89 1043
cannam@89 1044 add esi, [esp+48] /* from += wsize */
cannam@89 1045 add esi, eax /* from += write */
cannam@89 1046 sub esi, ecx /* from -= nbytes */
cannam@89 1047 sub ecx, eax /* nbytes -= write */
cannam@89 1048
cannam@89 1049 mov eax, [esp+64] /* eax = len */
cannam@89 1050 cmp eax, ecx
cannam@89 1051 jbe L_do_copy /* if (nbytes >= len) */
cannam@89 1052
cannam@89 1053 sub eax, ecx /* len -= nbytes */
cannam@89 1054 rep movsb
cannam@89 1055 mov esi, [esp+28] /* from = window */
cannam@89 1056 mov ecx, [esp+52] /* nbytes = write */
cannam@89 1057 cmp eax, ecx
cannam@89 1058 jbe L_do_copy /* if (nbytes >= len) */
cannam@89 1059
cannam@89 1060 sub eax, ecx /* len -= nbytes */
cannam@89 1061 rep movsb
cannam@89 1062 mov esi, edi
cannam@89 1063 sub esi, ebp /* from = out - dist */
cannam@89 1064 jmp L_do_copy
cannam@89 1065
cannam@89 1066 ALIGN 4
cannam@89 1067 L_contiguous_in_window:
cannam@89 1068 add esi, eax
cannam@89 1069 sub esi, ecx /* from += write - nbytes */
cannam@89 1070
cannam@89 1071 mov eax, [esp+64] /* eax = len */
cannam@89 1072 cmp eax, ecx
cannam@89 1073 jbe L_do_copy /* if (nbytes >= len) */
cannam@89 1074
cannam@89 1075 sub eax, ecx /* len -= nbytes */
cannam@89 1076 rep movsb
cannam@89 1077 mov esi, edi
cannam@89 1078 sub esi, ebp /* from = out - dist */
cannam@89 1079 jmp L_do_copy
cannam@89 1080
cannam@89 1081 ALIGN 4
cannam@89 1082 L_do_copy:
cannam@89 1083 mov ecx, eax
cannam@89 1084 rep movsb
cannam@89 1085
cannam@89 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@89 1087 mov ebp, [esp+32] /* ebp = lcode */
cannam@89 1088 jmp L_while_test
cannam@89 1089
cannam@89 1090 L_test_for_end_of_block:
cannam@89 1091 test al, 32
cannam@89 1092 jz L_invalid_literal_length_code
cannam@89 1093 mov dword ptr [esp+72], 1
cannam@89 1094 jmp L_break_loop_with_status
cannam@89 1095
cannam@89 1096 L_invalid_literal_length_code:
cannam@89 1097 mov dword ptr [esp+72], 2
cannam@89 1098 jmp L_break_loop_with_status
cannam@89 1099
cannam@89 1100 L_invalid_distance_code:
cannam@89 1101 mov dword ptr [esp+72], 3
cannam@89 1102 jmp L_break_loop_with_status
cannam@89 1103
cannam@89 1104 L_invalid_distance_too_far:
cannam@89 1105 mov esi, [esp+8] /* esi = in again (it was spilled to [esp+8] so esi could hold from); [esp+4] is the saved ebp, not in */
cannam@89 1106 mov dword ptr [esp+72], 4 /* status 4 = distance too far back */
cannam@89 1107 jmp L_break_loop_with_status
cannam@89 1108
cannam@89 1109 L_break_loop:
cannam@89 1110 mov dword ptr [esp+72], 0
cannam@89 1111
cannam@89 1112 L_break_loop_with_status:
cannam@89 1113 /* put in, out, bits, and hold back into ar and pop esp */
cannam@89 1114 mov [esp+8], esi /* save in */
cannam@89 1115 mov [esp+16], edi /* save out */
cannam@89 1116 mov [esp+44], ebx /* save bits */
cannam@89 1117 mov [esp+40], edx /* save hold */
cannam@89 1118 mov ebp, [esp+4] /* restore esp, ebp */
cannam@89 1119 mov esp, [esp]
cannam@89 1120 }
cannam@89 1121 #else
cannam@89 1122 #error "x86 architecture not defined"
cannam@89 1123 #endif
cannam@89 1124
cannam@89 1125 if (ar.status > 1) {
cannam@89 1126 if (ar.status == 2)
cannam@89 1127 strm->msg = "invalid literal/length code";
cannam@89 1128 else if (ar.status == 3)
cannam@89 1129 strm->msg = "invalid distance code";
cannam@89 1130 else
cannam@89 1131 strm->msg = "invalid distance too far back";
cannam@89 1132 state->mode = BAD;
cannam@89 1133 }
cannam@89 1134 else if ( ar.status == 1 ) {
cannam@89 1135 state->mode = TYPE;
cannam@89 1136 }
cannam@89 1137
cannam@89 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
cannam@89 1139 ar.len = ar.bits >> 3;
cannam@89 1140 ar.in -= ar.len;
cannam@89 1141 ar.bits -= ar.len << 3;
cannam@89 1142 ar.hold &= (1U << ar.bits) - 1;
cannam@89 1143
cannam@89 1144 /* update state and return */
cannam@89 1145 strm->next_in = ar.in;
cannam@89 1146 strm->next_out = ar.out;
cannam@89 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
cannam@89 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
cannam@89 1149 PAD_AVAIL_IN - (ar.in - ar.last));
cannam@89 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
cannam@89 1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
cannam@89 1152 PAD_AVAIL_OUT - (ar.out - ar.end));
cannam@89 1153 state->hold = ar.hold;
cannam@89 1154 state->bits = ar.bits;
cannam@89 1155 return;
cannam@89 1156 }
cannam@89 1157