annotate src/zlib-1.2.8/contrib/inflate86/inffas86.c @ 155:54abead6ecce

Opus for Windows (MSVC)
author Chris Cannam <cannam@all-day-breakfast.com>
date Fri, 25 Jan 2019 12:15:58 +0000
parents 5b4145a0d408
children
rev   line source
cannam@128 1 /* inffas86.c is a hand tuned assembler version of
cannam@128 2 *
cannam@128 3 * inffast.c -- fast decoding
cannam@128 4 * Copyright (C) 1995-2003 Mark Adler
cannam@128 5 * For conditions of distribution and use, see copyright notice in zlib.h
cannam@128 6 *
cannam@128 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
cannam@128 8 * Please use the copyright conditions above.
cannam@128 9 *
cannam@128 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
cannam@128 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
cannam@128 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
cannam@128 13 * bytes. I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
cannam@128 14 * from http://fedora.linux.duke.edu/fc1_x86_64
cannam@128 15 * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
cannam@128 16 * 1GB RAM. The 64-bit version is about 4% faster than the 32-bit version
cannam@128 17 * when decompressing mozilla-source-1.3.tar.gz.
cannam@128 18 *
cannam@128 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
cannam@128 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
cannam@128 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
cannam@128 22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
cannam@128 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
cannam@128 24 * enabled. I will attempt to merge the MMX code into this version. Newer
cannam@128 25 * versions of this and inffast.S can be found at
cannam@128 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
cannam@128 27 */
cannam@128 28
cannam@128 29 #include "zutil.h"
cannam@128 30 #include "inftrees.h"
cannam@128 31 #include "inflate.h"
cannam@128 32 #include "inffast.h"
cannam@128 33
cannam@128 34 /* Mark Adler's comments from inffast.c: */
cannam@128 35
cannam@128 36 /*
cannam@128 37 Decode literal, length, and distance codes and write out the resulting
cannam@128 38 literal and match bytes until either not enough input or output is
cannam@128 39 available, an end-of-block is encountered, or a data error is encountered.
cannam@128 40 When large enough input and output buffers are supplied to inflate(), for
cannam@128 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
cannam@128 42 inflate execution time is spent in this routine.
cannam@128 43
cannam@128 44 Entry assumptions:
cannam@128 45
cannam@128 46 state->mode == LEN
cannam@128 47 strm->avail_in >= 6
cannam@128 48 strm->avail_out >= 258
cannam@128 49 start >= strm->avail_out
cannam@128 50 state->bits < 8
cannam@128 51
cannam@128 52 On return, state->mode is one of:
cannam@128 53
cannam@128 54 LEN -- ran out of enough output space or enough available input
cannam@128 55 TYPE -- reached end of block code, inflate() to interpret next block
cannam@128 56 BAD -- error in block data
cannam@128 57
cannam@128 58 Notes:
cannam@128 59
cannam@128 60 - The maximum input bits used by a length/distance pair is 15 bits for the
cannam@128 61 length code, 5 bits for the length extra, 15 bits for the distance code,
cannam@128 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
cannam@128 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
cannam@128 64 checking for available input while decoding.
cannam@128 65
cannam@128 66 - The maximum bytes that a single length/distance pair can output is 258
cannam@128 67 bytes, which is the maximum length that can be coded. inflate_fast()
cannam@128 68 requires strm->avail_out >= 258 for each loop to avoid checking for
cannam@128 69 output space.
cannam@128 70 */
cannam@128 71 void inflate_fast(strm, start)
cannam@128 72 z_streamp strm;
cannam@128 73 unsigned start; /* inflate()'s starting value for strm->avail_out */
cannam@128 74 {
cannam@128 75 struct inflate_state FAR *state;
cannam@128 76 struct inffast_ar { /* state block shared between the C code and the inline asm */
cannam@128 77 /* 64 32 x86 x86_64 -- columns: byte offset in 64-bit build, byte offset in 32-bit build, x86 register, x86_64 register */
cannam@128 78 /* ar offset register -- the asm addresses members by these hard-coded offsets, so do not reorder or retype them */
cannam@128 79 /* 0 0 */ void *esp; /* saved esp/rsp; the asm points the stack pointer at &ar while it runs */
cannam@128 80 /* 8 4 */ void *ebp; /* saved ebp/rbp, restored on exit from the asm */
cannam@128 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local copy of strm->next_in */
cannam@128 82 /* 24 12 */ unsigned char FAR *last; /* r9 loop runs while in < last; in + (avail_in - PAD_AVAIL_IN) */
cannam@128 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local copy of strm->next_out */
cannam@128 84 /* 40 20 */ unsigned char FAR *beg; /* out - (start - avail_out), i.e. inflate()'s initial next_out */
cannam@128 85 /* 48 24 */ unsigned char FAR *end; /* r10 loop runs while out < end; out + (avail_out - PAD_AVAIL_OUT) */
cannam@128 86 /* 56 28 */ unsigned char FAR *window;/* base of window buffer (state->window); copy source when dist > out - beg */
cannam@128 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local copy of state->lencode */
cannam@128 88 /* 72 36 */ code const FAR *dcode; /* r11 local copy of state->distcode */
cannam@128 89 /* 80 40 */ unsigned long hold; /* edx rdx local copy of state->hold (bit accumulator) */
cannam@128 90 /* 88 44 */ unsigned bits; /* ebx rbx local copy of state->bits (bit count in hold) */
cannam@128 91 /* 92 48 */ unsigned wsize; /* window size, copied from state->wsize */
cannam@128 92 /* 96 52 */ unsigned write; /* window write index, copied from state->wnext */
cannam@128 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode index: (1U << state->lenbits) - 1 */
cannam@128 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode index: (1U << state->distbits) - 1 */
cannam@128 95 /*108 64 */ unsigned len; /* r14 match length (the 32-bit asm keeps it in this slot) */
cannam@128 96 /*112 68 */ unsigned dist; /* r15 match distance */
cannam@128 97 /*116 72 */ unsigned status; /* exit code stored by the x86_64 asm: 0 loop ended, 1 end of block, 2 invalid literal/length code, 3 invalid distance code, 4 distance too far back */
cannam@128 98 } ar;
cannam@128 99
cannam@128 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
cannam@128 101 #define PAD_AVAIL_IN 6
cannam@128 102 #define PAD_AVAIL_OUT 258
cannam@128 103 #else
cannam@128 104 #define PAD_AVAIL_IN 5
cannam@128 105 #define PAD_AVAIL_OUT 257
cannam@128 106 #endif
cannam@128 107
cannam@128 108 /* copy state to local variables */
cannam@128 109 state = (struct inflate_state FAR *)strm->state;
cannam@128 110 ar.in = strm->next_in;
cannam@128 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
cannam@128 112 ar.out = strm->next_out;
cannam@128 113 ar.beg = ar.out - (start - strm->avail_out);
cannam@128 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
cannam@128 115 ar.wsize = state->wsize;
cannam@128 116 ar.write = state->wnext;
cannam@128 117 ar.window = state->window;
cannam@128 118 ar.hold = state->hold;
cannam@128 119 ar.bits = state->bits;
cannam@128 120 ar.lcode = state->lencode;
cannam@128 121 ar.dcode = state->distcode;
cannam@128 122 ar.lmask = (1U << state->lenbits) - 1;
cannam@128 123 ar.dmask = (1U << state->distbits) - 1;
cannam@128 124
cannam@128 125 /* decode literals and length/distances until end-of-block or not enough
cannam@128 126 input data or output space */
cannam@128 127
cannam@128 128 /* align in on 1/2 hold size boundary */
cannam@128 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
cannam@128 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
cannam@128 131 ar.bits += 8;
cannam@128 132 }
cannam@128 133
cannam@128 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
cannam@128 135 __asm__ __volatile__ (
cannam@128 136 " leaq %0, %%rax\n"
cannam@128 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
cannam@128 138 " movq %%rsp, (%%rax)\n"
cannam@128 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
cannam@128 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
cannam@128 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
cannam@128 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
cannam@128 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
cannam@128 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
cannam@128 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
cannam@128 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
cannam@128 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
cannam@128 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
cannam@128 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
cannam@128 150 /* r14d = len */
cannam@128 151 /* r15d = dist */
cannam@128 152 " cld\n"
cannam@128 153 " cmpq %%rdi, %%r10\n"
cannam@128 154 " je .L_one_time\n" /* if only one decode left */
cannam@128 155 " cmpq %%rsi, %%r9\n"
cannam@128 156 " je .L_one_time\n"
cannam@128 157 " jmp .L_do_loop\n"
cannam@128 158
cannam@128 159 ".L_one_time:\n"
cannam@128 160 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@128 161 " cmpb $32, %%bl\n"
cannam@128 162 " ja .L_get_length_code_one_time\n"
cannam@128 163
cannam@128 164 " lodsl\n" /* eax = *(uint *)in++ */
cannam@128 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 166 " addb $32, %%bl\n" /* bits += 32 */
cannam@128 167 " shlq %%cl, %%rax\n"
cannam@128 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@128 169 " jmp .L_get_length_code_one_time\n"
cannam@128 170
cannam@128 171 ".align 32,0x90\n"
cannam@128 172 ".L_while_test:\n"
cannam@128 173 " cmpq %%rdi, %%r10\n"
cannam@128 174 " jbe .L_break_loop\n"
cannam@128 175 " cmpq %%rsi, %%r9\n"
cannam@128 176 " jbe .L_break_loop\n"
cannam@128 177
cannam@128 178 ".L_do_loop:\n"
cannam@128 179 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@128 180 " cmpb $32, %%bl\n"
cannam@128 181 " ja .L_get_length_code\n" /* if (32 < bits) */
cannam@128 182
cannam@128 183 " lodsl\n" /* eax = *(uint *)in++ */
cannam@128 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 185 " addb $32, %%bl\n" /* bits += 32 */
cannam@128 186 " shlq %%cl, %%rax\n"
cannam@128 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@128 188
cannam@128 189 ".L_get_length_code:\n"
cannam@128 190 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@128 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@128 192
cannam@128 193 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@128 194 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@128 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@128 196
cannam@128 197 " testb %%al, %%al\n"
cannam@128 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@128 199
cannam@128 200 " movq %%r12, %%r8\n" /* r8 = lmask */
cannam@128 201 " shrl $16, %%eax\n" /* output this.val char */
cannam@128 202 " stosb\n"
cannam@128 203
cannam@128 204 ".L_get_length_code_one_time:\n"
cannam@128 205 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@128 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@128 207
cannam@128 208 ".L_dolen:\n"
cannam@128 209 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@128 210 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@128 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@128 212
cannam@128 213 " testb %%al, %%al\n"
cannam@128 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@128 215
cannam@128 216 " shrl $16, %%eax\n" /* output this.val char */
cannam@128 217 " stosb\n"
cannam@128 218 " jmp .L_while_test\n"
cannam@128 219
cannam@128 220 ".align 32,0x90\n"
cannam@128 221 ".L_test_for_length_base:\n"
cannam@128 222 " movl %%eax, %%r14d\n" /* len = this */
cannam@128 223 " shrl $16, %%r14d\n" /* len = this.val */
cannam@128 224 " movb %%al, %%cl\n"
cannam@128 225
cannam@128 226 " testb $16, %%al\n"
cannam@128 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
cannam@128 228 " andb $15, %%cl\n" /* op &= 15 */
cannam@128 229 " jz .L_decode_distance\n" /* if (!op) */
cannam@128 230
cannam@128 231 ".L_add_bits_to_len:\n"
cannam@128 232 " subb %%cl, %%bl\n"
cannam@128 233 " xorl %%eax, %%eax\n"
cannam@128 234 " incl %%eax\n"
cannam@128 235 " shll %%cl, %%eax\n"
cannam@128 236 " decl %%eax\n"
cannam@128 237 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 238 " shrq %%cl, %%rdx\n"
cannam@128 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
cannam@128 240
cannam@128 241 ".L_decode_distance:\n"
cannam@128 242 " movq %%r13, %%r8\n" /* r8 = dmask */
cannam@128 243 " cmpb $32, %%bl\n"
cannam@128 244 " ja .L_get_distance_code\n" /* if (32 < bits) */
cannam@128 245
cannam@128 246 " lodsl\n" /* eax = *(uint *)in++ */
cannam@128 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 248 " addb $32, %%bl\n" /* bits += 32 */
cannam@128 249 " shlq %%cl, %%rax\n"
cannam@128 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
cannam@128 251
cannam@128 252 ".L_get_distance_code:\n"
cannam@128 253 " andq %%rdx, %%r8\n" /* r8 &= hold */
cannam@128 254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
cannam@128 255
cannam@128 256 ".L_dodist:\n"
cannam@128 257 " movl %%eax, %%r15d\n" /* dist = this */
cannam@128 258 " shrl $16, %%r15d\n" /* dist = this.val */
cannam@128 259 " movb %%ah, %%cl\n"
cannam@128 260 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@128 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
cannam@128 262 " movb %%al, %%cl\n" /* cl = this.op */
cannam@128 263
cannam@128 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
cannam@128 265 " jz .L_test_for_second_level_dist\n"
cannam@128 266 " andb $15, %%cl\n" /* op &= 15 */
cannam@128 267 " jz .L_check_dist_one\n"
cannam@128 268
cannam@128 269 ".L_add_bits_to_dist:\n"
cannam@128 270 " subb %%cl, %%bl\n"
cannam@128 271 " xorl %%eax, %%eax\n"
cannam@128 272 " incl %%eax\n"
cannam@128 273 " shll %%cl, %%eax\n"
cannam@128 274 " decl %%eax\n" /* (1 << op) - 1 */
cannam@128 275 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 276 " shrq %%cl, %%rdx\n"
cannam@128 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
cannam@128 278
cannam@128 279 ".L_check_window:\n"
cannam@128 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
cannam@128 281 " movq %%rdi, %%rax\n"
cannam@128 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
cannam@128 283
cannam@128 284 " cmpl %%r15d, %%eax\n"
cannam@128 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
cannam@128 286
cannam@128 287 " movl %%r14d, %%ecx\n" /* ecx = len */
cannam@128 288 " movq %%rdi, %%rsi\n"
cannam@128 289 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@128 290
cannam@128 291 " sarl %%ecx\n"
cannam@128 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
cannam@128 293
cannam@128 294 " rep movsw\n"
cannam@128 295 " movb (%%rsi), %%al\n"
cannam@128 296 " movb %%al, (%%rdi)\n"
cannam@128 297 " incq %%rdi\n"
cannam@128 298
cannam@128 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
cannam@128 300 " jmp .L_while_test\n"
cannam@128 301
cannam@128 302 ".L_copy_two:\n"
cannam@128 303 " rep movsw\n"
cannam@128 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
cannam@128 305 " jmp .L_while_test\n"
cannam@128 306
cannam@128 307 ".align 32,0x90\n"
cannam@128 308 ".L_check_dist_one:\n"
cannam@128 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
cannam@128 310 " jne .L_check_window\n"
cannam@128 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
cannam@128 312 " je .L_check_window\n"
cannam@128 313
cannam@128 314 " movl %%r14d, %%ecx\n" /* ecx = len */
cannam@128 315 " movb -1(%%rdi), %%al\n"
cannam@128 316 " movb %%al, %%ah\n"
cannam@128 317
cannam@128 318 " sarl %%ecx\n"
cannam@128 319 " jnc .L_set_two\n"
cannam@128 320 " movb %%al, (%%rdi)\n"
cannam@128 321 " incq %%rdi\n"
cannam@128 322
cannam@128 323 ".L_set_two:\n"
cannam@128 324 " rep stosw\n"
cannam@128 325 " jmp .L_while_test\n"
cannam@128 326
cannam@128 327 ".align 32,0x90\n"
cannam@128 328 ".L_test_for_second_level_length:\n"
cannam@128 329 " testb $64, %%al\n"
cannam@128 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
cannam@128 331
cannam@128 332 " xorl %%eax, %%eax\n"
cannam@128 333 " incl %%eax\n"
cannam@128 334 " shll %%cl, %%eax\n"
cannam@128 335 " decl %%eax\n"
cannam@128 336 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 337 " addl %%r14d, %%eax\n" /* eax += len */
cannam@128 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
cannam@128 339 " jmp .L_dolen\n"
cannam@128 340
cannam@128 341 ".align 32,0x90\n"
cannam@128 342 ".L_test_for_second_level_dist:\n"
cannam@128 343 " testb $64, %%al\n"
cannam@128 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
cannam@128 345
cannam@128 346 " xorl %%eax, %%eax\n"
cannam@128 347 " incl %%eax\n"
cannam@128 348 " shll %%cl, %%eax\n"
cannam@128 349 " decl %%eax\n"
cannam@128 350 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 351 " addl %%r15d, %%eax\n" /* eax += dist */
cannam@128 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
cannam@128 353 " jmp .L_dodist\n"
cannam@128 354
cannam@128 355 ".align 32,0x90\n"
cannam@128 356 ".L_clip_window:\n"
cannam@128 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
cannam@128 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
cannam@128 359 " negl %%ecx\n" /* nbytes = -nbytes */
cannam@128 360
cannam@128 361 " cmpl %%r15d, %%eax\n"
cannam@128 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
cannam@128 363
cannam@128 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
cannam@128 365 " cmpl $0, 96(%%rsp)\n"
cannam@128 366 " jne .L_wrap_around_window\n" /* if (write != 0) */
cannam@128 367
cannam@128 368 " movq 56(%%rsp), %%rsi\n" /* from = window */
cannam@128 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
cannam@128 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
cannam@128 371
cannam@128 372 " movl %%r14d, %%eax\n" /* eax = len */
cannam@128 373 " cmpl %%ecx, %%r14d\n"
cannam@128 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 375
cannam@128 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
cannam@128 377 " rep movsb\n"
cannam@128 378 " movq %%rdi, %%rsi\n"
cannam@128 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
cannam@128 380 " jmp .L_do_copy\n"
cannam@128 381
cannam@128 382 ".align 32,0x90\n"
cannam@128 383 ".L_wrap_around_window:\n"
cannam@128 384 " movl 96(%%rsp), %%eax\n" /* eax = write */
cannam@128 385 " cmpl %%eax, %%ecx\n"
cannam@128 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
cannam@128 387
cannam@128 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
cannam@128 389 " addq 56(%%rsp), %%rsi\n" /* from += window */
cannam@128 390 " addq %%rax, %%rsi\n" /* from += write */
cannam@128 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
cannam@128 392 " subl %%eax, %%ecx\n" /* nbytes -= write */
cannam@128 393
cannam@128 394 " movl %%r14d, %%eax\n" /* eax = len */
cannam@128 395 " cmpl %%ecx, %%eax\n"
cannam@128 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 397
cannam@128 398 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 399 " rep movsb\n"
cannam@128 400 " movq 56(%%rsp), %%rsi\n" /* from = window */
cannam@128 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
cannam@128 402 " cmpl %%ecx, %%eax\n"
cannam@128 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 404
cannam@128 405 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 406 " rep movsb\n"
cannam@128 407 " movq %%rdi, %%rsi\n"
cannam@128 408 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@128 409 " jmp .L_do_copy\n"
cannam@128 410
cannam@128 411 ".align 32,0x90\n"
cannam@128 412 ".L_contiguous_in_window:\n"
cannam@128 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
cannam@128 414 " addq %%rax, %%rsi\n"
cannam@128 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
cannam@128 416
cannam@128 417 " movl %%r14d, %%eax\n" /* eax = len */
cannam@128 418 " cmpl %%ecx, %%eax\n"
cannam@128 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 420
cannam@128 421 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 422 " rep movsb\n"
cannam@128 423 " movq %%rdi, %%rsi\n"
cannam@128 424 " subq %%r15, %%rsi\n" /* from = out - dist */
cannam@128 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 426
cannam@128 427 ".align 32,0x90\n"
cannam@128 428 ".L_do_copy:\n"
cannam@128 429 " movl %%eax, %%ecx\n" /* ecx = len */
cannam@128 430 " rep movsb\n"
cannam@128 431
cannam@128 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
cannam@128 433 " jmp .L_while_test\n"
cannam@128 434
cannam@128 435 ".L_test_for_end_of_block:\n"
cannam@128 436 " testb $32, %%al\n"
cannam@128 437 " jz .L_invalid_literal_length_code\n"
cannam@128 438 " movl $1, 116(%%rsp)\n"
cannam@128 439 " jmp .L_break_loop_with_status\n"
cannam@128 440
cannam@128 441 ".L_invalid_literal_length_code:\n"
cannam@128 442 " movl $2, 116(%%rsp)\n"
cannam@128 443 " jmp .L_break_loop_with_status\n"
cannam@128 444
cannam@128 445 ".L_invalid_distance_code:\n"
cannam@128 446 " movl $3, 116(%%rsp)\n"
cannam@128 447 " jmp .L_break_loop_with_status\n"
cannam@128 448
cannam@128 449 ".L_invalid_distance_too_far:\n"
cannam@128 450 " movl $4, 116(%%rsp)\n"
cannam@128 451 " jmp .L_break_loop_with_status\n"
cannam@128 452
cannam@128 453 ".L_break_loop:\n"
cannam@128 454 " movl $0, 116(%%rsp)\n"
cannam@128 455
cannam@128 456 ".L_break_loop_with_status:\n"
cannam@128 457 /* put in, out, bits, and hold back into ar and pop esp */
cannam@128 458 " movq %%rsi, 16(%%rsp)\n" /* in */
cannam@128 459 " movq %%rdi, 32(%%rsp)\n" /* out */
cannam@128 460 " movl %%ebx, 88(%%rsp)\n" /* bits */
cannam@128 461 " movq %%rdx, 80(%%rsp)\n" /* hold */
cannam@128 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
cannam@128 463 " movq 8(%%rsp), %%rbp\n"
cannam@128 464 " movq %%rax, %%rsp\n"
cannam@128 465 :
cannam@128 466 : "m" (ar)
cannam@128 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
cannam@128 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
cannam@128 469 );
cannam@128 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
cannam@128 471 __asm__ __volatile__ (
cannam@128 472 " leal %0, %%eax\n"
cannam@128 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
cannam@128 474 " movl %%ebp, 4(%%eax)\n"
cannam@128 475 " movl %%eax, %%esp\n"
cannam@128 476 " movl 8(%%esp), %%esi\n" /* esi = in */
cannam@128 477 " movl 16(%%esp), %%edi\n" /* edi = out */
cannam@128 478 " movl 40(%%esp), %%edx\n" /* edx = hold */
cannam@128 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
cannam@128 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@128 481
cannam@128 482 " cld\n"
cannam@128 483 " jmp .L_do_loop\n"
cannam@128 484
cannam@128 485 ".align 32,0x90\n"
cannam@128 486 ".L_while_test:\n"
cannam@128 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
cannam@128 488 " jbe .L_break_loop\n"
cannam@128 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
cannam@128 490 " jbe .L_break_loop\n"
cannam@128 491
cannam@128 492 ".L_do_loop:\n"
cannam@128 493 " cmpb $15, %%bl\n"
cannam@128 494 " ja .L_get_length_code\n" /* if (15 < bits) */
cannam@128 495
cannam@128 496 " xorl %%eax, %%eax\n"
cannam@128 497 " lodsw\n" /* al = *(ushort *)in++ */
cannam@128 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 499 " addb $16, %%bl\n" /* bits += 16 */
cannam@128 500 " shll %%cl, %%eax\n"
cannam@128 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@128 502
cannam@128 503 ".L_get_length_code:\n"
cannam@128 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
cannam@128 505 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
cannam@128 507
cannam@128 508 ".L_dolen:\n"
cannam@128 509 " movb %%ah, %%cl\n" /* cl = this.bits */
cannam@128 510 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@128 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
cannam@128 512
cannam@128 513 " testb %%al, %%al\n"
cannam@128 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
cannam@128 515
cannam@128 516 " shrl $16, %%eax\n" /* output this.val char */
cannam@128 517 " stosb\n"
cannam@128 518 " jmp .L_while_test\n"
cannam@128 519
cannam@128 520 ".align 32,0x90\n"
cannam@128 521 ".L_test_for_length_base:\n"
cannam@128 522 " movl %%eax, %%ecx\n" /* len = this */
cannam@128 523 " shrl $16, %%ecx\n" /* len = this.val */
cannam@128 524 " movl %%ecx, 64(%%esp)\n" /* save len */
cannam@128 525 " movb %%al, %%cl\n"
cannam@128 526
cannam@128 527 " testb $16, %%al\n"
cannam@128 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
cannam@128 529 " andb $15, %%cl\n" /* op &= 15 */
cannam@128 530 " jz .L_decode_distance\n" /* if (!op) */
cannam@128 531 " cmpb %%cl, %%bl\n"
cannam@128 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
cannam@128 533
cannam@128 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
cannam@128 535 " xorl %%eax, %%eax\n"
cannam@128 536 " lodsw\n" /* al = *(ushort *)in++ */
cannam@128 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 538 " addb $16, %%bl\n" /* bits += 16 */
cannam@128 539 " shll %%cl, %%eax\n"
cannam@128 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@128 541 " movb %%ch, %%cl\n" /* move op back to ecx */
cannam@128 542
cannam@128 543 ".L_add_bits_to_len:\n"
cannam@128 544 " subb %%cl, %%bl\n"
cannam@128 545 " xorl %%eax, %%eax\n"
cannam@128 546 " incl %%eax\n"
cannam@128 547 " shll %%cl, %%eax\n"
cannam@128 548 " decl %%eax\n"
cannam@128 549 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 550 " shrl %%cl, %%edx\n"
cannam@128 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
cannam@128 552
cannam@128 553 ".L_decode_distance:\n"
cannam@128 554 " cmpb $15, %%bl\n"
cannam@128 555 " ja .L_get_distance_code\n" /* if (15 < bits) */
cannam@128 556
cannam@128 557 " xorl %%eax, %%eax\n"
cannam@128 558 " lodsw\n" /* al = *(ushort *)in++ */
cannam@128 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 560 " addb $16, %%bl\n" /* bits += 16 */
cannam@128 561 " shll %%cl, %%eax\n"
cannam@128 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@128 563
cannam@128 564 ".L_get_distance_code:\n"
cannam@128 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
cannam@128 566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
cannam@128 567 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
cannam@128 569
cannam@128 570 ".L_dodist:\n"
cannam@128 571 " movl %%eax, %%ebp\n" /* dist = this */
cannam@128 572 " shrl $16, %%ebp\n" /* dist = this.val */
cannam@128 573 " movb %%ah, %%cl\n"
cannam@128 574 " subb %%ah, %%bl\n" /* bits -= this.bits */
cannam@128 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
cannam@128 576 " movb %%al, %%cl\n" /* cl = this.op */
cannam@128 577
cannam@128 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
cannam@128 579 " jz .L_test_for_second_level_dist\n"
cannam@128 580 " andb $15, %%cl\n" /* op &= 15 */
cannam@128 581 " jz .L_check_dist_one\n"
cannam@128 582 " cmpb %%cl, %%bl\n"
cannam@128 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
cannam@128 584
cannam@128 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
cannam@128 586 " xorl %%eax, %%eax\n"
cannam@128 587 " lodsw\n" /* al = *(ushort *)in++ */
cannam@128 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
cannam@128 589 " addb $16, %%bl\n" /* bits += 16 */
cannam@128 590 " shll %%cl, %%eax\n"
cannam@128 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
cannam@128 592 " movb %%ch, %%cl\n" /* move op back to ecx */
cannam@128 593
cannam@128 594 ".L_add_bits_to_dist:\n"
cannam@128 595 " subb %%cl, %%bl\n"
cannam@128 596 " xorl %%eax, %%eax\n"
cannam@128 597 " incl %%eax\n"
cannam@128 598 " shll %%cl, %%eax\n"
cannam@128 599 " decl %%eax\n" /* (1 << op) - 1 */
cannam@128 600 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 601 " shrl %%cl, %%edx\n"
cannam@128 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
cannam@128 603
cannam@128 604 ".L_check_window:\n"
cannam@128 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
cannam@128 606 " movl %%edi, %%eax\n"
cannam@128 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
cannam@128 608
cannam@128 609 " cmpl %%ebp, %%eax\n"
cannam@128 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
cannam@128 611
cannam@128 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
cannam@128 613 " movl %%edi, %%esi\n"
cannam@128 614 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@128 615
cannam@128 616 " sarl %%ecx\n"
cannam@128 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
cannam@128 618
cannam@128 619 " rep movsw\n"
cannam@128 620 " movb (%%esi), %%al\n"
cannam@128 621 " movb %%al, (%%edi)\n"
cannam@128 622 " incl %%edi\n"
cannam@128 623
cannam@128 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@128 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@128 626 " jmp .L_while_test\n"
cannam@128 627
cannam@128 628 ".L_copy_two:\n"
cannam@128 629 " rep movsw\n"
cannam@128 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@128 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@128 632 " jmp .L_while_test\n"
cannam@128 633
cannam@128 634 ".align 32,0x90\n"
cannam@128 635 ".L_check_dist_one:\n"
cannam@128 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
cannam@128 637 " jne .L_check_window\n"
cannam@128 638 " cmpl %%edi, 20(%%esp)\n"
cannam@128 639 " je .L_check_window\n" /* out == beg, if outside window */
cannam@128 640
cannam@128 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
cannam@128 642 " movb -1(%%edi), %%al\n"
cannam@128 643 " movb %%al, %%ah\n"
cannam@128 644
cannam@128 645 " sarl %%ecx\n"
cannam@128 646 " jnc .L_set_two\n"
cannam@128 647 " movb %%al, (%%edi)\n"
cannam@128 648 " incl %%edi\n"
cannam@128 649
cannam@128 650 ".L_set_two:\n"
cannam@128 651 " rep stosw\n"
cannam@128 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@128 653 " jmp .L_while_test\n"
cannam@128 654
cannam@128 655 ".align 32,0x90\n"
cannam@128 656 ".L_test_for_second_level_length:\n"
cannam@128 657 " testb $64, %%al\n"
cannam@128 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
cannam@128 659
cannam@128 660 " xorl %%eax, %%eax\n"
cannam@128 661 " incl %%eax\n"
cannam@128 662 " shll %%cl, %%eax\n"
cannam@128 663 " decl %%eax\n"
cannam@128 664 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 665 " addl 64(%%esp), %%eax\n" /* eax += len */
cannam@128 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
cannam@128 667 " jmp .L_dolen\n"
cannam@128 668
cannam@128 669 ".align 32,0x90\n"
cannam@128 670 ".L_test_for_second_level_dist:\n"
cannam@128 671 " testb $64, %%al\n"
cannam@128 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
cannam@128 673
cannam@128 674 " xorl %%eax, %%eax\n"
cannam@128 675 " incl %%eax\n"
cannam@128 676 " shll %%cl, %%eax\n"
cannam@128 677 " decl %%eax\n"
cannam@128 678 " andl %%edx, %%eax\n" /* eax &= hold */
cannam@128 679 " addl %%ebp, %%eax\n" /* eax += dist */
cannam@128 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
cannam@128 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
cannam@128 682 " jmp .L_dodist\n"
cannam@128 683
cannam@128 684 ".align 32,0x90\n"
cannam@128 685 ".L_clip_window:\n"
cannam@128 686 " movl %%eax, %%ecx\n"
cannam@128 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
cannam@128 688 " negl %%ecx\n" /* nbytes = -nbytes */
cannam@128 689 " movl 28(%%esp), %%esi\n" /* from = window */
cannam@128 690
cannam@128 691 " cmpl %%ebp, %%eax\n"
cannam@128 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
cannam@128 693
cannam@128 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
cannam@128 695 " cmpl $0, 52(%%esp)\n"
cannam@128 696 " jne .L_wrap_around_window\n" /* if (write != 0) */
cannam@128 697
cannam@128 698 " subl %%ecx, %%eax\n"
cannam@128 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
cannam@128 700
cannam@128 701 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@128 702 " cmpl %%ecx, %%eax\n"
cannam@128 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 704
cannam@128 705 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 706 " rep movsb\n"
cannam@128 707 " movl %%edi, %%esi\n"
cannam@128 708 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@128 709 " jmp .L_do_copy\n"
cannam@128 710
cannam@128 711 ".align 32,0x90\n"
cannam@128 712 ".L_wrap_around_window:\n"
cannam@128 713 " movl 52(%%esp), %%eax\n" /* eax = write */
cannam@128 714 " cmpl %%eax, %%ecx\n"
cannam@128 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
cannam@128 716
cannam@128 717 " addl 48(%%esp), %%esi\n" /* from += wsize */
cannam@128 718 " addl %%eax, %%esi\n" /* from += write */
cannam@128 719 " subl %%ecx, %%esi\n" /* from -= nbytes */
cannam@128 720 " subl %%eax, %%ecx\n" /* nbytes -= write */
cannam@128 721
cannam@128 722 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@128 723 " cmpl %%ecx, %%eax\n"
cannam@128 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 725
cannam@128 726 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 727 " rep movsb\n"
cannam@128 728 " movl 28(%%esp), %%esi\n" /* from = window */
cannam@128 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
cannam@128 730 " cmpl %%ecx, %%eax\n"
cannam@128 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
cannam@128 732
cannam@128 733 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 734 " rep movsb\n"
cannam@128 735 " movl %%edi, %%esi\n"
cannam@128 736 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@128 737 " jmp .L_do_copy\n"
cannam@128 738
cannam@128 739 ".align 32,0x90\n"
cannam@128 740 ".L_contiguous_in_window:\n" /* reached with eax = write, ecx = nbytes, esi = window */
cannam@128 741 " addl %%eax, %%esi\n"
cannam@128 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
cannam@128 743
cannam@128 744 " movl 64(%%esp), %%eax\n" /* eax = len */
cannam@128 745 " cmpl %%ecx, %%eax\n"
cannam@128 746 " jbe .L_do_copy\n" /* if (nbytes >= len) copy all of len from the window */
cannam@128 747
cannam@128 748 " subl %%ecx, %%eax\n" /* len -= nbytes */
cannam@128 749 " rep movsb\n" /* copy the nbytes bytes taken from the window */
cannam@128 750 " movl %%edi, %%esi\n"
cannam@128 751 " subl %%ebp, %%esi\n" /* from = out - dist */
cannam@128 752 " jmp .L_do_copy\n" /* unconditional: copy the remaining len bytes from the output */
cannam@128 753
cannam@128 754 ".align 32,0x90\n"
cannam@128 755 ".L_do_copy:\n"
cannam@128 756 " movl %%eax, %%ecx\n"
cannam@128 757 " rep movsb\n"
cannam@128 758
cannam@128 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
cannam@128 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
cannam@128 761 " jmp .L_while_test\n"
cannam@128 762
cannam@128 763 ".L_test_for_end_of_block:\n"
cannam@128 764 " testb $32, %%al\n"
cannam@128 765 " jz .L_invalid_literal_length_code\n"
cannam@128 766 " movl $1, 72(%%esp)\n"
cannam@128 767 " jmp .L_break_loop_with_status\n"
cannam@128 768
cannam@128 769 ".L_invalid_literal_length_code:\n"
cannam@128 770 " movl $2, 72(%%esp)\n"
cannam@128 771 " jmp .L_break_loop_with_status\n"
cannam@128 772
cannam@128 773 ".L_invalid_distance_code:\n"
cannam@128 774 " movl $3, 72(%%esp)\n"
cannam@128 775 " jmp .L_break_loop_with_status\n"
cannam@128 776
cannam@128 777 ".L_invalid_distance_too_far:\n"
cannam@128 778 " movl 8(%%esp), %%esi\n"
cannam@128 779 " movl $4, 72(%%esp)\n"
cannam@128 780 " jmp .L_break_loop_with_status\n"
cannam@128 781
cannam@128 782 ".L_break_loop:\n"
cannam@128 783 " movl $0, 72(%%esp)\n"
cannam@128 784
cannam@128 785 ".L_break_loop_with_status:\n"
cannam@128 786 /* put in, out, bits, and hold back into ar and pop esp */
cannam@128 787 " movl %%esi, 8(%%esp)\n" /* save in */
cannam@128 788 " movl %%edi, 16(%%esp)\n" /* save out */
cannam@128 789 " movl %%ebx, 44(%%esp)\n" /* save bits */
cannam@128 790 " movl %%edx, 40(%%esp)\n" /* save hold */
cannam@128 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
cannam@128 792 " movl (%%esp), %%esp\n"
cannam@128 793 :
cannam@128 794 : "m" (ar)
cannam@128 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
cannam@128 796 );
cannam@128 797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
cannam@128 798 __asm {
cannam@128 799 lea eax, ar
cannam@128 800 mov [eax], esp /* save esp, ebp */
cannam@128 801 mov [eax+4], ebp
cannam@128 802 mov esp, eax
cannam@128 803 mov esi, [esp+8] /* esi = in */
cannam@128 804 mov edi, [esp+16] /* edi = out */
cannam@128 805 mov edx, [esp+40] /* edx = hold */
cannam@128 806 mov ebx, [esp+44] /* ebx = bits */
cannam@128 807 mov ebp, [esp+32] /* ebp = lcode */
cannam@128 808
cannam@128 809 cld
cannam@128 810 jmp L_do_loop
cannam@128 811
cannam@128 812 ALIGN 4
cannam@128 813 L_while_test:
cannam@128 814 cmp [esp+24], edi
cannam@128 815 jbe L_break_loop
cannam@128 816 cmp [esp+12], esi
cannam@128 817 jbe L_break_loop
cannam@128 818
cannam@128 819 L_do_loop:
cannam@128 820 cmp bl, 15
cannam@128 821 ja L_get_length_code /* if (15 < bits) */
cannam@128 822
cannam@128 823 xor eax, eax
cannam@128 824 lodsw /* al = *(ushort *)in++ */
cannam@128 825 mov cl, bl /* cl = bits, needs it for shifting */
cannam@128 826 add bl, 16 /* bits += 16 */
cannam@128 827 shl eax, cl
cannam@128 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@128 829
cannam@128 830 L_get_length_code:
cannam@128 831 mov eax, [esp+56] /* eax = lmask */
cannam@128 832 and eax, edx /* eax &= hold */
cannam@128 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
cannam@128 834
cannam@128 835 L_dolen:
cannam@128 836 mov cl, ah /* cl = this.bits */
cannam@128 837 sub bl, ah /* bits -= this.bits */
cannam@128 838 shr edx, cl /* hold >>= this.bits */
cannam@128 839
cannam@128 840 test al, al
cannam@128 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
cannam@128 842
cannam@128 843 shr eax, 16 /* output this.val char */
cannam@128 844 stosb
cannam@128 845 jmp L_while_test
cannam@128 846
cannam@128 847 ALIGN 4
cannam@128 848 L_test_for_length_base:
cannam@128 849 mov ecx, eax /* len = this */
cannam@128 850 shr ecx, 16 /* len = this.val */
cannam@128 851 mov [esp+64], ecx /* save len */
cannam@128 852 mov cl, al
cannam@128 853
cannam@128 854 test al, 16
cannam@128 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
cannam@128 856 and cl, 15 /* op &= 15 */
cannam@128 857 jz L_decode_distance /* if (!op) */
cannam@128 858 cmp bl, cl
cannam@128 859 jae L_add_bits_to_len /* if (op <= bits) */
cannam@128 860
cannam@128 861 mov ch, cl /* stash op in ch, freeing cl */
cannam@128 862 xor eax, eax
cannam@128 863 lodsw /* al = *(ushort *)in++ */
cannam@128 864 mov cl, bl /* cl = bits, needs it for shifting */
cannam@128 865 add bl, 16 /* bits += 16 */
cannam@128 866 shl eax, cl
cannam@128 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@128 868 mov cl, ch /* move op back to ecx */
cannam@128 869
cannam@128 870 L_add_bits_to_len:
cannam@128 871 sub bl, cl
cannam@128 872 xor eax, eax
cannam@128 873 inc eax
cannam@128 874 shl eax, cl
cannam@128 875 dec eax
cannam@128 876 and eax, edx /* eax &= hold */
cannam@128 877 shr edx, cl
cannam@128 878 add [esp+64], eax /* len += hold & mask[op] */
cannam@128 879
cannam@128 880 L_decode_distance:
cannam@128 881 cmp bl, 15
cannam@128 882 ja L_get_distance_code /* if (15 < bits) */
cannam@128 883
cannam@128 884 xor eax, eax
cannam@128 885 lodsw /* al = *(ushort *)in++ */
cannam@128 886 mov cl, bl /* cl = bits, needs it for shifting */
cannam@128 887 add bl, 16 /* bits += 16 */
cannam@128 888 shl eax, cl
cannam@128 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@128 890
cannam@128 891 L_get_distance_code:
cannam@128 892 mov eax, [esp+60] /* eax = dmask */
cannam@128 893 mov ecx, [esp+36] /* ecx = dcode */
cannam@128 894 and eax, edx /* eax &= hold */
cannam@128 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
cannam@128 896
cannam@128 897 L_dodist:
cannam@128 898 mov ebp, eax /* dist = this */
cannam@128 899 shr ebp, 16 /* dist = this.val */
cannam@128 900 mov cl, ah
cannam@128 901 sub bl, ah /* bits -= this.bits */
cannam@128 902 shr edx, cl /* hold >>= this.bits */
cannam@128 903 mov cl, al /* cl = this.op */
cannam@128 904
cannam@128 905 test al, 16 /* if ((op & 16) == 0) */
cannam@128 906 jz L_test_for_second_level_dist
cannam@128 907 and cl, 15 /* op &= 15 */
cannam@128 908 jz L_check_dist_one
cannam@128 909 cmp bl, cl
cannam@128 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
cannam@128 911
cannam@128 912 mov ch, cl /* stash op in ch, freeing cl */
cannam@128 913 xor eax, eax
cannam@128 914 lodsw /* al = *(ushort *)in++ */
cannam@128 915 mov cl, bl /* cl = bits, needs it for shifting */
cannam@128 916 add bl, 16 /* bits += 16 */
cannam@128 917 shl eax, cl
cannam@128 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
cannam@128 919 mov cl, ch /* move op back to ecx */
cannam@128 920
cannam@128 921 L_add_bits_to_dist:
cannam@128 922 sub bl, cl
cannam@128 923 xor eax, eax
cannam@128 924 inc eax
cannam@128 925 shl eax, cl
cannam@128 926 dec eax /* (1 << op) - 1 */
cannam@128 927 and eax, edx /* eax &= hold */
cannam@128 928 shr edx, cl
cannam@128 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
cannam@128 930
cannam@128 931 L_check_window:
cannam@128 932 mov [esp+8], esi /* save in so from can use its reg */
cannam@128 933 mov eax, edi
cannam@128 934 sub eax, [esp+20] /* nbytes = out - beg */
cannam@128 935
cannam@128 936 cmp eax, ebp
cannam@128 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
cannam@128 938
cannam@128 939 mov ecx, [esp+64] /* ecx = len */
cannam@128 940 mov esi, edi
cannam@128 941 sub esi, ebp /* from = out - dist */
cannam@128 942
cannam@128 943 sar ecx, 1 /* ecx = len / 2 (word count), CF = low bit of len */
cannam@128 944 jnc L_copy_two /* even len: word copies only */
cannam@128 945
cannam@128 946 rep movsw /* odd len: copy len / 2 words ... */
cannam@128 947 mov al, [esi] /* ... then the single trailing byte */
cannam@128 948 mov [edi], al
cannam@128 949 inc edi
cannam@128 950
cannam@128 951 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@128 952 mov ebp, [esp+32] /* ebp = lcode */
cannam@128 953 jmp L_while_test
cannam@128 954
cannam@128 955 L_copy_two:
cannam@128 956 rep movsw /* copy len / 2 words */
cannam@128 957 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@128 958 mov ebp, [esp+32] /* ebp = lcode */
cannam@128 959 jmp L_while_test
cannam@128 960
cannam@128 961 ALIGN 4
cannam@128 962 L_check_dist_one:
cannam@128 963 cmp ebp, 1 /* if dist 1, is a memset */
cannam@128 964 jne L_check_window
cannam@128 965 cmp [esp+20], edi
cannam@128 966 je L_check_window /* out == beg, if outside window */
cannam@128 967
cannam@128 968 mov ecx, [esp+64] /* ecx = len */
cannam@128 969 mov al, [edi-1]
cannam@128 970 mov ah, al
cannam@128 971
cannam@128 972 sar ecx, 1
cannam@128 973 jnc L_set_two
cannam@128 974 mov [edi], al /* memset out with from[-1] */
cannam@128 975 inc edi
cannam@128 976
cannam@128 977 L_set_two:
cannam@128 978 rep stosw
cannam@128 979 mov ebp, [esp+32] /* ebp = lcode */
cannam@128 980 jmp L_while_test
cannam@128 981
cannam@128 982 ALIGN 4
cannam@128 983 L_test_for_second_level_length:
cannam@128 984 test al, 64
cannam@128 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
cannam@128 986
cannam@128 987 xor eax, eax
cannam@128 988 inc eax
cannam@128 989 shl eax, cl
cannam@128 990 dec eax
cannam@128 991 and eax, edx /* eax &= hold */
cannam@128 992 add eax, [esp+64] /* eax += len */
cannam@128 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
cannam@128 994 jmp L_dolen
cannam@128 995
cannam@128 996 ALIGN 4
cannam@128 997 L_test_for_second_level_dist:
cannam@128 998 test al, 64
cannam@128 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
cannam@128 1000
cannam@128 1001 xor eax, eax
cannam@128 1002 inc eax
cannam@128 1003 shl eax, cl
cannam@128 1004 dec eax
cannam@128 1005 and eax, edx /* eax &= hold */
cannam@128 1006 add eax, ebp /* eax += dist */
cannam@128 1007 mov ecx, [esp+36] /* ecx = dcode */
cannam@128 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
cannam@128 1009 jmp L_dodist
cannam@128 1010
cannam@128 1011 ALIGN 4
cannam@128 1012 L_clip_window:
cannam@128 1013 mov ecx, eax
cannam@128 1014 mov eax, [esp+48] /* eax = wsize */
cannam@128 1015 neg ecx /* nbytes = -nbytes */
cannam@128 1016 mov esi, [esp+28] /* from = window */
cannam@128 1017
cannam@128 1018 cmp eax, ebp
cannam@128 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
cannam@128 1020
cannam@128 1021 add ecx, ebp /* nbytes = dist - nbytes */
cannam@128 1022 cmp dword ptr [esp+52], 0
cannam@128 1023 jne L_wrap_around_window /* if (write != 0) */
cannam@128 1024
cannam@128 1025 sub eax, ecx
cannam@128 1026 add esi, eax /* from += wsize - nbytes */
cannam@128 1027
cannam@128 1028 mov eax, [esp+64] /* eax = len */
cannam@128 1029 cmp eax, ecx
cannam@128 1030 jbe L_do_copy /* if (nbytes >= len) */
cannam@128 1031
cannam@128 1032 sub eax, ecx /* len -= nbytes */
cannam@128 1033 rep movsb
cannam@128 1034 mov esi, edi
cannam@128 1035 sub esi, ebp /* from = out - dist */
cannam@128 1036 jmp L_do_copy
cannam@128 1037
cannam@128 1038 ALIGN 4
cannam@128 1039 L_wrap_around_window:
cannam@128 1040 mov eax, [esp+52] /* eax = write */
cannam@128 1041 cmp ecx, eax
cannam@128 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
cannam@128 1043
cannam@128 1044 add esi, [esp+48] /* from += wsize */
cannam@128 1045 add esi, eax /* from += write */
cannam@128 1046 sub esi, ecx /* from -= nbytes */
cannam@128 1047 sub ecx, eax /* nbytes -= write */
cannam@128 1048
cannam@128 1049 mov eax, [esp+64] /* eax = len */
cannam@128 1050 cmp eax, ecx
cannam@128 1051 jbe L_do_copy /* if (nbytes >= len) */
cannam@128 1052
cannam@128 1053 sub eax, ecx /* len -= nbytes */
cannam@128 1054 rep movsb
cannam@128 1055 mov esi, [esp+28] /* from = window */
cannam@128 1056 mov ecx, [esp+52] /* nbytes = write */
cannam@128 1057 cmp eax, ecx
cannam@128 1058 jbe L_do_copy /* if (nbytes >= len) */
cannam@128 1059
cannam@128 1060 sub eax, ecx /* len -= nbytes */
cannam@128 1061 rep movsb
cannam@128 1062 mov esi, edi
cannam@128 1063 sub esi, ebp /* from = out - dist */
cannam@128 1064 jmp L_do_copy
cannam@128 1065
cannam@128 1066 ALIGN 4
cannam@128 1067 L_contiguous_in_window:
cannam@128 1068 add esi, eax
cannam@128 1069 sub esi, ecx /* from += write - nbytes */
cannam@128 1070
cannam@128 1071 mov eax, [esp+64] /* eax = len */
cannam@128 1072 cmp eax, ecx
cannam@128 1073 jbe L_do_copy /* if (nbytes >= len) */
cannam@128 1074
cannam@128 1075 sub eax, ecx /* len -= nbytes */
cannam@128 1076 rep movsb
cannam@128 1077 mov esi, edi
cannam@128 1078 sub esi, ebp /* from = out - dist */
cannam@128 1079 jmp L_do_copy
cannam@128 1080
cannam@128 1081 ALIGN 4
cannam@128 1082 L_do_copy:
cannam@128 1083 mov ecx, eax
cannam@128 1084 rep movsb
cannam@128 1085
cannam@128 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
cannam@128 1087 mov ebp, [esp+32] /* ebp = lcode */
cannam@128 1088 jmp L_while_test
cannam@128 1089
cannam@128 1090 L_test_for_end_of_block:
cannam@128 1091 test al, 32
cannam@128 1092 jz L_invalid_literal_length_code
cannam@128 1093 mov dword ptr [esp+72], 1
cannam@128 1094 jmp L_break_loop_with_status
cannam@128 1095
cannam@128 1096 L_invalid_literal_length_code:
cannam@128 1097 mov dword ptr [esp+72], 2
cannam@128 1098 jmp L_break_loop_with_status
cannam@128 1099
cannam@128 1100 L_invalid_distance_code:
cannam@128 1101 mov dword ptr [esp+72], 3
cannam@128 1102 jmp L_break_loop_with_status
cannam@128 1103
cannam@128 1104 L_invalid_distance_too_far: /* reached from L_clip_window, where esi was reloaded with window */
cannam@128 1105 mov esi, [esp+8] /* restore in from its save slot ([esp+4] holds the saved ebp, not in) */
cannam@128 1106 mov dword ptr [esp+72], 4 /* status 4: reported after the asm as "invalid distance too far back" */
cannam@128 1107 jmp L_break_loop_with_status
cannam@128 1108
cannam@128 1109 L_break_loop:
cannam@128 1110 mov dword ptr [esp+72], 0
cannam@128 1111
cannam@128 1112 L_break_loop_with_status:
cannam@128 1113 /* put in, out, bits, and hold back into ar and pop esp */
cannam@128 1114 mov [esp+8], esi /* save in */
cannam@128 1115 mov [esp+16], edi /* save out */
cannam@128 1116 mov [esp+44], ebx /* save bits */
cannam@128 1117 mov [esp+40], edx /* save hold */
cannam@128 1118 mov ebp, [esp+4] /* restore esp, ebp */
cannam@128 1119 mov esp, [esp]
cannam@128 1120 }
cannam@128 1121 #else
cannam@128 1122 #error "x86 architecture not defined"
cannam@128 1123 #endif
cannam@128 1124
cannam@128 1125 if (ar.status > 1) {
cannam@128 1126 if (ar.status == 2)
cannam@128 1127 strm->msg = "invalid literal/length code";
cannam@128 1128 else if (ar.status == 3)
cannam@128 1129 strm->msg = "invalid distance code";
cannam@128 1130 else
cannam@128 1131 strm->msg = "invalid distance too far back";
cannam@128 1132 state->mode = BAD;
cannam@128 1133 }
cannam@128 1134 else if ( ar.status == 1 ) {
cannam@128 1135 state->mode = TYPE;
cannam@128 1136 }
cannam@128 1137
cannam@128 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
cannam@128 1139 ar.len = ar.bits >> 3;
cannam@128 1140 ar.in -= ar.len;
cannam@128 1141 ar.bits -= ar.len << 3;
cannam@128 1142 ar.hold &= (1U << ar.bits) - 1;
cannam@128 1143
cannam@128 1144 /* update state and return */
cannam@128 1145 strm->next_in = ar.in;
cannam@128 1146 strm->next_out = ar.out;
cannam@128 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
cannam@128 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
cannam@128 1149 PAD_AVAIL_IN - (ar.in - ar.last));
cannam@128 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
cannam@128 1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
cannam@128 1152 PAD_AVAIL_OUT - (ar.out - ar.end));
cannam@128 1153 state->hold = ar.hold;
cannam@128 1154 state->bits = ar.bits;
cannam@128 1155 return;
cannam@128 1156 }
cannam@128 1157