annotate src/zlib-1.2.7/contrib/inflate86/inffas86.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 /* inffas86.c is a hand tuned assembler version of
Chris@4 2 *
Chris@4 3 * inffast.c -- fast decoding
Chris@4 4 * Copyright (C) 1995-2003 Mark Adler
Chris@4 5 * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 6 *
Chris@4 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@4 8 * Please use the copyright conditions above.
Chris@4 9 *
Chris@4 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
Chris@4 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
Chris@4 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
Chris@4 13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
Chris@4 14 * from http://fedora.linux.duke.edu/fc1_x86_64
Chris@4 15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
Chris@4 16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
Chris@4 17 * when decompressing mozilla-source-1.3.tar.gz.
Chris@4 18 *
Chris@4 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@4 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@4 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@4 22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
Chris@4 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@4 24 * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@4 25 * versions of this and inffast.S can be found at
Chris@4 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@4 27 */
Chris@4 28
Chris@4 29 #include "zutil.h"
Chris@4 30 #include "inftrees.h"
Chris@4 31 #include "inflate.h"
Chris@4 32 #include "inffast.h"
Chris@4 33
Chris@4 34 /* Mark Adler's comments from inffast.c: */
Chris@4 35
Chris@4 36 /*
Chris@4 37 Decode literal, length, and distance codes and write out the resulting
Chris@4 38 literal and match bytes until either not enough input or output is
Chris@4 39 available, an end-of-block is encountered, or a data error is encountered.
Chris@4 40 When large enough input and output buffers are supplied to inflate(), for
Chris@4 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
Chris@4 42 inflate execution time is spent in this routine.
Chris@4 43
Chris@4 44 Entry assumptions:
Chris@4 45
Chris@4 46 state->mode == LEN
Chris@4 47 strm->avail_in >= 6
Chris@4 48 strm->avail_out >= 258
Chris@4 49 start >= strm->avail_out
Chris@4 50 state->bits < 8
Chris@4 51
Chris@4 52 On return, state->mode is one of:
Chris@4 53
Chris@4 54 LEN -- not enough output space or not enough available input remains
Chris@4 55 TYPE -- reached end of block code, inflate() to interpret next block
Chris@4 56 BAD -- error in block data
Chris@4 57
Chris@4 58 Notes:
Chris@4 59
Chris@4 60 - The maximum input bits used by a length/distance pair is 15 bits for the
Chris@4 61 length code, 5 bits for the length extra, 15 bits for the distance code,
Chris@4 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
Chris@4 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
Chris@4 64 checking for available input while decoding.
Chris@4 65
Chris@4 66 - The maximum bytes that a single length/distance pair can output is 258
Chris@4 67 bytes, which is the maximum length that can be coded. inflate_fast()
Chris@4 68 requires strm->avail_out >= 258 for each loop to avoid checking for
Chris@4 69 output space.
Chris@4 70 */
Chris@4 71 void inflate_fast(strm, start)
Chris@4 72 z_streamp strm;
Chris@4 73 unsigned start; /* inflate()'s starting value for strm->avail_out */
Chris@4 74 {
Chris@4 75 struct inflate_state FAR *state;
Chris@4 76 struct inffast_ar {
Chris@4 77 /* 64 32 x86 x86_64 */
Chris@4 78 /* ar offset register */
Chris@4 79 /* 0 0 */ void *esp; /* esp save */
Chris@4 80 /* 8 4 */ void *ebp; /* ebp save */
Chris@4 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
Chris@4 82 /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
Chris@4 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
Chris@4 84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
Chris@4 85 /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
Chris@4 86 /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
Chris@4 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
Chris@4 88 /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
Chris@4 89 /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
Chris@4 90 /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
Chris@4 91 /* 92 48 */ unsigned wsize; /* window size */
Chris@4 92 /* 96 52 */ unsigned write; /* window write index */
Chris@4 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode */
Chris@4 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode */
Chris@4 95 /*108 64 */ unsigned len; /* r14 match length */
Chris@4 96 /*112 68 */ unsigned dist; /* r15 match distance */
Chris@4 97 /*116 72 */ unsigned status; /* set when state chng*/
Chris@4 98 } ar;
Chris@4 99
Chris@4 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@4 101 #define PAD_AVAIL_IN 6
Chris@4 102 #define PAD_AVAIL_OUT 258
Chris@4 103 #else
Chris@4 104 #define PAD_AVAIL_IN 5
Chris@4 105 #define PAD_AVAIL_OUT 257
Chris@4 106 #endif
Chris@4 107
Chris@4 108 /* copy state to local variables */
Chris@4 109 state = (struct inflate_state FAR *)strm->state;
Chris@4 110 ar.in = strm->next_in;
Chris@4 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
Chris@4 112 ar.out = strm->next_out;
Chris@4 113 ar.beg = ar.out - (start - strm->avail_out);
Chris@4 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
Chris@4 115 ar.wsize = state->wsize;
Chris@4 116 ar.write = state->wnext;
Chris@4 117 ar.window = state->window;
Chris@4 118 ar.hold = state->hold;
Chris@4 119 ar.bits = state->bits;
Chris@4 120 ar.lcode = state->lencode;
Chris@4 121 ar.dcode = state->distcode;
Chris@4 122 ar.lmask = (1U << state->lenbits) - 1;
Chris@4 123 ar.dmask = (1U << state->distbits) - 1;
Chris@4 124
Chris@4 125 /* decode literals and length/distances until end-of-block or not enough
Chris@4 126 input data or output space */
Chris@4 127
Chris@4 128 /* align in on 1/2 hold size boundary */
Chris@4 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
Chris@4 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
Chris@4 131 ar.bits += 8;
Chris@4 132 }
Chris@4 133
Chris@4 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@4 135 __asm__ __volatile__ (
Chris@4 136 " leaq %0, %%rax\n"
Chris@4 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
Chris@4 138 " movq %%rsp, (%%rax)\n"
Chris@4 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
Chris@4 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
Chris@4 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
Chris@4 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
Chris@4 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
Chris@4 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
Chris@4 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
Chris@4 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
Chris@4 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
Chris@4 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
Chris@4 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
Chris@4 150 /* r14d = len */
Chris@4 151 /* r15d = dist */
Chris@4 152 " cld\n"
Chris@4 153 " cmpq %%rdi, %%r10\n"
Chris@4 154 " je .L_one_time\n" /* if only one decode left */
Chris@4 155 " cmpq %%rsi, %%r9\n"
Chris@4 156 " je .L_one_time\n"
Chris@4 157 " jmp .L_do_loop\n"
Chris@4 158
Chris@4 159 ".L_one_time:\n"
Chris@4 160 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 161 " cmpb $32, %%bl\n"
Chris@4 162 " ja .L_get_length_code_one_time\n"
Chris@4 163
Chris@4 164 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 166 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 167 " shlq %%cl, %%rax\n"
Chris@4 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 169 " jmp .L_get_length_code_one_time\n"
Chris@4 170
Chris@4 171 ".align 32,0x90\n"
Chris@4 172 ".L_while_test:\n"
Chris@4 173 " cmpq %%rdi, %%r10\n"
Chris@4 174 " jbe .L_break_loop\n"
Chris@4 175 " cmpq %%rsi, %%r9\n"
Chris@4 176 " jbe .L_break_loop\n"
Chris@4 177
Chris@4 178 ".L_do_loop:\n"
Chris@4 179 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 180 " cmpb $32, %%bl\n"
Chris@4 181 " ja .L_get_length_code\n" /* if (32 < bits) */
Chris@4 182
Chris@4 183 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 185 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 186 " shlq %%cl, %%rax\n"
Chris@4 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 188
Chris@4 189 ".L_get_length_code:\n"
Chris@4 190 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 192
Chris@4 193 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 194 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 196
Chris@4 197 " testb %%al, %%al\n"
Chris@4 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 199
Chris@4 200 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@4 201 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 202 " stosb\n"
Chris@4 203
Chris@4 204 ".L_get_length_code_one_time:\n"
Chris@4 205 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 207
Chris@4 208 ".L_dolen:\n"
Chris@4 209 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 210 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 212
Chris@4 213 " testb %%al, %%al\n"
Chris@4 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 215
Chris@4 216 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 217 " stosb\n"
Chris@4 218 " jmp .L_while_test\n"
Chris@4 219
Chris@4 220 ".align 32,0x90\n"
Chris@4 221 ".L_test_for_length_base:\n"
Chris@4 222 " movl %%eax, %%r14d\n" /* len = this */
Chris@4 223 " shrl $16, %%r14d\n" /* len = this.val */
Chris@4 224 " movb %%al, %%cl\n"
Chris@4 225
Chris@4 226 " testb $16, %%al\n"
Chris@4 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@4 228 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 229 " jz .L_decode_distance\n" /* if (!op) */
Chris@4 230
Chris@4 231 ".L_add_bits_to_len:\n"
Chris@4 232 " subb %%cl, %%bl\n"
Chris@4 233 " xorl %%eax, %%eax\n"
Chris@4 234 " incl %%eax\n"
Chris@4 235 " shll %%cl, %%eax\n"
Chris@4 236 " decl %%eax\n"
Chris@4 237 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 238 " shrq %%cl, %%rdx\n"
Chris@4 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
Chris@4 240
Chris@4 241 ".L_decode_distance:\n"
Chris@4 242 " movq %%r13, %%r8\n" /* r8 = dmask */
Chris@4 243 " cmpb $32, %%bl\n"
Chris@4 244 " ja .L_get_distance_code\n" /* if (32 < bits) */
Chris@4 245
Chris@4 246 " lodsl\n" /* eax = *(uint *)in++ */
Chris@4 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 248 " addb $32, %%bl\n" /* bits += 32 */
Chris@4 249 " shlq %%cl, %%rax\n"
Chris@4 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@4 251
Chris@4 252 ".L_get_distance_code:\n"
Chris@4 253 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@4 254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
Chris@4 255
Chris@4 256 ".L_dodist:\n"
Chris@4 257 " movl %%eax, %%r15d\n" /* dist = this */
Chris@4 258 " shrl $16, %%r15d\n" /* dist = this.val */
Chris@4 259 " movb %%ah, %%cl\n"
Chris@4 260 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@4 262 " movb %%al, %%cl\n" /* cl = this.op */
Chris@4 263
Chris@4 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@4 265 " jz .L_test_for_second_level_dist\n"
Chris@4 266 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 267 " jz .L_check_dist_one\n"
Chris@4 268
Chris@4 269 ".L_add_bits_to_dist:\n"
Chris@4 270 " subb %%cl, %%bl\n"
Chris@4 271 " xorl %%eax, %%eax\n"
Chris@4 272 " incl %%eax\n"
Chris@4 273 " shll %%cl, %%eax\n"
Chris@4 274 " decl %%eax\n" /* (1 << op) - 1 */
Chris@4 275 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 276 " shrq %%cl, %%rdx\n"
Chris@4 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
Chris@4 278
Chris@4 279 ".L_check_window:\n"
Chris@4 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
Chris@4 281 " movq %%rdi, %%rax\n"
Chris@4 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
Chris@4 283
Chris@4 284 " cmpl %%r15d, %%eax\n"
Chris@4 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@4 286
Chris@4 287 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@4 288 " movq %%rdi, %%rsi\n"
Chris@4 289 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 290
Chris@4 291 " sarl %%ecx\n"
Chris@4 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@4 293
Chris@4 294 " rep movsw\n"
Chris@4 295 " movb (%%rsi), %%al\n"
Chris@4 296 " movb %%al, (%%rdi)\n"
Chris@4 297 " incq %%rdi\n"
Chris@4 298
Chris@4 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@4 300 " jmp .L_while_test\n"
Chris@4 301
Chris@4 302 ".L_copy_two:\n"
Chris@4 303 " rep movsw\n"
Chris@4 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@4 305 " jmp .L_while_test\n"
Chris@4 306
Chris@4 307 ".align 32,0x90\n"
Chris@4 308 ".L_check_dist_one:\n"
Chris@4 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
Chris@4 310 " jne .L_check_window\n"
Chris@4 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
Chris@4 312 " je .L_check_window\n"
Chris@4 313
Chris@4 314 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@4 315 " movb -1(%%rdi), %%al\n"
Chris@4 316 " movb %%al, %%ah\n"
Chris@4 317
Chris@4 318 " sarl %%ecx\n"
Chris@4 319 " jnc .L_set_two\n"
Chris@4 320 " movb %%al, (%%rdi)\n"
Chris@4 321 " incq %%rdi\n"
Chris@4 322
Chris@4 323 ".L_set_two:\n"
Chris@4 324 " rep stosw\n"
Chris@4 325 " jmp .L_while_test\n"
Chris@4 326
Chris@4 327 ".align 32,0x90\n"
Chris@4 328 ".L_test_for_second_level_length:\n"
Chris@4 329 " testb $64, %%al\n"
Chris@4 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@4 331
Chris@4 332 " xorl %%eax, %%eax\n"
Chris@4 333 " incl %%eax\n"
Chris@4 334 " shll %%cl, %%eax\n"
Chris@4 335 " decl %%eax\n"
Chris@4 336 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 337 " addl %%r14d, %%eax\n" /* eax += len */
Chris@4 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 339 " jmp .L_dolen\n"
Chris@4 340
Chris@4 341 ".align 32,0x90\n"
Chris@4 342 ".L_test_for_second_level_dist:\n"
Chris@4 343 " testb $64, %%al\n"
Chris@4 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@4 345
Chris@4 346 " xorl %%eax, %%eax\n"
Chris@4 347 " incl %%eax\n"
Chris@4 348 " shll %%cl, %%eax\n"
Chris@4 349 " decl %%eax\n"
Chris@4 350 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 351 " addl %%r15d, %%eax\n" /* eax += dist */
Chris@4 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 353 " jmp .L_dodist\n"
Chris@4 354
Chris@4 355 ".align 32,0x90\n"
Chris@4 356 ".L_clip_window:\n"
Chris@4 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
Chris@4 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
Chris@4 359 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@4 360
Chris@4 361 " cmpl %%r15d, %%eax\n"
Chris@4 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@4 363
Chris@4 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
Chris@4 365 " cmpl $0, 96(%%rsp)\n"
Chris@4 366 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@4 367
Chris@4 368 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@4 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@4 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
Chris@4 371
Chris@4 372 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 373 " cmpl %%ecx, %%r14d\n"
Chris@4 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 375
Chris@4 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@4 377 " rep movsb\n"
Chris@4 378 " movq %%rdi, %%rsi\n"
Chris@4 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
Chris@4 380 " jmp .L_do_copy\n"
Chris@4 381
Chris@4 382 ".align 32,0x90\n"
Chris@4 383 ".L_wrap_around_window:\n"
Chris@4 384 " movl 96(%%rsp), %%eax\n" /* eax = write */
Chris@4 385 " cmpl %%eax, %%ecx\n"
Chris@4 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@4 387
Chris@4 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
Chris@4 389 " addq 56(%%rsp), %%rsi\n" /* from += window */
Chris@4 390 " addq %%rax, %%rsi\n" /* from += write */
Chris@4 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
Chris@4 392 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@4 393
Chris@4 394 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 395 " cmpl %%ecx, %%eax\n"
Chris@4 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 397
Chris@4 398 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 399 " rep movsb\n"
Chris@4 400 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@4 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
Chris@4 402 " cmpl %%ecx, %%eax\n"
Chris@4 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 404
Chris@4 405 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 406 " rep movsb\n"
Chris@4 407 " movq %%rdi, %%rsi\n"
Chris@4 408 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 409 " jmp .L_do_copy\n"
Chris@4 410
Chris@4 411 ".align 32,0x90\n"
Chris@4 412 ".L_contiguous_in_window:\n"
Chris@4 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
Chris@4 414 " addq %%rax, %%rsi\n"
Chris@4 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
Chris@4 416
Chris@4 417 " movl %%r14d, %%eax\n" /* eax = len */
Chris@4 418 " cmpl %%ecx, %%eax\n"
Chris@4 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 420
Chris@4 421 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 422 " rep movsb\n"
Chris@4 423 " movq %%rdi, %%rsi\n"
Chris@4 424 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@4 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 426
Chris@4 427 ".align 32,0x90\n"
Chris@4 428 ".L_do_copy:\n"
Chris@4 429 " movl %%eax, %%ecx\n" /* ecx = len */
Chris@4 430 " rep movsb\n"
Chris@4 431
Chris@4 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
Chris@4 433 " jmp .L_while_test\n"
Chris@4 434
Chris@4 435 ".L_test_for_end_of_block:\n"
Chris@4 436 " testb $32, %%al\n"
Chris@4 437 " jz .L_invalid_literal_length_code\n"
Chris@4 438 " movl $1, 116(%%rsp)\n"
Chris@4 439 " jmp .L_break_loop_with_status\n"
Chris@4 440
Chris@4 441 ".L_invalid_literal_length_code:\n"
Chris@4 442 " movl $2, 116(%%rsp)\n"
Chris@4 443 " jmp .L_break_loop_with_status\n"
Chris@4 444
Chris@4 445 ".L_invalid_distance_code:\n"
Chris@4 446 " movl $3, 116(%%rsp)\n"
Chris@4 447 " jmp .L_break_loop_with_status\n"
Chris@4 448
Chris@4 449 ".L_invalid_distance_too_far:\n"
Chris@4 450 " movl $4, 116(%%rsp)\n"
Chris@4 451 " jmp .L_break_loop_with_status\n"
Chris@4 452
Chris@4 453 ".L_break_loop:\n"
Chris@4 454 " movl $0, 116(%%rsp)\n"
Chris@4 455
Chris@4 456 ".L_break_loop_with_status:\n"
Chris@4 457 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 458 " movq %%rsi, 16(%%rsp)\n" /* in */
Chris@4 459 " movq %%rdi, 32(%%rsp)\n" /* out */
Chris@4 460 " movl %%ebx, 88(%%rsp)\n" /* bits */
Chris@4 461 " movq %%rdx, 80(%%rsp)\n" /* hold */
Chris@4 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
Chris@4 463 " movq 8(%%rsp), %%rbp\n"
Chris@4 464 " movq %%rax, %%rsp\n"
Chris@4 465 :
Chris@4 466 : "m" (ar)
Chris@4 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
Chris@4 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
Chris@4 469 );
Chris@4 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
Chris@4 471 __asm__ __volatile__ (
Chris@4 472 " leal %0, %%eax\n"
Chris@4 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
Chris@4 474 " movl %%ebp, 4(%%eax)\n"
Chris@4 475 " movl %%eax, %%esp\n"
Chris@4 476 " movl 8(%%esp), %%esi\n" /* esi = in */
Chris@4 477 " movl 16(%%esp), %%edi\n" /* edi = out */
Chris@4 478 " movl 40(%%esp), %%edx\n" /* edx = hold */
Chris@4 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
Chris@4 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 481
Chris@4 482 " cld\n"
Chris@4 483 " jmp .L_do_loop\n"
Chris@4 484
Chris@4 485 ".align 32,0x90\n"
Chris@4 486 ".L_while_test:\n"
Chris@4 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
Chris@4 488 " jbe .L_break_loop\n"
Chris@4 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
Chris@4 490 " jbe .L_break_loop\n"
Chris@4 491
Chris@4 492 ".L_do_loop:\n"
Chris@4 493 " cmpb $15, %%bl\n"
Chris@4 494 " ja .L_get_length_code\n" /* if (15 < bits) */
Chris@4 495
Chris@4 496 " xorl %%eax, %%eax\n"
Chris@4 497 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 499 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 500 " shll %%cl, %%eax\n"
Chris@4 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 502
Chris@4 503 ".L_get_length_code:\n"
Chris@4 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
Chris@4 505 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@4 507
Chris@4 508 ".L_dolen:\n"
Chris@4 509 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@4 510 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@4 512
Chris@4 513 " testb %%al, %%al\n"
Chris@4 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@4 515
Chris@4 516 " shrl $16, %%eax\n" /* output this.val char */
Chris@4 517 " stosb\n"
Chris@4 518 " jmp .L_while_test\n"
Chris@4 519
Chris@4 520 ".align 32,0x90\n"
Chris@4 521 ".L_test_for_length_base:\n"
Chris@4 522 " movl %%eax, %%ecx\n" /* len = this */
Chris@4 523 " shrl $16, %%ecx\n" /* len = this.val */
Chris@4 524 " movl %%ecx, 64(%%esp)\n" /* save len */
Chris@4 525 " movb %%al, %%cl\n"
Chris@4 526
Chris@4 527 " testb $16, %%al\n"
Chris@4 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@4 529 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 530 " jz .L_decode_distance\n" /* if (!op) */
Chris@4 531 " cmpb %%cl, %%bl\n"
Chris@4 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
Chris@4 533
Chris@4 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@4 535 " xorl %%eax, %%eax\n"
Chris@4 536 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 538 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 539 " shll %%cl, %%eax\n"
Chris@4 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 541 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@4 542
Chris@4 543 ".L_add_bits_to_len:\n"
Chris@4 544 " subb %%cl, %%bl\n"
Chris@4 545 " xorl %%eax, %%eax\n"
Chris@4 546 " incl %%eax\n"
Chris@4 547 " shll %%cl, %%eax\n"
Chris@4 548 " decl %%eax\n"
Chris@4 549 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 550 " shrl %%cl, %%edx\n"
Chris@4 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
Chris@4 552
Chris@4 553 ".L_decode_distance:\n"
Chris@4 554 " cmpb $15, %%bl\n"
Chris@4 555 " ja .L_get_distance_code\n" /* if (15 < bits) */
Chris@4 556
Chris@4 557 " xorl %%eax, %%eax\n"
Chris@4 558 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 560 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 561 " shll %%cl, %%eax\n"
Chris@4 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 563
Chris@4 564 ".L_get_distance_code:\n"
Chris@4 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
Chris@4 566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@4 567 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
Chris@4 569
Chris@4 570 ".L_dodist:\n"
Chris@4 571 " movl %%eax, %%ebp\n" /* dist = this */
Chris@4 572 " shrl $16, %%ebp\n" /* dist = this.val */
Chris@4 573 " movb %%ah, %%cl\n"
Chris@4 574 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@4 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@4 576 " movb %%al, %%cl\n" /* cl = this.op */
Chris@4 577
Chris@4 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@4 579 " jz .L_test_for_second_level_dist\n"
Chris@4 580 " andb $15, %%cl\n" /* op &= 15 */
Chris@4 581 " jz .L_check_dist_one\n"
Chris@4 582 " cmpb %%cl, %%bl\n"
Chris@4 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
Chris@4 584
Chris@4 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@4 586 " xorl %%eax, %%eax\n"
Chris@4 587 " lodsw\n" /* al = *(ushort *)in++ */
Chris@4 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@4 589 " addb $16, %%bl\n" /* bits += 16 */
Chris@4 590 " shll %%cl, %%eax\n"
Chris@4 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@4 592 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@4 593
Chris@4 594 ".L_add_bits_to_dist:\n"
Chris@4 595 " subb %%cl, %%bl\n"
Chris@4 596 " xorl %%eax, %%eax\n"
Chris@4 597 " incl %%eax\n"
Chris@4 598 " shll %%cl, %%eax\n"
Chris@4 599 " decl %%eax\n" /* (1 << op) - 1 */
Chris@4 600 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 601 " shrl %%cl, %%edx\n"
Chris@4 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
Chris@4 603
Chris@4 604 ".L_check_window:\n"
Chris@4 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
Chris@4 606 " movl %%edi, %%eax\n"
Chris@4 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
Chris@4 608
Chris@4 609 " cmpl %%ebp, %%eax\n"
Chris@4 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@4 611
Chris@4 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@4 613 " movl %%edi, %%esi\n"
Chris@4 614 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 615
Chris@4 616 " sarl %%ecx\n"
Chris@4 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@4 618
Chris@4 619 " rep movsw\n"
Chris@4 620 " movb (%%esi), %%al\n"
Chris@4 621 " movb %%al, (%%edi)\n"
Chris@4 622 " incl %%edi\n"
Chris@4 623
Chris@4 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 626 " jmp .L_while_test\n"
Chris@4 627
Chris@4 628 ".L_copy_two:\n"
Chris@4 629 " rep movsw\n"
Chris@4 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 632 " jmp .L_while_test\n"
Chris@4 633
Chris@4 634 ".align 32,0x90\n"
Chris@4 635 ".L_check_dist_one:\n"
Chris@4 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
Chris@4 637 " jne .L_check_window\n"
Chris@4 638 " cmpl %%edi, 20(%%esp)\n"
Chris@4 639 " je .L_check_window\n" /* out == beg, if outside window */
Chris@4 640
Chris@4 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@4 642 " movb -1(%%edi), %%al\n"
Chris@4 643 " movb %%al, %%ah\n"
Chris@4 644
Chris@4 645 " sarl %%ecx\n"
Chris@4 646 " jnc .L_set_two\n"
Chris@4 647 " movb %%al, (%%edi)\n"
Chris@4 648 " incl %%edi\n"
Chris@4 649
Chris@4 650 ".L_set_two:\n"
Chris@4 651 " rep stosw\n"
Chris@4 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 653 " jmp .L_while_test\n"
Chris@4 654
Chris@4 655 ".align 32,0x90\n"
Chris@4 656 ".L_test_for_second_level_length:\n"
Chris@4 657 " testb $64, %%al\n"
Chris@4 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@4 659
Chris@4 660 " xorl %%eax, %%eax\n"
Chris@4 661 " incl %%eax\n"
Chris@4 662 " shll %%cl, %%eax\n"
Chris@4 663 " decl %%eax\n"
Chris@4 664 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 665 " addl 64(%%esp), %%eax\n" /* eax += len */
Chris@4 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 667 " jmp .L_dolen\n"
Chris@4 668
Chris@4 669 ".align 32,0x90\n"
Chris@4 670 ".L_test_for_second_level_dist:\n"
Chris@4 671 " testb $64, %%al\n"
Chris@4 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@4 673
Chris@4 674 " xorl %%eax, %%eax\n"
Chris@4 675 " incl %%eax\n"
Chris@4 676 " shll %%cl, %%eax\n"
Chris@4 677 " decl %%eax\n"
Chris@4 678 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@4 679 " addl %%ebp, %%eax\n" /* eax += dist */
Chris@4 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@4 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 682 " jmp .L_dodist\n"
Chris@4 683
Chris@4 684 ".align 32,0x90\n"
Chris@4 685 ".L_clip_window:\n"
Chris@4 686 " movl %%eax, %%ecx\n"
Chris@4 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
Chris@4 688 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@4 689 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@4 690
Chris@4 691 " cmpl %%ebp, %%eax\n"
Chris@4 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@4 693
Chris@4 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
Chris@4 695 " cmpl $0, 52(%%esp)\n"
Chris@4 696 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@4 697
Chris@4 698 " subl %%ecx, %%eax\n"
Chris@4 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
Chris@4 700
Chris@4 701 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 702 " cmpl %%ecx, %%eax\n"
Chris@4 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 704
Chris@4 705 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 706 " rep movsb\n"
Chris@4 707 " movl %%edi, %%esi\n"
Chris@4 708 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 709 " jmp .L_do_copy\n"
Chris@4 710
Chris@4 711 ".align 32,0x90\n"
Chris@4 712 ".L_wrap_around_window:\n"
Chris@4 713 " movl 52(%%esp), %%eax\n" /* eax = write */
Chris@4 714 " cmpl %%eax, %%ecx\n"
Chris@4 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@4 716
Chris@4 717 " addl 48(%%esp), %%esi\n" /* from += wsize */
Chris@4 718 " addl %%eax, %%esi\n" /* from += write */
Chris@4 719 " subl %%ecx, %%esi\n" /* from -= nbytes */
Chris@4 720 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@4 721
Chris@4 722 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 723 " cmpl %%ecx, %%eax\n"
Chris@4 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 725
Chris@4 726 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 727 " rep movsb\n"
Chris@4 728 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@4 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
Chris@4 730 " cmpl %%ecx, %%eax\n"
Chris@4 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 732
Chris@4 733 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 734 " rep movsb\n"
Chris@4 735 " movl %%edi, %%esi\n"
Chris@4 736 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 737 " jmp .L_do_copy\n"
Chris@4 738
Chris@4 739 ".align 32,0x90\n"
Chris@4 740 ".L_contiguous_in_window:\n"
Chris@4 741 " addl %%eax, %%esi\n"
Chris@4 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
Chris@4 743
Chris@4 744 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@4 745 " cmpl %%ecx, %%eax\n"
Chris@4 746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 747
Chris@4 748 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@4 749 " rep movsb\n"
Chris@4 750 " movl %%edi, %%esi\n"
Chris@4 751 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@4 752 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@4 753
Chris@4 754 ".align 32,0x90\n"
Chris@4 755 ".L_do_copy:\n"
Chris@4 756 " movl %%eax, %%ecx\n"
Chris@4 757 " rep movsb\n"
Chris@4 758
Chris@4 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@4 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@4 761 " jmp .L_while_test\n"
Chris@4 762
Chris@4 763 ".L_test_for_end_of_block:\n"
Chris@4 764 " testb $32, %%al\n"
Chris@4 765 " jz .L_invalid_literal_length_code\n"
Chris@4 766 " movl $1, 72(%%esp)\n"
Chris@4 767 " jmp .L_break_loop_with_status\n"
Chris@4 768
Chris@4 769 ".L_invalid_literal_length_code:\n"
Chris@4 770 " movl $2, 72(%%esp)\n"
Chris@4 771 " jmp .L_break_loop_with_status\n"
Chris@4 772
Chris@4 773 ".L_invalid_distance_code:\n"
Chris@4 774 " movl $3, 72(%%esp)\n"
Chris@4 775 " jmp .L_break_loop_with_status\n"
Chris@4 776
Chris@4 777 ".L_invalid_distance_too_far:\n"
Chris@4 778 " movl 8(%%esp), %%esi\n"
Chris@4 779 " movl $4, 72(%%esp)\n"
Chris@4 780 " jmp .L_break_loop_with_status\n"
Chris@4 781
Chris@4 782 ".L_break_loop:\n"
Chris@4 783 " movl $0, 72(%%esp)\n"
Chris@4 784
Chris@4 785 ".L_break_loop_with_status:\n"
Chris@4 786 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 787 " movl %%esi, 8(%%esp)\n" /* save in */
Chris@4 788 " movl %%edi, 16(%%esp)\n" /* save out */
Chris@4 789 " movl %%ebx, 44(%%esp)\n" /* save bits */
Chris@4 790 " movl %%edx, 40(%%esp)\n" /* save hold */
Chris@4 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
Chris@4 792 " movl (%%esp), %%esp\n"
Chris@4 793 :
Chris@4 794 : "m" (ar)
Chris@4 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
Chris@4 796 );
Chris@4 797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
Chris@4 798 __asm {
Chris@4 799 lea eax, ar
Chris@4 800 mov [eax], esp /* save esp, ebp */
Chris@4 801 mov [eax+4], ebp
Chris@4 802 mov esp, eax
Chris@4 803 mov esi, [esp+8] /* esi = in */
Chris@4 804 mov edi, [esp+16] /* edi = out */
Chris@4 805 mov edx, [esp+40] /* edx = hold */
Chris@4 806 mov ebx, [esp+44] /* ebx = bits */
Chris@4 807 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 808
Chris@4 809 cld
Chris@4 810 jmp L_do_loop
Chris@4 811
Chris@4 812 ALIGN 4
Chris@4 813 L_while_test:
Chris@4 814 cmp [esp+24], edi
Chris@4 815 jbe L_break_loop
Chris@4 816 cmp [esp+12], esi
Chris@4 817 jbe L_break_loop
Chris@4 818
Chris@4 819 L_do_loop:
Chris@4 820 cmp bl, 15
Chris@4 821 ja L_get_length_code /* if (15 < bits) */
Chris@4 822
Chris@4 823 xor eax, eax /* clear upper half of eax before the 16-bit load */
Chris@4 824 lodsw /* ax = *(ushort *)in, in += 2 */
Chris@4 825 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 826 add bl, 16 /* bits += 16 */
Chris@4 827 shl eax, cl
Chris@4 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 829
Chris@4 830 L_get_length_code:
Chris@4 831 mov eax, [esp+56] /* eax = lmask */
Chris@4 832 and eax, edx /* eax &= hold */
Chris@4 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
Chris@4 834
Chris@4 835 L_dolen:
Chris@4 836 mov cl, ah /* cl = this.bits */
Chris@4 837 sub bl, ah /* bits -= this.bits */
Chris@4 838 shr edx, cl /* hold >>= this.bits */
Chris@4 839
Chris@4 840 test al, al
Chris@4 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
Chris@4 842
Chris@4 843 shr eax, 16 /* output this.val char */
Chris@4 844 stosb
Chris@4 845 jmp L_while_test
Chris@4 846
Chris@4 847 ALIGN 4
Chris@4 848 L_test_for_length_base:
Chris@4 849 mov ecx, eax /* len = this */
Chris@4 850 shr ecx, 16 /* len = this.val */
Chris@4 851 mov [esp+64], ecx /* save len */
Chris@4 852 mov cl, al /* cl = this.op */
Chris@4 853
Chris@4 854 test al, 16
Chris@4 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
Chris@4 856 and cl, 15 /* op &= 15 */
Chris@4 857 jz L_decode_distance /* if (!op) */
Chris@4 858 cmp bl, cl
Chris@4 859 jae L_add_bits_to_len /* if (op <= bits) */
Chris@4 860
Chris@4 861 mov ch, cl /* stash op in ch, freeing cl */
Chris@4 862 xor eax, eax /* clear upper half of eax before the 16-bit load */
Chris@4 863 lodsw /* ax = *(ushort *)in, in += 2 */
Chris@4 864 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 865 add bl, 16 /* bits += 16 */
Chris@4 866 shl eax, cl
Chris@4 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 868 mov cl, ch /* move op back to cl */
Chris@4 869
Chris@4 870 L_add_bits_to_len:
Chris@4 871 sub bl, cl
Chris@4 872 xor eax, eax
Chris@4 873 inc eax
Chris@4 874 shl eax, cl
Chris@4 875 dec eax
Chris@4 876 and eax, edx /* eax &= hold */
Chris@4 877 shr edx, cl
Chris@4 878 add [esp+64], eax /* len += hold & mask[op] */
Chris@4 879
Chris@4 880 L_decode_distance:
Chris@4 881 cmp bl, 15
Chris@4 882 ja L_get_distance_code /* if (15 < bits) */
Chris@4 883
Chris@4 884 xor eax, eax /* clear upper half of eax before the 16-bit load */
Chris@4 885 lodsw /* ax = *(ushort *)in, in += 2 */
Chris@4 886 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 887 add bl, 16 /* bits += 16 */
Chris@4 888 shl eax, cl
Chris@4 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 890
Chris@4 891 L_get_distance_code:
Chris@4 892 mov eax, [esp+60] /* eax = dmask */
Chris@4 893 mov ecx, [esp+36] /* ecx = dcode */
Chris@4 894 and eax, edx /* eax &= hold */
Chris@4 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
Chris@4 896
Chris@4 897 L_dodist:
Chris@4 898 mov ebp, eax /* dist = this */
Chris@4 899 shr ebp, 16 /* dist = this.val */
Chris@4 900 mov cl, ah /* cl = this.bits */
Chris@4 901 sub bl, ah /* bits -= this.bits */
Chris@4 902 shr edx, cl /* hold >>= this.bits */
Chris@4 903 mov cl, al /* cl = this.op */
Chris@4 904
Chris@4 905 test al, 16 /* if ((op & 16) == 0) */
Chris@4 906 jz L_test_for_second_level_dist
Chris@4 907 and cl, 15 /* op &= 15 */
Chris@4 908 jz L_check_dist_one /* if (!op) */
Chris@4 909 cmp bl, cl
Chris@4 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
Chris@4 911
Chris@4 912 mov ch, cl /* stash op in ch, freeing cl */
Chris@4 913 xor eax, eax /* clear upper half of eax before the 16-bit load */
Chris@4 914 lodsw /* ax = *(ushort *)in, in += 2 */
Chris@4 915 mov cl, bl /* cl = bits, needs it for shifting */
Chris@4 916 add bl, 16 /* bits += 16 */
Chris@4 917 shl eax, cl
Chris@4 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@4 919 mov cl, ch /* move op back to cl */
Chris@4 920
Chris@4 921 L_add_bits_to_dist:
Chris@4 922 sub bl, cl
Chris@4 923 xor eax, eax
Chris@4 924 inc eax
Chris@4 925 shl eax, cl
Chris@4 926 dec eax /* (1 << op) - 1 */
Chris@4 927 and eax, edx /* eax &= hold */
Chris@4 928 shr edx, cl
Chris@4 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
Chris@4 930
Chris@4 931 L_check_window:
Chris@4 932 mov [esp+8], esi /* save in so from can use its register */
Chris@4 933 mov eax, edi
Chris@4 934 sub eax, [esp+20] /* nbytes = out - beg */
Chris@4 935
Chris@4 936 cmp eax, ebp
Chris@4 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
Chris@4 938
Chris@4 939 mov ecx, [esp+64] /* ecx = len */
Chris@4 940 mov esi, edi
Chris@4 941 sub esi, ebp /* from = out - dist */
Chris@4 942
Chris@4 943 sar ecx, 1 /* ecx = len / 2, carry = low bit of len */
Chris@4 944 jnc L_copy_two /* if len is even, words only */
Chris@4 945
Chris@4 946 rep movsw /* copy len / 2 words */
Chris@4 947 mov al, [esi] /* len is odd: copy the one trailing byte */
Chris@4 948 mov [edi], al
Chris@4 949 inc edi
Chris@4 950
Chris@4 951 mov esi, [esp+8] /* move in back to esi, toss from */
Chris@4 952 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 953 jmp L_while_test
Chris@4 954
Chris@4 955 L_copy_two:
Chris@4 956 rep movsw
Chris@4 957 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@4 958 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 959 jmp L_while_test
Chris@4 960
Chris@4 961 ALIGN 4
Chris@4 962 L_check_dist_one:
Chris@4 963 cmp ebp, 1 /* if dist 1, is a memset */
Chris@4 964 jne L_check_window
Chris@4 965 cmp [esp+20], edi
Chris@4 966 je L_check_window /* out == beg, if outside window */
Chris@4 967
Chris@4 968 mov ecx, [esp+64] /* ecx = len */
Chris@4 969 mov al, [edi-1]
Chris@4 970 mov ah, al
Chris@4 971
Chris@4 972 sar ecx, 1
Chris@4 973 jnc L_set_two
Chris@4 974 mov [edi], al /* memset out with from[-1] */
Chris@4 975 inc edi
Chris@4 976
Chris@4 977 L_set_two:
Chris@4 978 rep stosw
Chris@4 979 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 980 jmp L_while_test
Chris@4 981
Chris@4 982 ALIGN 4
Chris@4 983 L_test_for_second_level_length:
Chris@4 984 test al, 64
Chris@4 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
Chris@4 986
Chris@4 987 xor eax, eax
Chris@4 988 inc eax
Chris@4 989 shl eax, cl
Chris@4 990 dec eax
Chris@4 991 and eax, edx /* eax &= hold */
Chris@4 992 add eax, [esp+64] /* eax += len */
Chris@4 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
Chris@4 994 jmp L_dolen
Chris@4 995
Chris@4 996 ALIGN 4
Chris@4 997 L_test_for_second_level_dist:
Chris@4 998 test al, 64
Chris@4 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
Chris@4 1000
Chris@4 1001 xor eax, eax
Chris@4 1002 inc eax
Chris@4 1003 shl eax, cl
Chris@4 1004 dec eax
Chris@4 1005 and eax, edx /* eax &= hold */
Chris@4 1006 add eax, ebp /* eax += dist */
Chris@4 1007 mov ecx, [esp+36] /* ecx = dcode */
Chris@4 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
Chris@4 1009 jmp L_dodist
Chris@4 1010
Chris@4 1011 ALIGN 4
Chris@4 1012 L_clip_window:
Chris@4 1013 mov ecx, eax
Chris@4 1014 mov eax, [esp+48] /* eax = wsize */
Chris@4 1015 neg ecx /* nbytes = -nbytes */
Chris@4 1016 mov esi, [esp+28] /* from = window */
Chris@4 1017
Chris@4 1018 cmp eax, ebp
Chris@4 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
Chris@4 1020
Chris@4 1021 add ecx, ebp /* nbytes = dist - nbytes */
Chris@4 1022 cmp dword ptr [esp+52], 0
Chris@4 1023 jne L_wrap_around_window /* if (write != 0) */
Chris@4 1024
Chris@4 1025 sub eax, ecx
Chris@4 1026 add esi, eax /* from += wsize - nbytes */
Chris@4 1027
Chris@4 1028 mov eax, [esp+64] /* eax = len */
Chris@4 1029 cmp eax, ecx
Chris@4 1030 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1031
Chris@4 1032 sub eax, ecx /* len -= nbytes */
Chris@4 1033 rep movsb
Chris@4 1034 mov esi, edi
Chris@4 1035 sub esi, ebp /* from = out - dist */
Chris@4 1036 jmp L_do_copy
Chris@4 1037
Chris@4 1038 ALIGN 4
Chris@4 1039 L_wrap_around_window:
Chris@4 1040 mov eax, [esp+52] /* eax = write */
Chris@4 1041 cmp ecx, eax
Chris@4 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
Chris@4 1043
Chris@4 1044 add esi, [esp+48] /* from += wsize */
Chris@4 1045 add esi, eax /* from += write */
Chris@4 1046 sub esi, ecx /* from -= nbytes */
Chris@4 1047 sub ecx, eax /* nbytes -= write */
Chris@4 1048
Chris@4 1049 mov eax, [esp+64] /* eax = len */
Chris@4 1050 cmp eax, ecx
Chris@4 1051 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1052
Chris@4 1053 sub eax, ecx /* len -= nbytes */
Chris@4 1054 rep movsb
Chris@4 1055 mov esi, [esp+28] /* from = window */
Chris@4 1056 mov ecx, [esp+52] /* nbytes = write */
Chris@4 1057 cmp eax, ecx
Chris@4 1058 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1059
Chris@4 1060 sub eax, ecx /* len -= nbytes */
Chris@4 1061 rep movsb
Chris@4 1062 mov esi, edi
Chris@4 1063 sub esi, ebp /* from = out - dist */
Chris@4 1064 jmp L_do_copy
Chris@4 1065
Chris@4 1066 ALIGN 4
Chris@4 1067 L_contiguous_in_window:
Chris@4 1068 add esi, eax
Chris@4 1069 sub esi, ecx /* from += write - nbytes */
Chris@4 1070
Chris@4 1071 mov eax, [esp+64] /* eax = len */
Chris@4 1072 cmp eax, ecx
Chris@4 1073 jbe L_do_copy /* if (nbytes >= len) */
Chris@4 1074
Chris@4 1075 sub eax, ecx /* len -= nbytes */
Chris@4 1076 rep movsb
Chris@4 1077 mov esi, edi
Chris@4 1078 sub esi, ebp /* from = out - dist */
Chris@4 1079 jmp L_do_copy
Chris@4 1080
Chris@4 1081 ALIGN 4
Chris@4 1082 L_do_copy:
Chris@4 1083 mov ecx, eax
Chris@4 1084 rep movsb
Chris@4 1085
Chris@4 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@4 1087 mov ebp, [esp+32] /* ebp = lcode */
Chris@4 1088 jmp L_while_test
Chris@4 1089
Chris@4 1090 L_test_for_end_of_block:
Chris@4 1091 test al, 32
Chris@4 1092 jz L_invalid_literal_length_code
Chris@4 1093 mov dword ptr [esp+72], 1
Chris@4 1094 jmp L_break_loop_with_status
Chris@4 1095
Chris@4 1096 L_invalid_literal_length_code:
Chris@4 1097 mov dword ptr [esp+72], 2
Chris@4 1098 jmp L_break_loop_with_status
Chris@4 1099
Chris@4 1100 L_invalid_distance_code:
Chris@4 1101 mov dword ptr [esp+72], 3
Chris@4 1102 jmp L_break_loop_with_status
Chris@4 1103
Chris@4 1104 L_invalid_distance_too_far:
Chris@4 1105 mov esi, [esp+8] /* restore in (esi held from); [esp+4] is the saved ebp, not in */
Chris@4 1106 mov dword ptr [esp+72], 4 /* status 4: reported as "invalid distance too far back" */
Chris@4 1107 jmp L_break_loop_with_status
Chris@4 1108
Chris@4 1109 L_break_loop:
Chris@4 1110 mov dword ptr [esp+72], 0
Chris@4 1111
Chris@4 1112 L_break_loop_with_status:
Chris@4 1113 /* put in, out, bits, and hold back into ar and pop esp */
Chris@4 1114 mov [esp+8], esi /* save in */
Chris@4 1115 mov [esp+16], edi /* save out */
Chris@4 1116 mov [esp+44], ebx /* save bits */
Chris@4 1117 mov [esp+40], edx /* save hold */
Chris@4 1118 mov ebp, [esp+4] /* restore esp, ebp */
Chris@4 1119 mov esp, [esp]
Chris@4 1120 }
Chris@4 1121 #else
Chris@4 1122 #error "x86 architecture not defined"
Chris@4 1123 #endif
Chris@4 1124
Chris@4 1125 if (ar.status > 1) {
Chris@4 1126 if (ar.status == 2)
Chris@4 1127 strm->msg = "invalid literal/length code";
Chris@4 1128 else if (ar.status == 3)
Chris@4 1129 strm->msg = "invalid distance code";
Chris@4 1130 else
Chris@4 1131 strm->msg = "invalid distance too far back";
Chris@4 1132 state->mode = BAD;
Chris@4 1133 }
Chris@4 1134 else if ( ar.status == 1 ) {
Chris@4 1135 state->mode = TYPE;
Chris@4 1136 }
Chris@4 1137
Chris@4 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
Chris@4 1139 ar.len = ar.bits >> 3;
Chris@4 1140 ar.in -= ar.len;
Chris@4 1141 ar.bits -= ar.len << 3;
Chris@4 1142 ar.hold &= (1U << ar.bits) - 1;
Chris@4 1143
Chris@4 1144 /* update state and return */
Chris@4 1145 strm->next_in = ar.in;
Chris@4 1146 strm->next_out = ar.out;
Chris@4 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
Chris@4 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
Chris@4 1149 PAD_AVAIL_IN - (ar.in - ar.last));
Chris@4 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
Chris@4 1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
Chris@4 1152 PAD_AVAIL_OUT - (ar.out - ar.end));
Chris@4 1153 state->hold = ar.hold;
Chris@4 1154 state->bits = ar.bits;
Chris@4 1155 return;
Chris@4 1156 }
Chris@4 1157