annotate src/zlib-1.2.8/contrib/inflate86/inffas86.c @ 79:91c729825bca pa_catalina

Update build for AUDIO_COMPONENT_FIX
author Chris Cannam
date Wed, 30 Oct 2019 12:40:34 +0000
parents 5ea0608b923f
children
rev   line source
Chris@43 1 /* inffas86.c is a hand-tuned assembler version of
Chris@43 2 *
Chris@43 3 * inffast.c -- fast decoding
Chris@43 4 * Copyright (C) 1995-2003 Mark Adler
Chris@43 5 * For conditions of distribution and use, see copyright notice in zlib.h
Chris@43 6 *
Chris@43 7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@43 8 * Please use the copyright conditions above.
Chris@43 9 *
Chris@43 10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
Chris@43 11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
Chris@43 12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
Chris@43 13 * bytes. I've tested the AMD64 code on Fedora Core 1 plus the x86_64 updates
Chris@43 14 * from http://fedora.linux.duke.edu/fc1_x86_64
Chris@43 15 * running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
Chris@43 16 * 1GB of RAM. The 64-bit version is about 4% faster than the 32-bit version
Chris@43 17 * when decompressing mozilla-source-1.3.tar.gz.
Chris@43 18 *
Chris@43 19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
Chris@43 20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
Chris@43 21 * the moment. I have successfully compiled and tested this code with gcc2.96,
Chris@43 22 * gcc3.2, icc5.0, and msvc6.0. It is very close to the speed of inffast.S
Chris@43 23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
Chris@43 24 * enabled. I will attempt to merge the MMX code into this version. Newer
Chris@43 25 * versions of this and inffast.S can be found at
Chris@43 26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
Chris@43 27 */
Chris@43 28
Chris@43 29 #include "zutil.h"
Chris@43 30 #include "inftrees.h"
Chris@43 31 #include "inflate.h"
Chris@43 32 #include "inffast.h"
Chris@43 33
Chris@43 34 /* Mark Adler's comments from inffast.c: */
Chris@43 35
Chris@43 36 /*
Chris@43 37 Decode literal, length, and distance codes and write out the resulting
Chris@43 38 literal and match bytes until either not enough input or output is
Chris@43 39 available, an end-of-block is encountered, or a data error is encountered.
Chris@43 40 When large enough input and output buffers are supplied to inflate(), for
Chris@43 41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
Chris@43 42 inflate execution time is spent in this routine.
Chris@43 43
Chris@43 44 Entry assumptions:
Chris@43 45
Chris@43 46 state->mode == LEN
Chris@43 47 strm->avail_in >= 6
Chris@43 48 strm->avail_out >= 258
Chris@43 49 start >= strm->avail_out
Chris@43 50 state->bits < 8
Chris@43 51
Chris@43 52 On return, state->mode is one of:
Chris@43 53
Chris@43 54 LEN -- ran out of enough output space or enough available input
Chris@43 55 TYPE -- reached end of block code, inflate() to interpret next block
Chris@43 56 BAD -- error in block data
Chris@43 57
Chris@43 58 Notes:
Chris@43 59
Chris@43 60 - The maximum input bits used by a length/distance pair is 15 bits for the
Chris@43 61 length code, 5 bits for the length extra, 15 bits for the distance code,
Chris@43 62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
Chris@43 63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
Chris@43 64 checking for available input while decoding.
Chris@43 65
Chris@43 66 - The maximum bytes that a single length/distance pair can output is 258
Chris@43 67 bytes, which is the maximum length that can be coded. inflate_fast()
Chris@43 68 requires strm->avail_out >= 258 for each loop to avoid checking for
Chris@43 69 output space.
Chris@43 70 */
Chris@43 71 void inflate_fast(strm, start)
Chris@43 72 z_streamp strm;
Chris@43 73 unsigned start; /* inflate()'s starting value for strm->avail_out */
Chris@43 74 {
Chris@43 75 struct inflate_state FAR *state;
Chris@43 76 struct inffast_ar {
Chris@43 77 /* 64 32 x86 x86_64 */
Chris@43 78 /* ar offset register */
Chris@43 79 /* 0 0 */ void *esp; /* esp save */
Chris@43 80 /* 8 4 */ void *ebp; /* ebp save */
Chris@43 81 /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
Chris@43 82 /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
Chris@43 83 /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
Chris@43 84 /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
Chris@43 85 /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
Chris@43 86 /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
Chris@43 87 /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
Chris@43 88 /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
Chris@43 89 /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
Chris@43 90 /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
Chris@43 91 /* 92 48 */ unsigned wsize; /* window size */
Chris@43 92 /* 96 52 */ unsigned write; /* window write index */
Chris@43 93 /*100 56 */ unsigned lmask; /* r12 mask for lcode */
Chris@43 94 /*104 60 */ unsigned dmask; /* r13 mask for dcode */
Chris@43 95 /*108 64 */ unsigned len; /* r14 match length */
Chris@43 96 /*112 68 */ unsigned dist; /* r15 match distance */
Chris@43 97 /*116 72 */ unsigned status; /* set when state chng*/
Chris@43 98 } ar;
Chris@43 99
Chris@43 100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@43 101 #define PAD_AVAIL_IN 6
Chris@43 102 #define PAD_AVAIL_OUT 258
Chris@43 103 #else
Chris@43 104 #define PAD_AVAIL_IN 5
Chris@43 105 #define PAD_AVAIL_OUT 257
Chris@43 106 #endif
Chris@43 107
Chris@43 108 /* copy state to local variables */
Chris@43 109 state = (struct inflate_state FAR *)strm->state;
Chris@43 110 ar.in = strm->next_in;
Chris@43 111 ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
Chris@43 112 ar.out = strm->next_out;
Chris@43 113 ar.beg = ar.out - (start - strm->avail_out);
Chris@43 114 ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
Chris@43 115 ar.wsize = state->wsize;
Chris@43 116 ar.write = state->wnext;
Chris@43 117 ar.window = state->window;
Chris@43 118 ar.hold = state->hold;
Chris@43 119 ar.bits = state->bits;
Chris@43 120 ar.lcode = state->lencode;
Chris@43 121 ar.dcode = state->distcode;
Chris@43 122 ar.lmask = (1U << state->lenbits) - 1;
Chris@43 123 ar.dmask = (1U << state->distbits) - 1;
Chris@43 124
Chris@43 125 /* decode literals and length/distances until end-of-block or not enough
Chris@43 126 input data or output space */
Chris@43 127
Chris@43 128 /* align in on 1/2 hold size boundary */
Chris@43 129 while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
Chris@43 130 ar.hold += (unsigned long)*ar.in++ << ar.bits;
Chris@43 131 ar.bits += 8;
Chris@43 132 }
Chris@43 133
Chris@43 134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
Chris@43 135 __asm__ __volatile__ (
Chris@43 136 " leaq %0, %%rax\n"
Chris@43 137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
Chris@43 138 " movq %%rsp, (%%rax)\n"
Chris@43 139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
Chris@43 140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
Chris@43 141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
Chris@43 142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
Chris@43 143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
Chris@43 144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
Chris@43 145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
Chris@43 146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
Chris@43 147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
Chris@43 148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
Chris@43 149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
Chris@43 150 /* r14d = len */
Chris@43 151 /* r15d = dist */
Chris@43 152 " cld\n"
Chris@43 153 " cmpq %%rdi, %%r10\n"
Chris@43 154 " je .L_one_time\n" /* if only one decode left */
Chris@43 155 " cmpq %%rsi, %%r9\n"
Chris@43 156 " je .L_one_time\n"
Chris@43 157 " jmp .L_do_loop\n"
Chris@43 158
Chris@43 159 ".L_one_time:\n"
Chris@43 160 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@43 161 " cmpb $32, %%bl\n"
Chris@43 162 " ja .L_get_length_code_one_time\n"
Chris@43 163
Chris@43 164 " lodsl\n" /* eax = *(uint *)in++ */
Chris@43 165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 166 " addb $32, %%bl\n" /* bits += 32 */
Chris@43 167 " shlq %%cl, %%rax\n"
Chris@43 168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@43 169 " jmp .L_get_length_code_one_time\n"
Chris@43 170
Chris@43 171 ".align 32,0x90\n"
Chris@43 172 ".L_while_test:\n"
Chris@43 173 " cmpq %%rdi, %%r10\n"
Chris@43 174 " jbe .L_break_loop\n"
Chris@43 175 " cmpq %%rsi, %%r9\n"
Chris@43 176 " jbe .L_break_loop\n"
Chris@43 177
Chris@43 178 ".L_do_loop:\n"
Chris@43 179 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@43 180 " cmpb $32, %%bl\n"
Chris@43 181 " ja .L_get_length_code\n" /* if (32 < bits) */
Chris@43 182
Chris@43 183 " lodsl\n" /* eax = *(uint *)in++ */
Chris@43 184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 185 " addb $32, %%bl\n" /* bits += 32 */
Chris@43 186 " shlq %%cl, %%rax\n"
Chris@43 187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@43 188
Chris@43 189 ".L_get_length_code:\n"
Chris@43 190 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@43 191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@43 192
Chris@43 193 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@43 194 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@43 195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@43 196
Chris@43 197 " testb %%al, %%al\n"
Chris@43 198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@43 199
Chris@43 200 " movq %%r12, %%r8\n" /* r8 = lmask */
Chris@43 201 " shrl $16, %%eax\n" /* output this.val char */
Chris@43 202 " stosb\n"
Chris@43 203
Chris@43 204 ".L_get_length_code_one_time:\n"
Chris@43 205 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@43 206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@43 207
Chris@43 208 ".L_dolen:\n"
Chris@43 209 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@43 210 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@43 211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@43 212
Chris@43 213 " testb %%al, %%al\n"
Chris@43 214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@43 215
Chris@43 216 " shrl $16, %%eax\n" /* output this.val char */
Chris@43 217 " stosb\n"
Chris@43 218 " jmp .L_while_test\n"
Chris@43 219
Chris@43 220 ".align 32,0x90\n"
Chris@43 221 ".L_test_for_length_base:\n"
Chris@43 222 " movl %%eax, %%r14d\n" /* len = this */
Chris@43 223 " shrl $16, %%r14d\n" /* len = this.val */
Chris@43 224 " movb %%al, %%cl\n"
Chris@43 225
Chris@43 226 " testb $16, %%al\n"
Chris@43 227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@43 228 " andb $15, %%cl\n" /* op &= 15 */
Chris@43 229 " jz .L_decode_distance\n" /* if (!op) */
Chris@43 230
Chris@43 231 ".L_add_bits_to_len:\n"
Chris@43 232 " subb %%cl, %%bl\n"
Chris@43 233 " xorl %%eax, %%eax\n"
Chris@43 234 " incl %%eax\n"
Chris@43 235 " shll %%cl, %%eax\n"
Chris@43 236 " decl %%eax\n"
Chris@43 237 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 238 " shrq %%cl, %%rdx\n"
Chris@43 239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
Chris@43 240
Chris@43 241 ".L_decode_distance:\n"
Chris@43 242 " movq %%r13, %%r8\n" /* r8 = dmask */
Chris@43 243 " cmpb $32, %%bl\n"
Chris@43 244 " ja .L_get_distance_code\n" /* if (32 < bits) */
Chris@43 245
Chris@43 246 " lodsl\n" /* eax = *(uint *)in++ */
Chris@43 247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 248 " addb $32, %%bl\n" /* bits += 32 */
Chris@43 249 " shlq %%cl, %%rax\n"
Chris@43 250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
Chris@43 251
Chris@43 252 ".L_get_distance_code:\n"
Chris@43 253 " andq %%rdx, %%r8\n" /* r8 &= hold */
Chris@43 254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
Chris@43 255
Chris@43 256 ".L_dodist:\n"
Chris@43 257 " movl %%eax, %%r15d\n" /* dist = this */
Chris@43 258 " shrl $16, %%r15d\n" /* dist = this.val */
Chris@43 259 " movb %%ah, %%cl\n"
Chris@43 260 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@43 261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
Chris@43 262 " movb %%al, %%cl\n" /* cl = this.op */
Chris@43 263
Chris@43 264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@43 265 " jz .L_test_for_second_level_dist\n"
Chris@43 266 " andb $15, %%cl\n" /* op &= 15 */
Chris@43 267 " jz .L_check_dist_one\n"
Chris@43 268
Chris@43 269 ".L_add_bits_to_dist:\n"
Chris@43 270 " subb %%cl, %%bl\n"
Chris@43 271 " xorl %%eax, %%eax\n"
Chris@43 272 " incl %%eax\n"
Chris@43 273 " shll %%cl, %%eax\n"
Chris@43 274 " decl %%eax\n" /* (1 << op) - 1 */
Chris@43 275 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 276 " shrq %%cl, %%rdx\n"
Chris@43 277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
Chris@43 278
Chris@43 279 ".L_check_window:\n"
Chris@43 280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
Chris@43 281 " movq %%rdi, %%rax\n"
Chris@43 282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
Chris@43 283
Chris@43 284 " cmpl %%r15d, %%eax\n"
Chris@43 285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@43 286
Chris@43 287 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@43 288 " movq %%rdi, %%rsi\n"
Chris@43 289 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@43 290
Chris@43 291 " sarl %%ecx\n"
Chris@43 292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@43 293
Chris@43 294 " rep movsw\n"
Chris@43 295 " movb (%%rsi), %%al\n"
Chris@43 296 " movb %%al, (%%rdi)\n"
Chris@43 297 " incq %%rdi\n"
Chris@43 298
Chris@43 299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@43 300 " jmp .L_while_test\n"
Chris@43 301
Chris@43 302 ".L_copy_two:\n"
Chris@43 303 " rep movsw\n"
Chris@43 304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
Chris@43 305 " jmp .L_while_test\n"
Chris@43 306
Chris@43 307 ".align 32,0x90\n"
Chris@43 308 ".L_check_dist_one:\n"
Chris@43 309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
Chris@43 310 " jne .L_check_window\n"
Chris@43 311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
Chris@43 312 " je .L_check_window\n"
Chris@43 313
Chris@43 314 " movl %%r14d, %%ecx\n" /* ecx = len */
Chris@43 315 " movb -1(%%rdi), %%al\n"
Chris@43 316 " movb %%al, %%ah\n"
Chris@43 317
Chris@43 318 " sarl %%ecx\n"
Chris@43 319 " jnc .L_set_two\n"
Chris@43 320 " movb %%al, (%%rdi)\n"
Chris@43 321 " incq %%rdi\n"
Chris@43 322
Chris@43 323 ".L_set_two:\n"
Chris@43 324 " rep stosw\n"
Chris@43 325 " jmp .L_while_test\n"
Chris@43 326
Chris@43 327 ".align 32,0x90\n"
Chris@43 328 ".L_test_for_second_level_length:\n"
Chris@43 329 " testb $64, %%al\n"
Chris@43 330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@43 331
Chris@43 332 " xorl %%eax, %%eax\n"
Chris@43 333 " incl %%eax\n"
Chris@43 334 " shll %%cl, %%eax\n"
Chris@43 335 " decl %%eax\n"
Chris@43 336 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 337 " addl %%r14d, %%eax\n" /* eax += len */
Chris@43 338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@43 339 " jmp .L_dolen\n"
Chris@43 340
Chris@43 341 ".align 32,0x90\n"
Chris@43 342 ".L_test_for_second_level_dist:\n"
Chris@43 343 " testb $64, %%al\n"
Chris@43 344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@43 345
Chris@43 346 " xorl %%eax, %%eax\n"
Chris@43 347 " incl %%eax\n"
Chris@43 348 " shll %%cl, %%eax\n"
Chris@43 349 " decl %%eax\n"
Chris@43 350 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 351 " addl %%r15d, %%eax\n" /* eax += dist */
Chris@43 352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@43 353 " jmp .L_dodist\n"
Chris@43 354
Chris@43 355 ".align 32,0x90\n"
Chris@43 356 ".L_clip_window:\n"
Chris@43 357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
Chris@43 358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
Chris@43 359 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@43 360
Chris@43 361 " cmpl %%r15d, %%eax\n"
Chris@43 362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@43 363
Chris@43 364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
Chris@43 365 " cmpl $0, 96(%%rsp)\n"
Chris@43 366 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@43 367
Chris@43 368 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@43 369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@43 370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
Chris@43 371
Chris@43 372 " movl %%r14d, %%eax\n" /* eax = len */
Chris@43 373 " cmpl %%ecx, %%r14d\n"
Chris@43 374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 375
Chris@43 376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
Chris@43 377 " rep movsb\n"
Chris@43 378 " movq %%rdi, %%rsi\n"
Chris@43 379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
Chris@43 380 " jmp .L_do_copy\n"
Chris@43 381
Chris@43 382 ".align 32,0x90\n"
Chris@43 383 ".L_wrap_around_window:\n"
Chris@43 384 " movl 96(%%rsp), %%eax\n" /* eax = write */
Chris@43 385 " cmpl %%eax, %%ecx\n"
Chris@43 386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@43 387
Chris@43 388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
Chris@43 389 " addq 56(%%rsp), %%rsi\n" /* from += window */
Chris@43 390 " addq %%rax, %%rsi\n" /* from += write */
Chris@43 391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
Chris@43 392 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@43 393
Chris@43 394 " movl %%r14d, %%eax\n" /* eax = len */
Chris@43 395 " cmpl %%ecx, %%eax\n"
Chris@43 396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 397
Chris@43 398 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 399 " rep movsb\n"
Chris@43 400 " movq 56(%%rsp), %%rsi\n" /* from = window */
Chris@43 401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
Chris@43 402 " cmpl %%ecx, %%eax\n"
Chris@43 403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 404
Chris@43 405 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 406 " rep movsb\n"
Chris@43 407 " movq %%rdi, %%rsi\n"
Chris@43 408 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@43 409 " jmp .L_do_copy\n"
Chris@43 410
Chris@43 411 ".align 32,0x90\n"
Chris@43 412 ".L_contiguous_in_window:\n"
Chris@43 413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
Chris@43 414 " addq %%rax, %%rsi\n"
Chris@43 415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
Chris@43 416
Chris@43 417 " movl %%r14d, %%eax\n" /* eax = len */
Chris@43 418 " cmpl %%ecx, %%eax\n"
Chris@43 419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 420
Chris@43 421 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 422 " rep movsb\n"
Chris@43 423 " movq %%rdi, %%rsi\n"
Chris@43 424 " subq %%r15, %%rsi\n" /* from = out - dist */
Chris@43 425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 426
Chris@43 427 ".align 32,0x90\n"
Chris@43 428 ".L_do_copy:\n"
Chris@43 429 " movl %%eax, %%ecx\n" /* ecx = len */
Chris@43 430 " rep movsb\n"
Chris@43 431
Chris@43 432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
Chris@43 433 " jmp .L_while_test\n"
Chris@43 434
Chris@43 435 ".L_test_for_end_of_block:\n"
Chris@43 436 " testb $32, %%al\n"
Chris@43 437 " jz .L_invalid_literal_length_code\n"
Chris@43 438 " movl $1, 116(%%rsp)\n"
Chris@43 439 " jmp .L_break_loop_with_status\n"
Chris@43 440
Chris@43 441 ".L_invalid_literal_length_code:\n"
Chris@43 442 " movl $2, 116(%%rsp)\n"
Chris@43 443 " jmp .L_break_loop_with_status\n"
Chris@43 444
Chris@43 445 ".L_invalid_distance_code:\n"
Chris@43 446 " movl $3, 116(%%rsp)\n"
Chris@43 447 " jmp .L_break_loop_with_status\n"
Chris@43 448
Chris@43 449 ".L_invalid_distance_too_far:\n"
Chris@43 450 " movl $4, 116(%%rsp)\n"
Chris@43 451 " jmp .L_break_loop_with_status\n"
Chris@43 452
Chris@43 453 ".L_break_loop:\n"
Chris@43 454 " movl $0, 116(%%rsp)\n"
Chris@43 455
Chris@43 456 ".L_break_loop_with_status:\n"
Chris@43 457 /* put in, out, bits, and hold back into ar and pop esp */
Chris@43 458 " movq %%rsi, 16(%%rsp)\n" /* in */
Chris@43 459 " movq %%rdi, 32(%%rsp)\n" /* out */
Chris@43 460 " movl %%ebx, 88(%%rsp)\n" /* bits */
Chris@43 461 " movq %%rdx, 80(%%rsp)\n" /* hold */
Chris@43 462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
Chris@43 463 " movq 8(%%rsp), %%rbp\n"
Chris@43 464 " movq %%rax, %%rsp\n"
Chris@43 465 :
Chris@43 466 : "m" (ar)
Chris@43 467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
Chris@43 468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
Chris@43 469 );
Chris@43 470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
Chris@43 471 __asm__ __volatile__ (
Chris@43 472 " leal %0, %%eax\n"
Chris@43 473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
Chris@43 474 " movl %%ebp, 4(%%eax)\n"
Chris@43 475 " movl %%eax, %%esp\n"
Chris@43 476 " movl 8(%%esp), %%esi\n" /* esi = in */
Chris@43 477 " movl 16(%%esp), %%edi\n" /* edi = out */
Chris@43 478 " movl 40(%%esp), %%edx\n" /* edx = hold */
Chris@43 479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
Chris@43 480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@43 481
Chris@43 482 " cld\n"
Chris@43 483 " jmp .L_do_loop\n"
Chris@43 484
Chris@43 485 ".align 32,0x90\n"
Chris@43 486 ".L_while_test:\n"
Chris@43 487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
Chris@43 488 " jbe .L_break_loop\n"
Chris@43 489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
Chris@43 490 " jbe .L_break_loop\n"
Chris@43 491
Chris@43 492 ".L_do_loop:\n"
Chris@43 493 " cmpb $15, %%bl\n"
Chris@43 494 " ja .L_get_length_code\n" /* if (15 < bits) */
Chris@43 495
Chris@43 496 " xorl %%eax, %%eax\n"
Chris@43 497 " lodsw\n" /* al = *(ushort *)in++ */
Chris@43 498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 499 " addb $16, %%bl\n" /* bits += 16 */
Chris@43 500 " shll %%cl, %%eax\n"
Chris@43 501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@43 502
Chris@43 503 ".L_get_length_code:\n"
Chris@43 504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
Chris@43 505 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
Chris@43 507
Chris@43 508 ".L_dolen:\n"
Chris@43 509 " movb %%ah, %%cl\n" /* cl = this.bits */
Chris@43 510 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@43 511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@43 512
Chris@43 513 " testb %%al, %%al\n"
Chris@43 514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
Chris@43 515
Chris@43 516 " shrl $16, %%eax\n" /* output this.val char */
Chris@43 517 " stosb\n"
Chris@43 518 " jmp .L_while_test\n"
Chris@43 519
Chris@43 520 ".align 32,0x90\n"
Chris@43 521 ".L_test_for_length_base:\n"
Chris@43 522 " movl %%eax, %%ecx\n" /* len = this */
Chris@43 523 " shrl $16, %%ecx\n" /* len = this.val */
Chris@43 524 " movl %%ecx, 64(%%esp)\n" /* save len */
Chris@43 525 " movb %%al, %%cl\n"
Chris@43 526
Chris@43 527 " testb $16, %%al\n"
Chris@43 528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
Chris@43 529 " andb $15, %%cl\n" /* op &= 15 */
Chris@43 530 " jz .L_decode_distance\n" /* if (!op) */
Chris@43 531 " cmpb %%cl, %%bl\n"
Chris@43 532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
Chris@43 533
Chris@43 534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@43 535 " xorl %%eax, %%eax\n"
Chris@43 536 " lodsw\n" /* al = *(ushort *)in++ */
Chris@43 537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 538 " addb $16, %%bl\n" /* bits += 16 */
Chris@43 539 " shll %%cl, %%eax\n"
Chris@43 540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@43 541 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@43 542
Chris@43 543 ".L_add_bits_to_len:\n"
Chris@43 544 " subb %%cl, %%bl\n"
Chris@43 545 " xorl %%eax, %%eax\n"
Chris@43 546 " incl %%eax\n"
Chris@43 547 " shll %%cl, %%eax\n"
Chris@43 548 " decl %%eax\n"
Chris@43 549 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 550 " shrl %%cl, %%edx\n"
Chris@43 551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
Chris@43 552
Chris@43 553 ".L_decode_distance:\n"
Chris@43 554 " cmpb $15, %%bl\n"
Chris@43 555 " ja .L_get_distance_code\n" /* if (15 < bits) */
Chris@43 556
Chris@43 557 " xorl %%eax, %%eax\n"
Chris@43 558 " lodsw\n" /* al = *(ushort *)in++ */
Chris@43 559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 560 " addb $16, %%bl\n" /* bits += 16 */
Chris@43 561 " shll %%cl, %%eax\n"
Chris@43 562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@43 563
Chris@43 564 ".L_get_distance_code:\n"
Chris@43 565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
Chris@43 566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@43 567 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
Chris@43 569
Chris@43 570 ".L_dodist:\n"
Chris@43 571 " movl %%eax, %%ebp\n" /* dist = this */
Chris@43 572 " shrl $16, %%ebp\n" /* dist = this.val */
Chris@43 573 " movb %%ah, %%cl\n"
Chris@43 574 " subb %%ah, %%bl\n" /* bits -= this.bits */
Chris@43 575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
Chris@43 576 " movb %%al, %%cl\n" /* cl = this.op */
Chris@43 577
Chris@43 578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
Chris@43 579 " jz .L_test_for_second_level_dist\n"
Chris@43 580 " andb $15, %%cl\n" /* op &= 15 */
Chris@43 581 " jz .L_check_dist_one\n"
Chris@43 582 " cmpb %%cl, %%bl\n"
Chris@43 583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
Chris@43 584
Chris@43 585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
Chris@43 586 " xorl %%eax, %%eax\n"
Chris@43 587 " lodsw\n" /* al = *(ushort *)in++ */
Chris@43 588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
Chris@43 589 " addb $16, %%bl\n" /* bits += 16 */
Chris@43 590 " shll %%cl, %%eax\n"
Chris@43 591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
Chris@43 592 " movb %%ch, %%cl\n" /* move op back to ecx */
Chris@43 593
Chris@43 594 ".L_add_bits_to_dist:\n"
Chris@43 595 " subb %%cl, %%bl\n"
Chris@43 596 " xorl %%eax, %%eax\n"
Chris@43 597 " incl %%eax\n"
Chris@43 598 " shll %%cl, %%eax\n"
Chris@43 599 " decl %%eax\n" /* (1 << op) - 1 */
Chris@43 600 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 601 " shrl %%cl, %%edx\n"
Chris@43 602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
Chris@43 603
Chris@43 604 ".L_check_window:\n"
Chris@43 605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
Chris@43 606 " movl %%edi, %%eax\n"
Chris@43 607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
Chris@43 608
Chris@43 609 " cmpl %%ebp, %%eax\n"
Chris@43 610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
Chris@43 611
Chris@43 612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@43 613 " movl %%edi, %%esi\n"
Chris@43 614 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@43 615
Chris@43 616 " sarl %%ecx\n"
Chris@43 617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
Chris@43 618
Chris@43 619 " rep movsw\n"
Chris@43 620 " movb (%%esi), %%al\n"
Chris@43 621 " movb %%al, (%%edi)\n"
Chris@43 622 " incl %%edi\n"
Chris@43 623
Chris@43 624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@43 625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@43 626 " jmp .L_while_test\n"
Chris@43 627
Chris@43 628 ".L_copy_two:\n"
Chris@43 629 " rep movsw\n"
Chris@43 630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@43 631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@43 632 " jmp .L_while_test\n"
Chris@43 633
Chris@43 634 ".align 32,0x90\n"
Chris@43 635 ".L_check_dist_one:\n"
Chris@43 636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
Chris@43 637 " jne .L_check_window\n"
Chris@43 638 " cmpl %%edi, 20(%%esp)\n"
Chris@43 639 " je .L_check_window\n" /* out == beg, if outside window */
Chris@43 640
Chris@43 641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
Chris@43 642 " movb -1(%%edi), %%al\n"
Chris@43 643 " movb %%al, %%ah\n"
Chris@43 644
Chris@43 645 " sarl %%ecx\n"
Chris@43 646 " jnc .L_set_two\n"
Chris@43 647 " movb %%al, (%%edi)\n"
Chris@43 648 " incl %%edi\n"
Chris@43 649
Chris@43 650 ".L_set_two:\n"
Chris@43 651 " rep stosw\n"
Chris@43 652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@43 653 " jmp .L_while_test\n"
Chris@43 654
Chris@43 655 ".align 32,0x90\n"
Chris@43 656 ".L_test_for_second_level_length:\n"
Chris@43 657 " testb $64, %%al\n"
Chris@43 658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
Chris@43 659
Chris@43 660 " xorl %%eax, %%eax\n"
Chris@43 661 " incl %%eax\n"
Chris@43 662 " shll %%cl, %%eax\n"
Chris@43 663 " decl %%eax\n"
Chris@43 664 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 665 " addl 64(%%esp), %%eax\n" /* eax += len */
Chris@43 666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
Chris@43 667 " jmp .L_dolen\n"
Chris@43 668
Chris@43 669 ".align 32,0x90\n"
Chris@43 670 ".L_test_for_second_level_dist:\n"
Chris@43 671 " testb $64, %%al\n"
Chris@43 672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
Chris@43 673
Chris@43 674 " xorl %%eax, %%eax\n"
Chris@43 675 " incl %%eax\n"
Chris@43 676 " shll %%cl, %%eax\n"
Chris@43 677 " decl %%eax\n"
Chris@43 678 " andl %%edx, %%eax\n" /* eax &= hold */
Chris@43 679 " addl %%ebp, %%eax\n" /* eax += dist */
Chris@43 680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
Chris@43 681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
Chris@43 682 " jmp .L_dodist\n"
Chris@43 683
Chris@43 684 ".align 32,0x90\n"
Chris@43 685 ".L_clip_window:\n"
Chris@43 686 " movl %%eax, %%ecx\n"
Chris@43 687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
Chris@43 688 " negl %%ecx\n" /* nbytes = -nbytes */
Chris@43 689 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@43 690
Chris@43 691 " cmpl %%ebp, %%eax\n"
Chris@43 692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
Chris@43 693
Chris@43 694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
Chris@43 695 " cmpl $0, 52(%%esp)\n"
Chris@43 696 " jne .L_wrap_around_window\n" /* if (write != 0) */
Chris@43 697
Chris@43 698 " subl %%ecx, %%eax\n"
Chris@43 699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
Chris@43 700
Chris@43 701 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@43 702 " cmpl %%ecx, %%eax\n"
Chris@43 703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 704
Chris@43 705 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 706 " rep movsb\n"
Chris@43 707 " movl %%edi, %%esi\n"
Chris@43 708 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@43 709 " jmp .L_do_copy\n"
Chris@43 710
Chris@43 711 ".align 32,0x90\n"
Chris@43 712 ".L_wrap_around_window:\n"
Chris@43 713 " movl 52(%%esp), %%eax\n" /* eax = write */
Chris@43 714 " cmpl %%eax, %%ecx\n"
Chris@43 715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
Chris@43 716
Chris@43 717 " addl 48(%%esp), %%esi\n" /* from += wsize */
Chris@43 718 " addl %%eax, %%esi\n" /* from += write */
Chris@43 719 " subl %%ecx, %%esi\n" /* from -= nbytes */
Chris@43 720 " subl %%eax, %%ecx\n" /* nbytes -= write */
Chris@43 721
Chris@43 722 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@43 723 " cmpl %%ecx, %%eax\n"
Chris@43 724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 725
Chris@43 726 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 727 " rep movsb\n"
Chris@43 728 " movl 28(%%esp), %%esi\n" /* from = window */
Chris@43 729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
Chris@43 730 " cmpl %%ecx, %%eax\n"
Chris@43 731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 732
Chris@43 733 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 734 " rep movsb\n"
Chris@43 735 " movl %%edi, %%esi\n"
Chris@43 736 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@43 737 " jmp .L_do_copy\n"
Chris@43 738
Chris@43 739 ".align 32,0x90\n"
Chris@43 740 ".L_contiguous_in_window:\n"
Chris@43 741 " addl %%eax, %%esi\n"
Chris@43 742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
Chris@43 743
Chris@43 744 " movl 64(%%esp), %%eax\n" /* eax = len */
Chris@43 745 " cmpl %%ecx, %%eax\n"
Chris@43 746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 747
Chris@43 748 " subl %%ecx, %%eax\n" /* len -= nbytes */
Chris@43 749 " rep movsb\n"
Chris@43 750 " movl %%edi, %%esi\n"
Chris@43 751 " subl %%ebp, %%esi\n" /* from = out - dist */
Chris@43 752 " jmp .L_do_copy\n" /* if (nbytes >= len) */
Chris@43 753
Chris@43 754 ".align 32,0x90\n"
Chris@43 755 ".L_do_copy:\n"
Chris@43 756 " movl %%eax, %%ecx\n"
Chris@43 757 " rep movsb\n"
Chris@43 758
Chris@43 759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
Chris@43 760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
Chris@43 761 " jmp .L_while_test\n"
Chris@43 762
Chris@43 763 ".L_test_for_end_of_block:\n"
Chris@43 764 " testb $32, %%al\n"
Chris@43 765 " jz .L_invalid_literal_length_code\n"
Chris@43 766 " movl $1, 72(%%esp)\n"
Chris@43 767 " jmp .L_break_loop_with_status\n"
Chris@43 768
Chris@43 769 ".L_invalid_literal_length_code:\n"
Chris@43 770 " movl $2, 72(%%esp)\n"
Chris@43 771 " jmp .L_break_loop_with_status\n"
Chris@43 772
Chris@43 773 ".L_invalid_distance_code:\n"
Chris@43 774 " movl $3, 72(%%esp)\n"
Chris@43 775 " jmp .L_break_loop_with_status\n"
Chris@43 776
Chris@43 777 ".L_invalid_distance_too_far:\n"
Chris@43 778 " movl 8(%%esp), %%esi\n"
Chris@43 779 " movl $4, 72(%%esp)\n"
Chris@43 780 " jmp .L_break_loop_with_status\n"
Chris@43 781
Chris@43 782 ".L_break_loop:\n"
Chris@43 783 " movl $0, 72(%%esp)\n"
Chris@43 784
Chris@43 785 ".L_break_loop_with_status:\n"
Chris@43 786 /* put in, out, bits, and hold back into ar and pop esp */
Chris@43 787 " movl %%esi, 8(%%esp)\n" /* save in */
Chris@43 788 " movl %%edi, 16(%%esp)\n" /* save out */
Chris@43 789 " movl %%ebx, 44(%%esp)\n" /* save bits */
Chris@43 790 " movl %%edx, 40(%%esp)\n" /* save hold */
Chris@43 791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
Chris@43 792 " movl (%%esp), %%esp\n"
Chris@43 793 :
Chris@43 794 : "m" (ar)
Chris@43 795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
Chris@43 796 );
Chris@43 797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
Chris@43 798 __asm {
Chris@43 799 lea eax, ar
Chris@43 800 mov [eax], esp /* save esp, ebp */
Chris@43 801 mov [eax+4], ebp
Chris@43 802 mov esp, eax
Chris@43 803 mov esi, [esp+8] /* esi = in */
Chris@43 804 mov edi, [esp+16] /* edi = out */
Chris@43 805 mov edx, [esp+40] /* edx = hold */
Chris@43 806 mov ebx, [esp+44] /* ebx = bits */
Chris@43 807 mov ebp, [esp+32] /* ebp = lcode */
Chris@43 808
Chris@43 809 cld
Chris@43 810 jmp L_do_loop
Chris@43 811
Chris@43 812 ALIGN 4
Chris@43 813 L_while_test:
Chris@43 814 cmp [esp+24], edi
Chris@43 815 jbe L_break_loop
Chris@43 816 cmp [esp+12], esi
Chris@43 817 jbe L_break_loop
Chris@43 818
Chris@43 819 L_do_loop:
Chris@43 820 cmp bl, 15
Chris@43 821 ja L_get_length_code /* if (15 < bits) */
Chris@43 822
Chris@43 823 xor eax, eax
Chris@43 824 lodsw /* al = *(ushort *)in++ */
Chris@43 825 mov cl, bl /* cl = bits, needs it for shifting */
Chris@43 826 add bl, 16 /* bits += 16 */
Chris@43 827 shl eax, cl
Chris@43 828 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@43 829
Chris@43 830 L_get_length_code:
Chris@43 831 mov eax, [esp+56] /* eax = lmask */
Chris@43 832 and eax, edx /* eax &= hold */
Chris@43 833 mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
Chris@43 834
Chris@43 835 L_dolen:
Chris@43 836 mov cl, ah /* cl = this.bits */
Chris@43 837 sub bl, ah /* bits -= this.bits */
Chris@43 838 shr edx, cl /* hold >>= this.bits */
Chris@43 839
Chris@43 840 test al, al
Chris@43 841 jnz L_test_for_length_base /* if (op != 0) 45.7% */
Chris@43 842
Chris@43 843 shr eax, 16 /* output this.val char */
Chris@43 844 stosb
Chris@43 845 jmp L_while_test
Chris@43 846
Chris@43 847 ALIGN 4
Chris@43 848 L_test_for_length_base:
Chris@43 849 mov ecx, eax /* len = this */
Chris@43 850 shr ecx, 16 /* len = this.val */
Chris@43 851 mov [esp+64], ecx /* save len */
Chris@43 852 mov cl, al
Chris@43 853
Chris@43 854 test al, 16
Chris@43 855 jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
Chris@43 856 and cl, 15 /* op &= 15 */
Chris@43 857 jz L_decode_distance /* if (!op) */
Chris@43 858 cmp bl, cl
Chris@43 859 jae L_add_bits_to_len /* if (op <= bits) */
Chris@43 860
Chris@43 861 mov ch, cl /* stash op in ch, freeing cl */
Chris@43 862 xor eax, eax
Chris@43 863 lodsw /* al = *(ushort *)in++ */
Chris@43 864 mov cl, bl /* cl = bits, needs it for shifting */
Chris@43 865 add bl, 16 /* bits += 16 */
Chris@43 866 shl eax, cl
Chris@43 867 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@43 868 mov cl, ch /* move op back to ecx */
Chris@43 869
Chris@43 870 L_add_bits_to_len:
Chris@43 871 sub bl, cl
Chris@43 872 xor eax, eax
Chris@43 873 inc eax
Chris@43 874 shl eax, cl
Chris@43 875 dec eax
Chris@43 876 and eax, edx /* eax &= hold */
Chris@43 877 shr edx, cl
Chris@43 878 add [esp+64], eax /* len += hold & mask[op] */
Chris@43 879
Chris@43 880 L_decode_distance:
Chris@43 881 cmp bl, 15
Chris@43 882 ja L_get_distance_code /* if (15 < bits) */
Chris@43 883
Chris@43 884 xor eax, eax
Chris@43 885 lodsw /* al = *(ushort *)in++ */
Chris@43 886 mov cl, bl /* cl = bits, needs it for shifting */
Chris@43 887 add bl, 16 /* bits += 16 */
Chris@43 888 shl eax, cl
Chris@43 889 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@43 890
Chris@43 891 L_get_distance_code:
Chris@43 892 mov eax, [esp+60] /* eax = dmask */
Chris@43 893 mov ecx, [esp+36] /* ecx = dcode */
Chris@43 894 and eax, edx /* eax &= hold */
Chris@43 895 mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
Chris@43 896
Chris@43 897 L_dodist:
Chris@43 898 mov ebp, eax /* dist = this */
Chris@43 899 shr ebp, 16 /* dist = this.val */
Chris@43 900 mov cl, ah
Chris@43 901 sub bl, ah /* bits -= this.bits */
Chris@43 902 shr edx, cl /* hold >>= this.bits */
Chris@43 903 mov cl, al /* cl = this.op */
Chris@43 904
Chris@43 905 test al, 16 /* if ((op & 16) == 0) */
Chris@43 906 jz L_test_for_second_level_dist
Chris@43 907 and cl, 15 /* op &= 15 */
Chris@43 908 jz L_check_dist_one
Chris@43 909 cmp bl, cl
Chris@43 910 jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
Chris@43 911
Chris@43 912 mov ch, cl /* stash op in ch, freeing cl */
Chris@43 913 xor eax, eax
Chris@43 914 lodsw /* al = *(ushort *)in++ */
Chris@43 915 mov cl, bl /* cl = bits, needs it for shifting */
Chris@43 916 add bl, 16 /* bits += 16 */
Chris@43 917 shl eax, cl
Chris@43 918 or edx, eax /* hold |= *((ushort *)in)++ << bits */
Chris@43 919 mov cl, ch /* move op back to ecx */
Chris@43 920
Chris@43 921 L_add_bits_to_dist:
Chris@43 922 sub bl, cl
Chris@43 923 xor eax, eax
Chris@43 924 inc eax
Chris@43 925 shl eax, cl
Chris@43 926 dec eax /* (1 << op) - 1 */
Chris@43 927 and eax, edx /* eax &= hold */
Chris@43 928 shr edx, cl
Chris@43 929 add ebp, eax /* dist += hold & ((1 << op) - 1) */
Chris@43 930
Chris@43 931 L_check_window:
Chris@43 932 mov [esp+8], esi /* save in so from can use it's reg */
Chris@43 933 mov eax, edi
Chris@43 934 sub eax, [esp+20] /* nbytes = out - beg */
Chris@43 935
Chris@43 936 cmp eax, ebp
Chris@43 937 jb L_clip_window /* if (dist > nbytes) 4.2% */
Chris@43 938
Chris@43 939 mov ecx, [esp+64] /* ecx = len */
Chris@43 940 mov esi, edi
Chris@43 941 sub esi, ebp /* from = out - dist */
Chris@43 942
Chris@43 943 sar ecx, 1
Chris@43 944 jnc L_copy_two
Chris@43 945
Chris@43 946 rep movsw
Chris@43 947 mov al, [esi]
Chris@43 948 mov [edi], al
Chris@43 949 inc edi
Chris@43 950
Chris@43 951 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@43 952 mov ebp, [esp+32] /* ebp = lcode */
Chris@43 953 jmp L_while_test
Chris@43 954
Chris@43 955 L_copy_two:
Chris@43 956 rep movsw
Chris@43 957 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@43 958 mov ebp, [esp+32] /* ebp = lcode */
Chris@43 959 jmp L_while_test
Chris@43 960
Chris@43 961 ALIGN 4
Chris@43 962 L_check_dist_one:
Chris@43 963 cmp ebp, 1 /* if dist 1, is a memset */
Chris@43 964 jne L_check_window
Chris@43 965 cmp [esp+20], edi
Chris@43 966 je L_check_window /* out == beg, if outside window */
Chris@43 967
Chris@43 968 mov ecx, [esp+64] /* ecx = len */
Chris@43 969 mov al, [edi-1]
Chris@43 970 mov ah, al
Chris@43 971
Chris@43 972 sar ecx, 1
Chris@43 973 jnc L_set_two
Chris@43 974 mov [edi], al /* memset out with from[-1] */
Chris@43 975 inc edi
Chris@43 976
Chris@43 977 L_set_two:
Chris@43 978 rep stosw
Chris@43 979 mov ebp, [esp+32] /* ebp = lcode */
Chris@43 980 jmp L_while_test
Chris@43 981
Chris@43 982 ALIGN 4
Chris@43 983 L_test_for_second_level_length:
Chris@43 984 test al, 64
Chris@43 985 jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
Chris@43 986
Chris@43 987 xor eax, eax
Chris@43 988 inc eax
Chris@43 989 shl eax, cl
Chris@43 990 dec eax
Chris@43 991 and eax, edx /* eax &= hold */
Chris@43 992 add eax, [esp+64] /* eax += len */
Chris@43 993 mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
Chris@43 994 jmp L_dolen
Chris@43 995
Chris@43 996 ALIGN 4
Chris@43 997 L_test_for_second_level_dist:
Chris@43 998 test al, 64
Chris@43 999 jnz L_invalid_distance_code /* if ((op & 64) != 0) */
Chris@43 1000
Chris@43 1001 xor eax, eax
Chris@43 1002 inc eax
Chris@43 1003 shl eax, cl
Chris@43 1004 dec eax
Chris@43 1005 and eax, edx /* eax &= hold */
Chris@43 1006 add eax, ebp /* eax += dist */
Chris@43 1007 mov ecx, [esp+36] /* ecx = dcode */
Chris@43 1008 mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
Chris@43 1009 jmp L_dodist
Chris@43 1010
Chris@43 1011 ALIGN 4
Chris@43 1012 L_clip_window:
Chris@43 1013 mov ecx, eax
Chris@43 1014 mov eax, [esp+48] /* eax = wsize */
Chris@43 1015 neg ecx /* nbytes = -nbytes */
Chris@43 1016 mov esi, [esp+28] /* from = window */
Chris@43 1017
Chris@43 1018 cmp eax, ebp
Chris@43 1019 jb L_invalid_distance_too_far /* if (dist > wsize) */
Chris@43 1020
Chris@43 1021 add ecx, ebp /* nbytes = dist - nbytes */
Chris@43 1022 cmp dword ptr [esp+52], 0
Chris@43 1023 jne L_wrap_around_window /* if (write != 0) */
Chris@43 1024
Chris@43 1025 sub eax, ecx
Chris@43 1026 add esi, eax /* from += wsize - nbytes */
Chris@43 1027
Chris@43 1028 mov eax, [esp+64] /* eax = len */
Chris@43 1029 cmp eax, ecx
Chris@43 1030 jbe L_do_copy /* if (nbytes >= len) */
Chris@43 1031
Chris@43 1032 sub eax, ecx /* len -= nbytes */
Chris@43 1033 rep movsb
Chris@43 1034 mov esi, edi
Chris@43 1035 sub esi, ebp /* from = out - dist */
Chris@43 1036 jmp L_do_copy
Chris@43 1037
Chris@43 1038 ALIGN 4
Chris@43 1039 L_wrap_around_window:
Chris@43 1040 mov eax, [esp+52] /* eax = write */
Chris@43 1041 cmp ecx, eax
Chris@43 1042 jbe L_contiguous_in_window /* if (write >= nbytes) */
Chris@43 1043
Chris@43 1044 add esi, [esp+48] /* from += wsize */
Chris@43 1045 add esi, eax /* from += write */
Chris@43 1046 sub esi, ecx /* from -= nbytes */
Chris@43 1047 sub ecx, eax /* nbytes -= write */
Chris@43 1048
Chris@43 1049 mov eax, [esp+64] /* eax = len */
Chris@43 1050 cmp eax, ecx
Chris@43 1051 jbe L_do_copy /* if (nbytes >= len) */
Chris@43 1052
Chris@43 1053 sub eax, ecx /* len -= nbytes */
Chris@43 1054 rep movsb
Chris@43 1055 mov esi, [esp+28] /* from = window */
Chris@43 1056 mov ecx, [esp+52] /* nbytes = write */
Chris@43 1057 cmp eax, ecx
Chris@43 1058 jbe L_do_copy /* if (nbytes >= len) */
Chris@43 1059
Chris@43 1060 sub eax, ecx /* len -= nbytes */
Chris@43 1061 rep movsb
Chris@43 1062 mov esi, edi
Chris@43 1063 sub esi, ebp /* from = out - dist */
Chris@43 1064 jmp L_do_copy
Chris@43 1065
Chris@43 1066 ALIGN 4
Chris@43 1067 L_contiguous_in_window:
Chris@43 1068 add esi, eax
Chris@43 1069 sub esi, ecx /* from += write - nbytes */
Chris@43 1070
Chris@43 1071 mov eax, [esp+64] /* eax = len */
Chris@43 1072 cmp eax, ecx
Chris@43 1073 jbe L_do_copy /* if (nbytes >= len) */
Chris@43 1074
Chris@43 1075 sub eax, ecx /* len -= nbytes */
Chris@43 1076 rep movsb
Chris@43 1077 mov esi, edi
Chris@43 1078 sub esi, ebp /* from = out - dist */
Chris@43 1079 jmp L_do_copy
Chris@43 1080
Chris@43 1081 ALIGN 4
Chris@43 1082 L_do_copy:
Chris@43 1083 mov ecx, eax
Chris@43 1084 rep movsb
Chris@43 1085
Chris@43 1086 mov esi, [esp+8] /* move in back to %esi, toss from */
Chris@43 1087 mov ebp, [esp+32] /* ebp = lcode */
Chris@43 1088 jmp L_while_test
Chris@43 1089
Chris@43 1090 L_test_for_end_of_block:
Chris@43 1091 test al, 32
Chris@43 1092 jz L_invalid_literal_length_code
Chris@43 1093 mov dword ptr [esp+72], 1
Chris@43 1094 jmp L_break_loop_with_status
Chris@43 1095
Chris@43 1096 L_invalid_literal_length_code:
Chris@43 1097 mov dword ptr [esp+72], 2
Chris@43 1098 jmp L_break_loop_with_status
Chris@43 1099
Chris@43 1100 L_invalid_distance_code:
Chris@43 1101 mov dword ptr [esp+72], 3
Chris@43 1102 jmp L_break_loop_with_status
Chris@43 1103
Chris@43 1104 L_invalid_distance_too_far: /* reached from L_clip_window when dist > wsize */
Chris@43 1105 mov esi, [esp+8] /* reload in: esi holds from (window) here, and [esp+4] is the saved ebp, not in */
Chris@43 1106 mov dword ptr [esp+72], 4 /* status 4, reported after the asm as "invalid distance too far back" */
Chris@43 1107 jmp L_break_loop_with_status /* common exit stores esi back as in */
Chris@43 1108
Chris@43 1109 L_break_loop:
Chris@43 1110 mov dword ptr [esp+72], 0
Chris@43 1111
Chris@43 1112 L_break_loop_with_status:
Chris@43 1113 /* put in, out, bits, and hold back into ar and pop esp */
Chris@43 1114 mov [esp+8], esi /* save in */
Chris@43 1115 mov [esp+16], edi /* save out */
Chris@43 1116 mov [esp+44], ebx /* save bits */
Chris@43 1117 mov [esp+40], edx /* save hold */
Chris@43 1118 mov ebp, [esp+4] /* restore esp, ebp */
Chris@43 1119 mov esp, [esp]
Chris@43 1120 }
Chris@43 1121 #else
Chris@43 1122 #error "x86 architecture not defined"
Chris@43 1123 #endif
Chris@43 1124
Chris@43 1125 if (ar.status > 1) {
Chris@43 1126 if (ar.status == 2)
Chris@43 1127 strm->msg = "invalid literal/length code";
Chris@43 1128 else if (ar.status == 3)
Chris@43 1129 strm->msg = "invalid distance code";
Chris@43 1130 else
Chris@43 1131 strm->msg = "invalid distance too far back";
Chris@43 1132 state->mode = BAD;
Chris@43 1133 }
Chris@43 1134 else if ( ar.status == 1 ) {
Chris@43 1135 state->mode = TYPE;
Chris@43 1136 }
Chris@43 1137
Chris@43 1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
Chris@43 1139 ar.len = ar.bits >> 3;
Chris@43 1140 ar.in -= ar.len;
Chris@43 1141 ar.bits -= ar.len << 3;
Chris@43 1142 ar.hold &= (1U << ar.bits) - 1;
Chris@43 1143
Chris@43 1144 /* update state and return */
Chris@43 1145 strm->next_in = ar.in;
Chris@43 1146 strm->next_out = ar.out;
Chris@43 1147 strm->avail_in = (unsigned)(ar.in < ar.last ?
Chris@43 1148 PAD_AVAIL_IN + (ar.last - ar.in) :
Chris@43 1149 PAD_AVAIL_IN - (ar.in - ar.last));
Chris@43 1150 strm->avail_out = (unsigned)(ar.out < ar.end ?
Chris@43 1151 PAD_AVAIL_OUT + (ar.end - ar.out) :
Chris@43 1152 PAD_AVAIL_OUT - (ar.out - ar.end));
Chris@43 1153 state->hold = ar.hold;
Chris@43 1154 state->bits = ar.bits;
Chris@43 1155 return;
Chris@43 1156 }
Chris@43 1157