annotate src/zlib-1.2.7/contrib/inflate86/inffast.S @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI incompatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1 /*
Chris@4 2 * inffast.S is a hand tuned assembler version of:
Chris@4 3 *
Chris@4 4 * inffast.c -- fast decoding
Chris@4 5 * Copyright (C) 1995-2003 Mark Adler
Chris@4 6 * For conditions of distribution and use, see copyright notice in zlib.h
Chris@4 7 *
Chris@4 8 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
Chris@4 9 * Please use the copyright conditions above.
Chris@4 10 *
Chris@4 11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
Chris@4 12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
Chris@4 13 * machine, I found that gzip style archives decompressed about 20% faster than
Chris@4 14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
Chris@4 15 * depend on how large of a buffer is used for z_stream.next_in & next_out
Chris@4 16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
Chris@4 17 * stream processing I/O and crc32/adler32. In my case, this routine used
Chris@4 18 * 70% of the cpu time and crc32 used 20%.
Chris@4 19 *
Chris@4 20 * I am confident that this version will work in the general case, but I have
Chris@4 21 * not tested a wide variety of datasets or a wide variety of platforms.
Chris@4 22 *
Chris@4 23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
Chris@4 24 * It should be a runtime flag instead of compile time flag...
Chris@4 25 *
Chris@4 26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
Chris@4 27 * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code
Chris@4 28 * is compiled. Without either option, runtime detection is enabled. Runtime
Chris@4 29 * detection should work on all modern cpus and the recommended algorithm (flip
Chris@4 30 * ID bit on eflags and then use the cpuid instruction) is used in many
Chris@4 31 * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12
Chris@4 32 * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o
Chris@4 33 * inffast.obj generates a COFF object which can then be linked with MSVC++
Chris@4 34 * compiled code. Tested under FreeBSD 4.7 with gcc-2.95.
Chris@4 35 *
Chris@4 36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
Chris@4 37 * slower than compiler generated code). Adjusted cpuid check to use the MMX
Chris@4 38 * code only for Pentiums < P4 until I have more data on the P4. Speed
Chris@4 39 * improvement is only about 15% on the Athlon when compared with code generated
Chris@4 40 * with MSVC++. Not sure yet, but I think the P4 will also be slower using the
Chris@4 41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
Chris@4 42 * have less latency than MMX ops. Added code to buffer the last 11 bytes of
Chris@4 43 * the input stream since the MMX code grabs bits in chunks of 32, which
Chris@4 44 * differs from the inffast.c algorithm. I don't think there would have been
Chris@4 45 * read overruns where a page boundary was crossed (a segfault), but there
Chris@4 46 * could have been overruns when next_in ends on unaligned memory (uninitialized
Chris@4 47 * memory read).
Chris@4 48 *
Chris@4 49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C
Chris@4 50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
Chris@4 51 * structure offsets which are hard coded in this file. This was last tested
Chris@4 52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
Chris@4 53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
Chris@4 54 * http://www.charm.net/~christop/zlib/
Chris@4 55 */
Chris@4 56
Chris@4 57
Chris@4 58 /*
Chris@4 59 * Object format selection. If you have underscore linking problems (_inflate_fast undefined), try
Chris@4 60 * using -DGAS_COFF. When neither GAS_COFF nor GAS_ELF is predefined, WIN32 or __CYGWIN__ selects GAS_COFF and everything else selects GAS_ELF.
Chris@4 61 */
Chris@4 62 #if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
Chris@4 63
Chris@4 64 #if defined( WIN32 ) || defined( __CYGWIN__ )
Chris@4 65 #define GAS_COFF /* windows object format */
Chris@4 66 #else
Chris@4 67 #define GAS_ELF /* default when no windows/cygwin macro is set */
Chris@4 68 #endif
Chris@4 69
Chris@4 70 #endif /* ! GAS_COFF && ! GAS_ELF */
Chris@4 71
Chris@4 72
Chris@4 73 #if defined( GAS_COFF )
Chris@4 74
Chris@4 75 /* coff externals have underscores: rename both exported symbols so that every later use in this file picks up the underscore-prefixed name */
Chris@4 76 #define inflate_fast _inflate_fast
Chris@4 77 #define inflate_fast_use_mmx _inflate_fast_use_mmx
Chris@4 78
Chris@4 79 #endif /* GAS_COFF */
Chris@4 80
Chris@4 81
Chris@4 82 .file "inffast.S"
Chris@4 83
Chris@4 84 .globl inflate_fast /* exported entry point (becomes _inflate_fast under GAS_COFF) */
Chris@4 85
Chris@4 86 .text /* the message strings and the mask table below are emitted into the text section */
Chris@4 87 .align 4,0
Chris@4 88 .L_invalid_literal_length_code_msg: /* NUL-terminated error text; its use is outside the visible part of this file -- presumably stored in strm->msg, TODO confirm */
Chris@4 89 .string "invalid literal/length code"
Chris@4 90
Chris@4 91 .align 4,0
Chris@4 92 .L_invalid_distance_code_msg: /* NUL-terminated error text for a bad distance code */
Chris@4 93 .string "invalid distance code"
Chris@4 94
Chris@4 95 .align 4,0
Chris@4 96 .L_invalid_distance_too_far_msg: /* NUL-terminated error text for a distance larger than the window */
Chris@4 97 .string "invalid distance too far back"
Chris@4 98
Chris@4 99 #if ! defined( NO_MMX ) /* in the visible code only the MMX path reads .L_mask; the non-MMX path builds its masks with shll/decl */
Chris@4 100 .align 4,0
Chris@4 101 .L_mask: /* mask[N] = ( 1 << N ) - 1, one 32-bit entry for each N = 0..32 (33 entries), indexed as .L_mask(,reg,4) */
Chris@4 102 .long 0
Chris@4 103 .long 1
Chris@4 104 .long 3
Chris@4 105 .long 7
Chris@4 106 .long 15
Chris@4 107 .long 31
Chris@4 108 .long 63
Chris@4 109 .long 127
Chris@4 110 .long 255
Chris@4 111 .long 511
Chris@4 112 .long 1023
Chris@4 113 .long 2047
Chris@4 114 .long 4095
Chris@4 115 .long 8191
Chris@4 116 .long 16383
Chris@4 117 .long 32767
Chris@4 118 .long 65535
Chris@4 119 .long 131071
Chris@4 120 .long 262143
Chris@4 121 .long 524287
Chris@4 122 .long 1048575
Chris@4 123 .long 2097151
Chris@4 124 .long 4194303
Chris@4 125 .long 8388607
Chris@4 126 .long 16777215
Chris@4 127 .long 33554431
Chris@4 128 .long 67108863
Chris@4 129 .long 134217727
Chris@4 130 .long 268435455
Chris@4 131 .long 536870911
Chris@4 132 .long 1073741823
Chris@4 133 .long 2147483647
Chris@4 134 .long 4294967295
Chris@4 135 #endif /* ! NO_MMX */
Chris@4 136
Chris@4 137 .text
Chris@4 138
Chris@4 139 /*
Chris@4 140 * struct z_stream offsets, in zlib.h (hard-coded byte offsets from the z_stream pointer; they must match the struct layout zlib is compiled with)
Chris@4 141 */
Chris@4 142 #define next_in_strm 0 /* strm->next_in */
Chris@4 143 #define avail_in_strm 4 /* strm->avail_in */
Chris@4 144 #define next_out_strm 12 /* strm->next_out */
Chris@4 145 #define avail_out_strm 16 /* strm->avail_out */
Chris@4 146 #define msg_strm 24 /* strm->msg */
Chris@4 147 #define state_strm 28 /* strm->state */
Chris@4 148
Chris@4 149 /*
Chris@4 150 * struct inflate_state offsets, in inflate.h (hard-coded byte offsets from the strm->state pointer)
Chris@4 151 */
Chris@4 152 #define mode_state 0 /* state->mode */
Chris@4 153 #define wsize_state 32 /* state->wsize */
Chris@4 154 #define write_state 40 /* state->write */
Chris@4 155 #define window_state 44 /* state->window */
Chris@4 156 #define hold_state 48 /* state->hold */
Chris@4 157 #define bits_state 52 /* state->bits */
Chris@4 158 #define lencode_state 68 /* state->lencode */
Chris@4 159 #define distcode_state 72 /* state->distcode */
Chris@4 160 #define lenbits_state 76 /* state->lenbits */
Chris@4 161 #define distbits_state 80 /* state->distbits */
Chris@4 162
Chris@4 163 /*
Chris@4 164 * inflate_fast's activation record. All offsets are relative to %esp after the prologue: local_var_size bytes of locals, then 24 bytes made up of eflags (pushf), the four pushed registers (%ebx, %ebp, %esi, %edi) and the return address, then the caller's arguments.
Chris@4 165 */
Chris@4 166 #define local_var_size 64 /* how much local space for vars */
Chris@4 167 #define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */
Chris@4 168 #define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */
Chris@4 169
Chris@4 170 /*
Chris@4 171 * offsets for local vars on stack (%esp-relative, all inside the local_var_size bytes reserved by the prologue; buf occupies bytes 28..39)
Chris@4 172 */
Chris@4 173 #define out 60 /* unsigned char* */
Chris@4 174 #define window 56 /* unsigned char* */
Chris@4 175 #define wsize 52 /* unsigned int */
Chris@4 176 #define write 48 /* unsigned int */
Chris@4 177 #define in 44 /* unsigned char* */
Chris@4 178 #define beg 40 /* unsigned char* */
Chris@4 179 #define buf 28 /* char[ 12 ] */
Chris@4 180 #define len 24 /* unsigned int */
Chris@4 181 #define last 20 /* unsigned char* */
Chris@4 182 #define end 16 /* unsigned char* */
Chris@4 183 #define dcode 12 /* code* */
Chris@4 184 #define lcode 8 /* code* */
Chris@4 185 #define dmask 4 /* unsigned int */
Chris@4 186 #define lmask 0 /* unsigned int */
Chris@4 187
Chris@4 188 /*
Chris@4 189 * typedef enum inflate_mode consts, in inflate.h (numeric values copied by hand; they must track the enum)
Chris@4 190 */
Chris@4 191 #define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
Chris@4 192 #define INFLATE_MODE_BAD 26
Chris@4 193
Chris@4 194
Chris@4 195 #if ! defined( USE_MMX ) && ! defined( NO_MMX )
Chris@4 196
Chris@4 197 #define RUN_TIME_MMX /* neither option given: assemble both the MMX and the non-MMX loop and choose between them at run time */
Chris@4 198
Chris@4 199 #define CHECK_MMX 1 /* not decided yet: inflate_fast runs its cpuid probe and stores one of the two values below */
Chris@4 200 #define DO_USE_MMX 2 /* take the MMX loop */
Chris@4 201 #define DONT_USE_MMX 3 /* take the non-MMX loop */
Chris@4 202
Chris@4 203 .globl inflate_fast_use_mmx /* the flag is an exported symbol (becomes _inflate_fast_use_mmx under GAS_COFF) */
Chris@4 204
Chris@4 205 .data
Chris@4 206
Chris@4 207 .align 4,0
Chris@4 208 inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
Chris@4 209 .long CHECK_MMX /* initial state: probe on first use */
Chris@4 210
Chris@4 211 #if defined( GAS_ELF )
Chris@4 212 /* elf info: mark the flag as a 4-byte data object */
Chris@4 213 .type inflate_fast_use_mmx,@object
Chris@4 214 .size inflate_fast_use_mmx,4
Chris@4 215 #endif
Chris@4 216
Chris@4 217 #endif /* RUN_TIME_MMX */
Chris@4 218
Chris@4 219 #if defined( GAS_COFF )
Chris@4 220 /* coff info: scl 2 = extern, type 32 = function */
Chris@4 221 .def inflate_fast; .scl 2; .type 32; .endef
Chris@4 222 #endif
Chris@4 223
Chris@4 224 .text /* switch back from .data to code for the function body */
Chris@4 225
Chris@4 226 .align 32,0x90 /* align the entry point, filling the gap with 0x90 (x86 nop) bytes */
Chris@4 227 inflate_fast:
Chris@4 228 pushl %edi
Chris@4 229 pushl %esi
Chris@4 230 pushl %ebp
Chris@4 231 pushl %ebx
Chris@4 232 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
Chris@4 233 subl $local_var_size, %esp
Chris@4 234 cld
Chris@4 235
Chris@4 236 #define strm_r %esi
Chris@4 237 #define state_r %edi
Chris@4 238
Chris@4 239 movl strm_sp(%esp), strm_r
Chris@4 240 movl state_strm(strm_r), state_r
Chris@4 241
Chris@4 242 /* in = strm->next_in;
Chris@4 243 * out = strm->next_out;
Chris@4 244 * last = in + strm->avail_in - 11;
Chris@4 245 * beg = out - (start - strm->avail_out);
Chris@4 246 * end = out + (strm->avail_out - 257);
Chris@4 247 */
Chris@4 248 movl avail_in_strm(strm_r), %edx
Chris@4 249 movl next_in_strm(strm_r), %eax
Chris@4 250
Chris@4 251 addl %eax, %edx /* avail_in += next_in */
Chris@4 252 subl $11, %edx /* avail_in -= 11 */
Chris@4 253
Chris@4 254 movl %eax, in(%esp)
Chris@4 255 movl %edx, last(%esp)
Chris@4 256
Chris@4 257 movl start_sp(%esp), %ebp
Chris@4 258 movl avail_out_strm(strm_r), %ecx
Chris@4 259 movl next_out_strm(strm_r), %ebx
Chris@4 260
Chris@4 261 subl %ecx, %ebp /* start -= avail_out */
Chris@4 262 negl %ebp /* start = -start */
Chris@4 263 addl %ebx, %ebp /* start += next_out */
Chris@4 264
Chris@4 265 subl $257, %ecx /* avail_out -= 257 */
Chris@4 266 addl %ebx, %ecx /* avail_out += out */
Chris@4 267
Chris@4 268 movl %ebx, out(%esp)
Chris@4 269 movl %ebp, beg(%esp)
Chris@4 270 movl %ecx, end(%esp)
Chris@4 271
Chris@4 272 /* wsize = state->wsize;
Chris@4 273 * write = state->write;
Chris@4 274 * window = state->window;
Chris@4 275 * hold = state->hold;
Chris@4 276 * bits = state->bits;
Chris@4 277 * lcode = state->lencode;
Chris@4 278 * dcode = state->distcode;
Chris@4 279 * lmask = ( 1 << state->lenbits ) - 1;
Chris@4 280 * dmask = ( 1 << state->distbits ) - 1;
Chris@4 281 */
Chris@4 282
Chris@4 283 movl lencode_state(state_r), %eax
Chris@4 284 movl distcode_state(state_r), %ecx
Chris@4 285
Chris@4 286 movl %eax, lcode(%esp)
Chris@4 287 movl %ecx, dcode(%esp)
Chris@4 288
Chris@4 289 movl $1, %eax
Chris@4 290 movl lenbits_state(state_r), %ecx
Chris@4 291 shll %cl, %eax
Chris@4 292 decl %eax
Chris@4 293 movl %eax, lmask(%esp)
Chris@4 294
Chris@4 295 movl $1, %eax
Chris@4 296 movl distbits_state(state_r), %ecx
Chris@4 297 shll %cl, %eax
Chris@4 298 decl %eax
Chris@4 299 movl %eax, dmask(%esp)
Chris@4 300
Chris@4 301 movl wsize_state(state_r), %eax
Chris@4 302 movl write_state(state_r), %ecx
Chris@4 303 movl window_state(state_r), %edx
Chris@4 304
Chris@4 305 movl %eax, wsize(%esp)
Chris@4 306 movl %ecx, write(%esp)
Chris@4 307 movl %edx, window(%esp)
Chris@4 308
Chris@4 309 movl hold_state(state_r), %ebp
Chris@4 310 movl bits_state(state_r), %ebx
Chris@4 311
Chris@4 312 #undef strm_r
Chris@4 313 #undef state_r
Chris@4 314
Chris@4 315 #define in_r %esi
Chris@4 316 #define from_r %esi
Chris@4 317 #define out_r %edi
Chris@4 318
Chris@4 319 movl in(%esp), in_r
Chris@4 320 movl last(%esp), %ecx
Chris@4 321 cmpl in_r, %ecx
Chris@4 322 ja .L_align_long /* if in < last */
Chris@4 323
Chris@4 324 addl $11, %ecx /* ecx = &in[ avail_in ] */
Chris@4 325 subl in_r, %ecx /* ecx = avail_in */
Chris@4 326 movl $12, %eax
Chris@4 327 subl %ecx, %eax /* eax = 12 - avail_in */
Chris@4 328 leal buf(%esp), %edi
Chris@4 329 rep movsb /* memcpy( buf, in, avail_in ) */
Chris@4 330 movl %eax, %ecx
Chris@4 331 xorl %eax, %eax
Chris@4 332 rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
Chris@4 333 leal buf(%esp), in_r /* in = buf */
Chris@4 334 movl in_r, last(%esp) /* last = in, do just one iteration */
Chris@4 335 jmp .L_is_aligned
Chris@4 336
Chris@4 337 /* align in_r on long boundary */
Chris@4 338 .L_align_long:
Chris@4 339 testl $3, in_r
Chris@4 340 jz .L_is_aligned
Chris@4 341 xorl %eax, %eax
Chris@4 342 movb (in_r), %al
Chris@4 343 incl in_r
Chris@4 344 movl %ebx, %ecx
Chris@4 345 addl $8, %ebx
Chris@4 346 shll %cl, %eax
Chris@4 347 orl %eax, %ebp
Chris@4 348 jmp .L_align_long
Chris@4 349
Chris@4 350 .L_is_aligned:
Chris@4 351 movl out(%esp), out_r
Chris@4 352
Chris@4 353 #if defined( NO_MMX )
Chris@4 354 jmp .L_do_loop
Chris@4 355 #endif
Chris@4 356
Chris@4 357 #if defined( USE_MMX )
Chris@4 358 jmp .L_init_mmx
Chris@4 359 #endif
Chris@4 360
Chris@4 361 /*** Runtime MMX check ***/
Chris@4 362
Chris@4 363 #if defined( RUN_TIME_MMX )
Chris@4 364 .L_check_mmx:
Chris@4 365 cmpl $DO_USE_MMX, inflate_fast_use_mmx
Chris@4 366 je .L_init_mmx
Chris@4 367 ja .L_do_loop /* > 2 */
Chris@4 368
Chris@4 369 pushl %eax
Chris@4 370 pushl %ebx
Chris@4 371 pushl %ecx
Chris@4 372 pushl %edx
Chris@4 373 pushf
Chris@4 374 movl (%esp), %eax /* copy eflags to eax */
Chris@4 375 xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
Chris@4 376 * to see if cpu supports cpuid...
Chris@4 377 * ID bit method not supported by NexGen but
Chris@4 378 * bios may load a cpuid instruction and
Chris@4 379 * cpuid may be disabled on Cyrix 5-6x86 */
Chris@4 380 popf
Chris@4 381 pushf
Chris@4 382 popl %edx /* copy new eflags to edx */
Chris@4 383 xorl %eax, %edx /* test if ID bit is flipped */
Chris@4 384 jz .L_dont_use_mmx /* not flipped if zero */
Chris@4 385 xorl %eax, %eax
Chris@4 386 cpuid
Chris@4 387 cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
Chris@4 388 jne .L_dont_use_mmx
Chris@4 389 cmpl $0x6c65746e, %ecx
Chris@4 390 jne .L_dont_use_mmx
Chris@4 391 cmpl $0x49656e69, %edx
Chris@4 392 jne .L_dont_use_mmx
Chris@4 393 movl $1, %eax
Chris@4 394 cpuid /* get cpu features */
Chris@4 395 shrl $8, %eax
Chris@4 396 andl $15, %eax
Chris@4 397 cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */
Chris@4 398 jne .L_dont_use_mmx
Chris@4 399 testl $0x800000, %edx /* test if MMX feature is set (bit 23) */
Chris@4 400 jnz .L_use_mmx
Chris@4 401 jmp .L_dont_use_mmx
Chris@4 402 .L_use_mmx:
Chris@4 403 movl $DO_USE_MMX, inflate_fast_use_mmx
Chris@4 404 jmp .L_check_mmx_pop
Chris@4 405 .L_dont_use_mmx:
Chris@4 406 movl $DONT_USE_MMX, inflate_fast_use_mmx
Chris@4 407 .L_check_mmx_pop:
Chris@4 408 popl %edx
Chris@4 409 popl %ecx
Chris@4 410 popl %ebx
Chris@4 411 popl %eax
Chris@4 412 jmp .L_check_mmx
Chris@4 413 #endif
Chris@4 414
Chris@4 415
Chris@4 416 /*** Non-MMX code ***/
Chris@4 417
Chris@4 418 #if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
Chris@4 419
Chris@4 420 #define hold_r %ebp
Chris@4 421 #define bits_r %bl
Chris@4 422 #define bitslong_r %ebx
Chris@4 423
Chris@4 424 .align 32,0x90
Chris@4 425 .L_while_test:
Chris@4 426 /* while (in < last && out < end)
Chris@4 427 */
Chris@4 428 cmpl out_r, end(%esp)
Chris@4 429 jbe .L_break_loop /* if (out >= end) */
Chris@4 430
Chris@4 431 cmpl in_r, last(%esp)
Chris@4 432 jbe .L_break_loop
Chris@4 433
Chris@4 434 .L_do_loop:
Chris@4 435 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
Chris@4 436 *
Chris@4 437 * do {
Chris@4 438 * if (bits < 15) {
Chris@4 439 * hold |= *((unsigned short *)in)++ << bits;
Chris@4 440 * bits += 16
Chris@4 441 * }
Chris@4 442 * this = lcode[hold & lmask]
Chris@4 443 */
Chris@4 444 cmpb $15, bits_r
Chris@4 445 ja .L_get_length_code /* if (15 < bits) */
Chris@4 446
Chris@4 447 xorl %eax, %eax
Chris@4 448 lodsw /* al = *(ushort *)in++ */
Chris@4 449 movb bits_r, %cl /* cl = bits, needs it for shifting */
Chris@4 450 addb $16, bits_r /* bits += 16 */
Chris@4 451 shll %cl, %eax
Chris@4 452 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
Chris@4 453
Chris@4 454 .L_get_length_code:
Chris@4 455 movl lmask(%esp), %edx /* edx = lmask */
Chris@4 456 movl lcode(%esp), %ecx /* ecx = lcode */
Chris@4 457 andl hold_r, %edx /* edx &= hold */
Chris@4 458 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
Chris@4 459
Chris@4 460 .L_dolen:
Chris@4 461 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
Chris@4 462 *
Chris@4 463 * dolen:
Chris@4 464 * bits -= this.bits;
Chris@4 465 * hold >>= this.bits
Chris@4 466 */
Chris@4 467 movb %ah, %cl /* cl = this.bits */
Chris@4 468 subb %ah, bits_r /* bits -= this.bits */
Chris@4 469 shrl %cl, hold_r /* hold >>= this.bits */
Chris@4 470
Chris@4 471 /* check if op is a literal
Chris@4 472 * if (op == 0) {
Chris@4 473 * PUP(out) = this.val;
Chris@4 474 * }
Chris@4 475 */
Chris@4 476 testb %al, %al
Chris@4 477 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
Chris@4 478
Chris@4 479 shrl $16, %eax /* output this.val char */
Chris@4 480 stosb
Chris@4 481 jmp .L_while_test
Chris@4 482
Chris@4 483 .L_test_for_length_base:
Chris@4 484 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
Chris@4 485 *
Chris@4 486 * else if (op & 16) {
Chris@4 487 * len = this.val
Chris@4 488 * op &= 15
Chris@4 489 * if (op) {
Chris@4 490 * if (op > bits) {
Chris@4 491 * hold |= *((unsigned short *)in)++ << bits;
Chris@4 492 * bits += 16
Chris@4 493 * }
Chris@4 494 * len += hold & mask[op];
Chris@4 495 * bits -= op;
Chris@4 496 * hold >>= op;
Chris@4 497 * }
Chris@4 498 */
Chris@4 499 #define len_r %edx
Chris@4 500 movl %eax, len_r /* len = this */
Chris@4 501 shrl $16, len_r /* len = this.val */
Chris@4 502 movb %al, %cl
Chris@4 503
Chris@4 504 testb $16, %al
Chris@4 505 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
Chris@4 506 andb $15, %cl /* op &= 15 */
Chris@4 507 jz .L_save_len /* if (!op) */
Chris@4 508 cmpb %cl, bits_r
Chris@4 509 jae .L_add_bits_to_len /* if (op <= bits) */
Chris@4 510
Chris@4 511 movb %cl, %ch /* stash op in ch, freeing cl */
Chris@4 512 xorl %eax, %eax
Chris@4 513 lodsw /* al = *(ushort *)in++ */
Chris@4 514 movb bits_r, %cl /* cl = bits, needs it for shifting */
Chris@4 515 addb $16, bits_r /* bits += 16 */
Chris@4 516 shll %cl, %eax
Chris@4 517 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
Chris@4 518 movb %ch, %cl /* move op back to ecx */
Chris@4 519
Chris@4 520 .L_add_bits_to_len:
Chris@4 521 movl $1, %eax
Chris@4 522 shll %cl, %eax
Chris@4 523 decl %eax
Chris@4 524 subb %cl, bits_r
Chris@4 525 andl hold_r, %eax /* eax &= hold */
Chris@4 526 shrl %cl, hold_r
Chris@4 527 addl %eax, len_r /* len += hold & mask[op] */
Chris@4 528
Chris@4 529 .L_save_len:
Chris@4 530 movl len_r, len(%esp) /* save len */
Chris@4 531 #undef len_r
Chris@4 532
Chris@4 533 .L_decode_distance:
Chris@4 534 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 535 *
Chris@4 536 * if (bits < 15) {
Chris@4 537 * hold |= *((unsigned short *)in)++ << bits;
Chris@4 538 * bits += 16
Chris@4 539 * }
Chris@4 540 * this = dcode[hold & dmask];
Chris@4 541 * dodist:
Chris@4 542 * bits -= this.bits;
Chris@4 543 * hold >>= this.bits;
Chris@4 544 * op = this.op;
Chris@4 545 */
Chris@4 546
Chris@4 547 cmpb $15, bits_r
Chris@4 548 ja .L_get_distance_code /* if (15 < bits) */
Chris@4 549
Chris@4 550 xorl %eax, %eax
Chris@4 551 lodsw /* al = *(ushort *)in++ */
Chris@4 552 movb bits_r, %cl /* cl = bits, needs it for shifting */
Chris@4 553 addb $16, bits_r /* bits += 16 */
Chris@4 554 shll %cl, %eax
Chris@4 555 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
Chris@4 556
Chris@4 557 .L_get_distance_code:
Chris@4 558 movl dmask(%esp), %edx /* edx = dmask */
Chris@4 559 movl dcode(%esp), %ecx /* ecx = dcode */
Chris@4 560 andl hold_r, %edx /* edx &= hold */
Chris@4 561 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
Chris@4 562
Chris@4 563 #define dist_r %edx
Chris@4 564 .L_dodist:
Chris@4 565 movl %eax, dist_r /* dist = this */
Chris@4 566 shrl $16, dist_r /* dist = this.val */
Chris@4 567 movb %ah, %cl
Chris@4 568 subb %ah, bits_r /* bits -= this.bits */
Chris@4 569 shrl %cl, hold_r /* hold >>= this.bits */
Chris@4 570
Chris@4 571 /* if (op & 16) {
Chris@4 572 * dist = this.val
Chris@4 573 * op &= 15
Chris@4 574 * if (op > bits) {
Chris@4 575 * hold |= *((unsigned short *)in)++ << bits;
Chris@4 576 * bits += 16
Chris@4 577 * }
Chris@4 578 * dist += hold & mask[op];
Chris@4 579 * bits -= op;
Chris@4 580 * hold >>= op;
Chris@4 581 */
Chris@4 582 movb %al, %cl /* cl = this.op */
Chris@4 583
Chris@4 584 testb $16, %al /* if ((op & 16) == 0) */
Chris@4 585 jz .L_test_for_second_level_dist
Chris@4 586 andb $15, %cl /* op &= 15 */
Chris@4 587 jz .L_check_dist_one
Chris@4 588 cmpb %cl, bits_r
Chris@4 589 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
Chris@4 590
Chris@4 591 movb %cl, %ch /* stash op in ch, freeing cl */
Chris@4 592 xorl %eax, %eax
Chris@4 593 lodsw /* al = *(ushort *)in++ */
Chris@4 594 movb bits_r, %cl /* cl = bits, needs it for shifting */
Chris@4 595 addb $16, bits_r /* bits += 16 */
Chris@4 596 shll %cl, %eax
Chris@4 597 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
Chris@4 598 movb %ch, %cl /* move op back to ecx */
Chris@4 599
Chris@4 600 .L_add_bits_to_dist:
Chris@4 601 movl $1, %eax
Chris@4 602 shll %cl, %eax
Chris@4 603 decl %eax /* (1 << op) - 1 */
Chris@4 604 subb %cl, bits_r
Chris@4 605 andl hold_r, %eax /* eax &= hold */
Chris@4 606 shrl %cl, hold_r
Chris@4 607 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
Chris@4 608 jmp .L_check_window
Chris@4 609
Chris@4 610 .L_check_window:
Chris@4 611 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 612 * %ecx = nbytes
Chris@4 613 *
Chris@4 614 * nbytes = out - beg;
Chris@4 615 * if (dist <= nbytes) {
Chris@4 616 * from = out - dist;
Chris@4 617 * do {
Chris@4 618 * PUP(out) = PUP(from);
Chris@4 619 * } while (--len > 0);
Chris@4 620 * }
Chris@4 621 */
Chris@4 622
Chris@4 623 movl in_r, in(%esp) /* save in so from can use it's reg */
Chris@4 624 movl out_r, %eax
Chris@4 625 subl beg(%esp), %eax /* nbytes = out - beg */
Chris@4 626
Chris@4 627 cmpl dist_r, %eax
Chris@4 628 jb .L_clip_window /* if (dist > nbytes) 4.2% */
Chris@4 629
Chris@4 630 movl len(%esp), %ecx
Chris@4 631 movl out_r, from_r
Chris@4 632 subl dist_r, from_r /* from = out - dist */
Chris@4 633
Chris@4 634 subl $3, %ecx
Chris@4 635 movb (from_r), %al
Chris@4 636 movb %al, (out_r)
Chris@4 637 movb 1(from_r), %al
Chris@4 638 movb 2(from_r), %dl
Chris@4 639 addl $3, from_r
Chris@4 640 movb %al, 1(out_r)
Chris@4 641 movb %dl, 2(out_r)
Chris@4 642 addl $3, out_r
Chris@4 643 rep movsb
Chris@4 644
Chris@4 645 movl in(%esp), in_r /* move in back to %esi, toss from */
Chris@4 646 jmp .L_while_test
Chris@4 647
Chris@4 648 .align 16,0x90
Chris@4 649 .L_check_dist_one:
Chris@4 650 cmpl $1, dist_r
Chris@4 651 jne .L_check_window
Chris@4 652 cmpl out_r, beg(%esp)
Chris@4 653 je .L_check_window
Chris@4 654
Chris@4 655 decl out_r
Chris@4 656 movl len(%esp), %ecx
Chris@4 657 movb (out_r), %al
Chris@4 658 subl $3, %ecx
Chris@4 659
Chris@4 660 movb %al, 1(out_r)
Chris@4 661 movb %al, 2(out_r)
Chris@4 662 movb %al, 3(out_r)
Chris@4 663 addl $4, out_r
Chris@4 664 rep stosb
Chris@4 665
Chris@4 666 jmp .L_while_test
Chris@4 667
Chris@4 668 .align 16,0x90
Chris@4 669 .L_test_for_second_level_length:
Chris@4 670 /* else if ((op & 64) == 0) {
Chris@4 671 * this = lcode[this.val + (hold & mask[op])];
Chris@4 672 * }
Chris@4 673 */
Chris@4 674 testb $64, %al
Chris@4 675 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
Chris@4 676
Chris@4 677 movl $1, %eax
Chris@4 678 shll %cl, %eax
Chris@4 679 decl %eax
Chris@4 680 andl hold_r, %eax /* eax &= hold */
Chris@4 681 addl %edx, %eax /* eax += this.val */
Chris@4 682 movl lcode(%esp), %edx /* edx = lcode */
Chris@4 683 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
Chris@4 684 jmp .L_dolen
Chris@4 685
Chris@4 686 .align 16,0x90
Chris@4 687 .L_test_for_second_level_dist:
Chris@4 688 /* else if ((op & 64) == 0) {
Chris@4 689 * this = dcode[this.val + (hold & mask[op])];
Chris@4 690 * }
Chris@4 691 */
Chris@4 692 testb $64, %al
Chris@4 693 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
Chris@4 694
Chris@4 695 movl $1, %eax
Chris@4 696 shll %cl, %eax
Chris@4 697 decl %eax
Chris@4 698 andl hold_r, %eax /* eax &= hold */
Chris@4 699 addl %edx, %eax /* eax += this.val */
Chris@4 700 movl dcode(%esp), %edx /* edx = dcode */
Chris@4 701 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
Chris@4 702 jmp .L_dodist
Chris@4 703
Chris@4 704 .align 16,0x90
Chris@4 705 .L_clip_window:
Chris@4 706 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 707 * %ecx = nbytes
Chris@4 708 *
Chris@4 709 * else {
Chris@4 710 * if (dist > wsize) {
Chris@4 711 * invalid distance
Chris@4 712 * }
Chris@4 713 * from = window;
Chris@4 714 * nbytes = dist - nbytes;
Chris@4 715 * if (write == 0) {
Chris@4 716 * from += wsize - nbytes;
Chris@4 717 */
Chris@4 718 #define nbytes_r %ecx
Chris@4 719 movl %eax, nbytes_r
Chris@4 720 movl wsize(%esp), %eax /* prepare for dist compare */
Chris@4 721 negl nbytes_r /* nbytes = -nbytes */
Chris@4 722 movl window(%esp), from_r /* from = window */
Chris@4 723
Chris@4 724 cmpl dist_r, %eax
Chris@4 725 jb .L_invalid_distance_too_far /* if (dist > wsize) */
Chris@4 726
Chris@4 727 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
Chris@4 728 cmpl $0, write(%esp)
Chris@4 729 jne .L_wrap_around_window /* if (write != 0) */
Chris@4 730
Chris@4 731 subl nbytes_r, %eax
Chris@4 732 addl %eax, from_r /* from += wsize - nbytes */
Chris@4 733
Chris@4 734 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 735 * %ecx = nbytes, %eax = len
Chris@4 736 *
Chris@4 737 * if (nbytes < len) {
Chris@4 738 * len -= nbytes;
Chris@4 739 * do {
Chris@4 740 * PUP(out) = PUP(from);
Chris@4 741 * } while (--nbytes);
Chris@4 742 * from = out - dist;
Chris@4 743 * }
Chris@4 744 * }
Chris@4 745 */
Chris@4 746 #define len_r %eax
Chris@4 747 movl len(%esp), len_r
Chris@4 748 cmpl nbytes_r, len_r
Chris@4 749 jbe .L_do_copy1 /* if (nbytes >= len) */
Chris@4 750
Chris@4 751 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 752 rep movsb
Chris@4 753 movl out_r, from_r
Chris@4 754 subl dist_r, from_r /* from = out - dist */
Chris@4 755 jmp .L_do_copy1
Chris@4 756
Chris@4 757 cmpl nbytes_r, len_r
Chris@4 758 jbe .L_do_copy1 /* if (nbytes >= len) */
Chris@4 759
Chris@4 760 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 761 rep movsb
Chris@4 762 movl out_r, from_r
Chris@4 763 subl dist_r, from_r /* from = out - dist */
Chris@4 764 jmp .L_do_copy1
Chris@4 765
Chris@4 766 .L_wrap_around_window:
Chris@4 767 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 768 * %ecx = nbytes, %eax = write, %eax = len
Chris@4 769 *
Chris@4 770 * else if (write < nbytes) {
Chris@4 771 * from += wsize + write - nbytes;
Chris@4 772 * nbytes -= write;
Chris@4 773 * if (nbytes < len) {
Chris@4 774 * len -= nbytes;
Chris@4 775 * do {
Chris@4 776 * PUP(out) = PUP(from);
Chris@4 777 * } while (--nbytes);
Chris@4 778 * from = window;
Chris@4 779 * nbytes = write;
Chris@4 780 * if (nbytes < len) {
Chris@4 781 * len -= nbytes;
Chris@4 782 * do {
Chris@4 783 * PUP(out) = PUP(from);
Chris@4 784 * } while(--nbytes);
Chris@4 785 * from = out - dist;
Chris@4 786 * }
Chris@4 787 * }
Chris@4 788 * }
Chris@4 789 */
Chris@4 790 #define write_r %eax
Chris@4 791 movl write(%esp), write_r
Chris@4 792 cmpl write_r, nbytes_r
Chris@4 793 jbe .L_contiguous_in_window /* if (write >= nbytes) */
Chris@4 794
Chris@4 795 addl wsize(%esp), from_r
Chris@4 796 addl write_r, from_r
Chris@4 797 subl nbytes_r, from_r /* from += wsize + write - nbytes */
Chris@4 798 subl write_r, nbytes_r /* nbytes -= write */
Chris@4 799 #undef write_r
Chris@4 800
Chris@4 801 movl len(%esp), len_r
Chris@4 802 cmpl nbytes_r, len_r
Chris@4 803 jbe .L_do_copy1 /* if (nbytes >= len) */
Chris@4 804
Chris@4 805 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 806 rep movsb
Chris@4 807 movl window(%esp), from_r /* from = window */
Chris@4 808 movl write(%esp), nbytes_r /* nbytes = write */
Chris@4 809 cmpl nbytes_r, len_r
Chris@4 810 jbe .L_do_copy1 /* if (nbytes >= len) */
Chris@4 811
Chris@4 812 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 813 rep movsb
Chris@4 814 movl out_r, from_r
Chris@4 815 subl dist_r, from_r /* from = out - dist */
Chris@4 816 jmp .L_do_copy1
Chris@4 817
Chris@4 818 .L_contiguous_in_window:
Chris@4 819 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
Chris@4 820 * %ecx = nbytes, %eax = write, %eax = len
Chris@4 821 *
Chris@4 822 * else {
Chris@4 823 * from += write - nbytes;
Chris@4 824 * if (nbytes < len) {
Chris@4 825 * len -= nbytes;
Chris@4 826 * do {
Chris@4 827 * PUP(out) = PUP(from);
Chris@4 828 * } while (--nbytes);
Chris@4 829 * from = out - dist;
Chris@4 830 * }
Chris@4 831 * }
Chris@4 832 */
Chris@4 833 #define write_r %eax
Chris@4 834 addl write_r, from_r
Chris@4 835 subl nbytes_r, from_r /* from += write - nbytes */
Chris@4 836 #undef write_r
Chris@4 837
Chris@4 838 movl len(%esp), len_r
Chris@4 839 cmpl nbytes_r, len_r
Chris@4 840 jbe .L_do_copy1 /* if (nbytes >= len) */
Chris@4 841
Chris@4 842 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 843 rep movsb
Chris@4 844 movl out_r, from_r
Chris@4 845 subl dist_r, from_r /* from = out - dist */
Chris@4 846
Chris@4 847 .L_do_copy1:
Chris@4 848 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
Chris@4 849 * %eax = len
Chris@4 850 *
Chris@4 851 * while (len > 0) {
Chris@4 852 * PUP(out) = PUP(from);
Chris@4 853 * len--;
Chris@4 854 * }
Chris@4 855 * }
Chris@4 856 * } while (in < last && out < end);
Chris@4 857 */
Chris@4 858 #undef nbytes_r
Chris@4 859 #define in_r %esi
Chris@4 860 movl len_r, %ecx
Chris@4 861 rep movsb
Chris@4 862
Chris@4 863 movl in(%esp), in_r /* move in back to %esi, toss from */
Chris@4 864 jmp .L_while_test
Chris@4 865
Chris@4 866 #undef len_r
Chris@4 867 #undef dist_r
Chris@4 868
Chris@4 869 #endif /* NO_MMX || RUN_TIME_MMX */
Chris@4 870
Chris@4 871
Chris@4 872 /*** MMX code ***/
Chris@4 873
Chris@4 874 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
Chris@4 875
Chris@4 876 .align 32,0x90
Chris@4 877 .L_init_mmx:
Chris@4 878 emms
Chris@4 879
Chris@4 880 #undef bits_r
Chris@4 881 #undef bitslong_r
Chris@4 882 #define bitslong_r %ebp
Chris@4 883 #define hold_mm %mm0
Chris@4 884 movd %ebp, hold_mm
Chris@4 885 movl %ebx, bitslong_r
Chris@4 886
Chris@4 887 #define used_mm %mm1
Chris@4 888 #define dmask2_mm %mm2
Chris@4 889 #define lmask2_mm %mm3
Chris@4 890 #define lmask_mm %mm4
Chris@4 891 #define dmask_mm %mm5
Chris@4 892 #define tmp_mm %mm6
Chris@4 893
Chris@4 894 movd lmask(%esp), lmask_mm
Chris@4 895 movq lmask_mm, lmask2_mm
Chris@4 896 movd dmask(%esp), dmask_mm
Chris@4 897 movq dmask_mm, dmask2_mm
Chris@4 898 pxor used_mm, used_mm
Chris@4 899 movl lcode(%esp), %ebx /* ebx = lcode */
Chris@4 900 jmp .L_do_loop_mmx
Chris@4 901
Chris@4 902 .align 32,0x90
Chris@4 903 .L_while_test_mmx:
Chris@4 904 /* while (in < last && out < end)
Chris@4 905 */
Chris@4 906 cmpl out_r, end(%esp)
Chris@4 907 jbe .L_break_loop /* if (out >= end) */
Chris@4 908
Chris@4 909 cmpl in_r, last(%esp)
Chris@4 910 jbe .L_break_loop
Chris@4 911
Chris@4 912 .L_do_loop_mmx:
Chris@4 913 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 914
Chris@4 915 cmpl $32, bitslong_r
Chris@4 916 ja .L_get_length_code_mmx /* if (32 < bits) */
Chris@4 917
Chris@4 918 movd bitslong_r, tmp_mm
Chris@4 919 movd (in_r), %mm7
Chris@4 920 addl $4, in_r
Chris@4 921 psllq tmp_mm, %mm7
Chris@4 922 addl $32, bitslong_r
Chris@4 923 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
Chris@4 924
Chris@4 925 .L_get_length_code_mmx:
Chris@4 926 pand hold_mm, lmask_mm
Chris@4 927 movd lmask_mm, %eax
Chris@4 928 movq lmask2_mm, lmask_mm
Chris@4 929 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
Chris@4 930
Chris@4 931 .L_dolen_mmx:
Chris@4 932 movzbl %ah, %ecx /* ecx = this.bits */
Chris@4 933 movd %ecx, used_mm
Chris@4 934 subl %ecx, bitslong_r /* bits -= this.bits */
Chris@4 935
Chris@4 936 testb %al, %al
Chris@4 937 jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
Chris@4 938
Chris@4 939 shrl $16, %eax /* output this.val char */
Chris@4 940 stosb
Chris@4 941 jmp .L_while_test_mmx
Chris@4 942
Chris@4 943 .L_test_for_length_base_mmx:
Chris@4 944 #define len_r %edx
Chris@4 945 movl %eax, len_r /* len = this */
Chris@4 946 shrl $16, len_r /* len = this.val */
Chris@4 947
Chris@4 948 testb $16, %al
Chris@4 949 jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
Chris@4 950 andl $15, %eax /* op &= 15 */
Chris@4 951 jz .L_decode_distance_mmx /* if (!op) */
Chris@4 952
Chris@4 953 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 954 movd %eax, used_mm
Chris@4 955 movd hold_mm, %ecx
Chris@4 956 subl %eax, bitslong_r
Chris@4 957 andl .L_mask(,%eax,4), %ecx
Chris@4 958 addl %ecx, len_r /* len += hold & mask[op] */
Chris@4 959
Chris@4 960 .L_decode_distance_mmx:
Chris@4 961 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 962
Chris@4 963 cmpl $32, bitslong_r
Chris@4 964 ja .L_get_dist_code_mmx /* if (32 < bits) */
Chris@4 965
Chris@4 966 movd bitslong_r, tmp_mm
Chris@4 967 movd (in_r), %mm7
Chris@4 968 addl $4, in_r
Chris@4 969 psllq tmp_mm, %mm7
Chris@4 970 addl $32, bitslong_r
Chris@4 971 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
Chris@4 972
Chris@4 973 .L_get_dist_code_mmx: /* look up the distance code for the low bits of hold */
Chris@4 974 movl dcode(%esp), %ebx /* ebx = dcode */
Chris@4 975 pand hold_mm, dmask_mm /* dmask_mm = hold & dmask */
Chris@4 976 movd dmask_mm, %eax /* eax = low 32 bits of (hold & dmask) */
Chris@4 977 movq dmask2_mm, dmask_mm /* restore dmask_mm from its copy in dmask2_mm */
Chris@4 978 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & dmask] */
Chris@4 979
Chris@4 980 .L_dodist_mmx:
Chris@4 981 #define dist_r %ebx
Chris@4 982 movzbl %ah, %ecx /* ecx = this.bits */
Chris@4 983 movl %eax, dist_r
Chris@4 984 shrl $16, dist_r /* dist = this.val */
Chris@4 985 subl %ecx, bitslong_r /* bits -= this.bits */
Chris@4 986 movd %ecx, used_mm
Chris@4 987
Chris@4 988 testb $16, %al /* if ((op & 16) == 0) */
Chris@4 989 jz .L_test_for_second_level_dist_mmx
Chris@4 990 andl $15, %eax /* op &= 15 */
Chris@4 991 jz .L_check_dist_one_mmx
Chris@4 992
Chris@4 993 .L_add_bits_to_dist_mmx:
Chris@4 994 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 995 movd %eax, used_mm /* save bit length of current op */
Chris@4 996 movd hold_mm, %ecx /* get the next bits on input stream */
Chris@4 997 subl %eax, bitslong_r /* bits -= op bits */
Chris@4 998 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
Chris@4 999 addl %ecx, dist_r /* dist += hold & mask[op] */
Chris@4 1000
Chris@4 1001 .L_check_window_mmx: /* copy len bytes from dist bytes behind out */
Chris@4 1002 movl in_r, in(%esp) /* save in so from can use its reg */
Chris@4 1003 movl out_r, %eax
Chris@4 1004 subl beg(%esp), %eax /* nbytes = out - beg */
Chris@4 1005
Chris@4 1006 cmpl dist_r, %eax
Chris@4 1007 jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */
Chris@4 1008
Chris@4 1009 movl len_r, %ecx
Chris@4 1010 movl out_r, from_r
Chris@4 1011 subl dist_r, from_r /* from = out - dist */
Chris@4 1012 /* first 3 bytes are copied by hand, the remaining len - 3 by rep movsb */
Chris@4 1013 subl $3, %ecx /* ecx = len - 3; NOTE(review): relies on len >= 3 here -- confirm */
Chris@4 1014 movb (from_r), %al
Chris@4 1015 movb %al, (out_r) /* out[0] = from[0] */
Chris@4 1016 movb 1(from_r), %al
Chris@4 1017 movb 2(from_r), %dl /* %dl overlaps len_r (%edx); len was already copied to %ecx */
Chris@4 1018 addl $3, from_r
Chris@4 1019 movb %al, 1(out_r) /* out[1] = from[1] */
Chris@4 1020 movb %dl, 2(out_r) /* out[2] = from[2] */
Chris@4 1021 addl $3, out_r
Chris@4 1022 rep movsb /* copy the remaining %ecx bytes */
Chris@4 1023
Chris@4 1024 movl in(%esp), in_r /* move in back to %esi, toss from */
Chris@4 1025 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
Chris@4 1026 jmp .L_while_test_mmx
Chris@4 1027
Chris@4 1028 .align 16,0x90
Chris@4 1029 .L_check_dist_one_mmx: /* special case: dist == 1 repeats the previous output byte */
Chris@4 1030 cmpl $1, dist_r
Chris@4 1031 jne .L_check_window_mmx /* if (dist != 1) use the general copy */
Chris@4 1032 cmpl out_r, beg(%esp)
Chris@4 1033 je .L_check_window_mmx /* if (out == beg) use the general copy */
Chris@4 1034
Chris@4 1035 decl out_r /* out now points at the previous output byte */
Chris@4 1036 movl len_r, %ecx
Chris@4 1037 movb (out_r), %al /* al = previous output byte */
Chris@4 1038 subl $3, %ecx /* ecx = len - 3; NOTE(review): relies on len >= 3 here -- confirm */
Chris@4 1039 /* store the byte three times by hand, then len - 3 more with rep stosb */
Chris@4 1040 movb %al, 1(out_r)
Chris@4 1041 movb %al, 2(out_r)
Chris@4 1042 movb %al, 3(out_r)
Chris@4 1043 addl $4, out_r /* undo the decl and step past the 3 bytes just written */
Chris@4 1044 rep stosb /* fill the remaining %ecx bytes with %al */
Chris@4 1045
Chris@4 1046 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
Chris@4 1047 jmp .L_while_test_mmx
Chris@4 1048
Chris@4 1049 .align 16,0x90
Chris@4 1050 .L_test_for_second_level_length_mmx: /* length entry without bit 16 set in op */
Chris@4 1051 testb $64, %al
Chris@4 1052 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
Chris@4 1053
Chris@4 1054 andl $15, %eax /* op &= 15 */
Chris@4 1055 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 1056 movd hold_mm, %ecx /* ecx = low 32 bits of hold */
Chris@4 1057 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
Chris@4 1058 addl len_r, %ecx /* ecx = len + (hold & mask[op]) */
Chris@4 1059 movl (%ebx,%ecx,4), %eax /* eax = lcode[len + (hold & mask[op])] */
Chris@4 1060 jmp .L_dolen_mmx /* decode the entry just loaded */
Chris@4 1061
Chris@4 1062 .align 16,0x90
Chris@4 1063 .L_test_for_second_level_dist_mmx: /* distance entry without bit 16 set in op */
Chris@4 1064 testb $64, %al
Chris@4 1065 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
Chris@4 1066
Chris@4 1067 andl $15, %eax /* op &= 15 */
Chris@4 1068 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 1069 movd hold_mm, %ecx /* ecx = low 32 bits of hold */
Chris@4 1070 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
Chris@4 1071 movl dcode(%esp), %eax /* eax = dcode (op is no longer needed) */
Chris@4 1072 addl dist_r, %ecx /* ecx = dist + (hold & mask[op]) */
Chris@4 1073 movl (%eax,%ecx,4), %eax /* eax = dcode[dist + (hold & mask[op])] */
Chris@4 1074 jmp .L_dodist_mmx /* decode the entry just loaded */
Chris@4 1075
Chris@4 1076 .align 16,0x90
Chris@4 1077 .L_clip_window_mmx: /* reached from .L_check_window_mmx with %eax = out - beg, which is below dist */
Chris@4 1078 #define nbytes_r %ecx
Chris@4 1079 movl %eax, nbytes_r
Chris@4 1080 movl wsize(%esp), %eax /* prepare for dist compare */
Chris@4 1081 negl nbytes_r /* nbytes = -nbytes */
Chris@4 1082 movl window(%esp), from_r /* from = window */
Chris@4 1083
Chris@4 1084 cmpl dist_r, %eax
Chris@4 1085 jb .L_invalid_distance_too_far /* if (dist > wsize) */
Chris@4 1086
Chris@4 1087 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
Chris@4 1088 cmpl $0, write(%esp)
Chris@4 1089 jne .L_wrap_around_window_mmx /* if (write != 0) */
Chris@4 1090
Chris@4 1091 subl nbytes_r, %eax
Chris@4 1092 addl %eax, from_r /* from += wsize - nbytes */
Chris@4 1093
Chris@4 1094 cmpl nbytes_r, len_r
Chris@4 1095 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
Chris@4 1096
Chris@4 1097 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 1098 rep movsb /* copy nbytes (%ecx) bytes out of the window */
Chris@4 1099 movl out_r, from_r
Chris@4 1100 subl dist_r, from_r /* from = out - dist */
Chris@4 1101 jmp .L_do_copy1_mmx /* the remaining len bytes are copied there */
Chris@4 1111
Chris@4 1112 .L_wrap_around_window_mmx:
Chris@4 1113 #define write_r %eax
Chris@4 1114 movl write(%esp), write_r
Chris@4 1115 cmpl write_r, nbytes_r
Chris@4 1116 jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */
Chris@4 1117
Chris@4 1118 addl wsize(%esp), from_r
Chris@4 1119 addl write_r, from_r
Chris@4 1120 subl nbytes_r, from_r /* from += wsize + write - nbytes */
Chris@4 1121 subl write_r, nbytes_r /* nbytes -= write */
Chris@4 1122 #undef write_r
Chris@4 1123
Chris@4 1124 cmpl nbytes_r, len_r
Chris@4 1125 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
Chris@4 1126
Chris@4 1127 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 1128 rep movsb
Chris@4 1129 movl window(%esp), from_r /* from = window */
Chris@4 1130 movl write(%esp), nbytes_r /* nbytes = write */
Chris@4 1131 cmpl nbytes_r, len_r
Chris@4 1132 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
Chris@4 1133
Chris@4 1134 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 1135 rep movsb
Chris@4 1136 movl out_r, from_r
Chris@4 1137 subl dist_r, from_r /* from = out - dist */
Chris@4 1138 jmp .L_do_copy1_mmx
Chris@4 1139
Chris@4 1140 .L_contiguous_in_window_mmx:
Chris@4 1141 #define write_r %eax
Chris@4 1142 addl write_r, from_r
Chris@4 1143 subl nbytes_r, from_r /* from += write - nbytes */
Chris@4 1144 #undef write_r
Chris@4 1145
Chris@4 1146 cmpl nbytes_r, len_r
Chris@4 1147 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
Chris@4 1148
Chris@4 1149 subl nbytes_r, len_r /* len -= nbytes */
Chris@4 1150 rep movsb
Chris@4 1151 movl out_r, from_r
Chris@4 1152 subl dist_r, from_r /* from = out - dist */
Chris@4 1153
Chris@4 1154 .L_do_copy1_mmx:
Chris@4 1155 #undef nbytes_r
Chris@4 1156 #define in_r %esi
Chris@4 1157 movl len_r, %ecx
Chris@4 1158 rep movsb
Chris@4 1159
Chris@4 1160 movl in(%esp), in_r /* move in back to %esi, toss from */
Chris@4 1161 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
Chris@4 1162 jmp .L_while_test_mmx
Chris@4 1163
Chris@4 1164 #undef hold_r
Chris@4 1165 #undef bitslong_r
Chris@4 1166
Chris@4 1167 #endif /* USE_MMX || RUN_TIME_MMX */
Chris@4 1168
Chris@4 1169
Chris@4 1170 /*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
Chris@4 1171
Chris@4 1172 .L_invalid_distance_code:
Chris@4 1173 /* else {
Chris@4 1174 * strm->msg = "invalid distance code";
Chris@4 1175 * state->mode = BAD;
Chris@4 1176 * }
Chris@4 1177 */
Chris@4 1178 movl $.L_invalid_distance_code_msg, %ecx
Chris@4 1179 movl $INFLATE_MODE_BAD, %edx
Chris@4 1180 jmp .L_update_stream_state
Chris@4 1181
Chris@4 1182 .L_test_for_end_of_block:
Chris@4 1183 /* else if (op & 32) {
Chris@4 1184 * state->mode = TYPE;
Chris@4 1185 * break;
Chris@4 1186 * }
Chris@4 1187 */
Chris@4 1188 testb $32, %al
Chris@4 1189 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
Chris@4 1190
Chris@4 1191 movl $0, %ecx
Chris@4 1192 movl $INFLATE_MODE_TYPE, %edx
Chris@4 1193 jmp .L_update_stream_state
Chris@4 1194
Chris@4 1195 .L_invalid_literal_length_code:
Chris@4 1196 /* else {
Chris@4 1197 * strm->msg = "invalid literal/length code";
Chris@4 1198 * state->mode = BAD;
Chris@4 1199 * }
Chris@4 1200 */
Chris@4 1201 movl $.L_invalid_literal_length_code_msg, %ecx
Chris@4 1202 movl $INFLATE_MODE_BAD, %edx
Chris@4 1203 jmp .L_update_stream_state
Chris@4 1204
Chris@4 1205 .L_invalid_distance_too_far:
Chris@4 1206 /* strm->msg = "invalid distance too far back";
Chris@4 1207 * state->mode = BAD;
Chris@4 1208 */
Chris@4 1209 movl in(%esp), in_r /* from_r has in's reg, put in back */
Chris@4 1210 movl $.L_invalid_distance_too_far_msg, %ecx
Chris@4 1211 movl $INFLATE_MODE_BAD, %edx
Chris@4 1212 jmp .L_update_stream_state
Chris@4 1213
Chris@4 1214 .L_update_stream_state:
Chris@4 1215 /* set strm->msg = %ecx, strm->state->mode = %edx */
Chris@4 1216 movl strm_sp(%esp), %eax
Chris@4 1217 testl %ecx, %ecx /* if (msg != NULL) */
Chris@4 1218 jz .L_skip_msg
Chris@4 1219 movl %ecx, msg_strm(%eax) /* strm->msg = msg */
Chris@4 1220 .L_skip_msg:
Chris@4 1221 movl state_strm(%eax), %eax /* state = strm->state */
Chris@4 1222 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
Chris@4 1223 jmp .L_break_loop
Chris@4 1224
Chris@4 1225 .align 32,0x90
Chris@4 1226 .L_break_loop:
Chris@4 1227
Chris@4 1228 /*
Chris@4 1229 * Regs:
Chris@4 1230 *
Chris@4 1231 * bits = %ebp when mmx, and in %ebx when non-mmx
Chris@4 1232 * hold = %hold_mm when mmx, and in %ebp when non-mmx
Chris@4 1233 * in = %esi
Chris@4 1234 * out = %edi
Chris@4 1235 */
Chris@4 1236
Chris@4 1237 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
Chris@4 1238
Chris@4 1239 #if defined( RUN_TIME_MMX )
Chris@4 1240
Chris@4 1241 cmpl $DO_USE_MMX, inflate_fast_use_mmx
Chris@4 1242 jne .L_update_next_in
Chris@4 1243
Chris@4 1244 #endif /* RUN_TIME_MMX */
Chris@4 1245
Chris@4 1246 movl %ebp, %ebx
Chris@4 1247
Chris@4 1248 .L_update_next_in:
Chris@4 1249
Chris@4 1250 #endif
Chris@4 1251
Chris@4 1252 #define strm_r %eax
Chris@4 1253 #define state_r %edx
Chris@4 1254
Chris@4 1255 /* len = bits >> 3;
Chris@4 1256 * in -= len;
Chris@4 1257 * bits -= len << 3;
Chris@4 1258 * hold &= (1U << bits) - 1;
Chris@4 1259 * state->hold = hold;
Chris@4 1260 * state->bits = bits;
Chris@4 1261 * strm->next_in = in;
Chris@4 1262 * strm->next_out = out;
Chris@4 1263 */
Chris@4 1264 movl strm_sp(%esp), strm_r
Chris@4 1265 movl %ebx, %ecx
Chris@4 1266 movl state_strm(strm_r), state_r
Chris@4 1267 shrl $3, %ecx
Chris@4 1268 subl %ecx, in_r
Chris@4 1269 shll $3, %ecx
Chris@4 1270 subl %ecx, %ebx
Chris@4 1271 movl out_r, next_out_strm(strm_r)
Chris@4 1272 movl %ebx, bits_state(state_r)
Chris@4 1273 movl %ebx, %ecx
Chris@4 1274
Chris@4 1275 leal buf(%esp), %ebx
Chris@4 1276 cmpl %ebx, last(%esp)
Chris@4 1277 jne .L_buf_not_used /* if buf != last */
Chris@4 1278
Chris@4 1279 subl %ebx, in_r /* in -= buf */
Chris@4 1280 movl next_in_strm(strm_r), %ebx
Chris@4 1281 movl %ebx, last(%esp) /* last = strm->next_in */
Chris@4 1282 addl %ebx, in_r /* in += strm->next_in */
Chris@4 1283 movl avail_in_strm(strm_r), %ebx
Chris@4 1284 subl $11, %ebx
Chris@4 1285 addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */
Chris@4 1286
Chris@4 1287 .L_buf_not_used:
Chris@4 1288 movl in_r, next_in_strm(strm_r)
Chris@4 1289
Chris@4 1290 movl $1, %ebx
Chris@4 1291 shll %cl, %ebx
Chris@4 1292 decl %ebx
Chris@4 1293
Chris@4 1294 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
Chris@4 1295
Chris@4 1296 #if defined( RUN_TIME_MMX )
Chris@4 1297
Chris@4 1298 cmpl $DO_USE_MMX, inflate_fast_use_mmx
Chris@4 1299 jne .L_update_hold
Chris@4 1300
Chris@4 1301 #endif /* RUN_TIME_MMX */
Chris@4 1302
Chris@4 1303 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
Chris@4 1304 movd hold_mm, %ebp
Chris@4 1305
Chris@4 1306 emms
Chris@4 1307
Chris@4 1308 .L_update_hold:
Chris@4 1309
Chris@4 1310 #endif /* USE_MMX || RUN_TIME_MMX */
Chris@4 1311
Chris@4 1312 andl %ebx, %ebp
Chris@4 1313 movl %ebp, hold_state(state_r)
Chris@4 1314
Chris@4 1315 #define last_r %ebx
Chris@4 1316
Chris@4 1317 /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
Chris@4 1318 movl last(%esp), last_r
Chris@4 1319 cmpl in_r, last_r
Chris@4 1320 jbe .L_last_is_smaller /* if (in >= last) */
Chris@4 1321
Chris@4 1322 subl in_r, last_r /* last -= in */
Chris@4 1323 addl $11, last_r /* last += 11 */
Chris@4 1324 movl last_r, avail_in_strm(strm_r)
Chris@4 1325 jmp .L_fixup_out
Chris@4 1326 .L_last_is_smaller:
Chris@4 1327 subl last_r, in_r /* in -= last */
Chris@4 1328 negl in_r /* in = -in */
Chris@4 1329 addl $11, in_r /* in += 11 */
Chris@4 1330 movl in_r, avail_in_strm(strm_r)
Chris@4 1331
Chris@4 1332 #undef last_r
Chris@4 1333 #define end_r %ebx
Chris@4 1334
Chris@4 1335 .L_fixup_out:
Chris@4 1336 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
Chris@4 1337 movl end(%esp), end_r
Chris@4 1338 cmpl out_r, end_r
Chris@4 1339 jbe .L_end_is_smaller /* if (out >= end) */
Chris@4 1340
Chris@4 1341 subl out_r, end_r /* end -= out */
Chris@4 1342 addl $257, end_r /* end += 257 */
Chris@4 1343 movl end_r, avail_out_strm(strm_r)
Chris@4 1344 jmp .L_done
Chris@4 1345 .L_end_is_smaller:
Chris@4 1346 subl end_r, out_r /* out -= end */
Chris@4 1347 negl out_r /* out = -out */
Chris@4 1348 addl $257, out_r /* out += 257 */
Chris@4 1349 movl out_r, avail_out_strm(strm_r)
Chris@4 1350
Chris@4 1351 #undef end_r
Chris@4 1352 #undef strm_r
Chris@4 1353 #undef state_r
Chris@4 1354
Chris@4 1355 .L_done:
Chris@4 1356 addl $local_var_size, %esp
Chris@4 1357 popf
Chris@4 1358 popl %ebx
Chris@4 1359 popl %ebp
Chris@4 1360 popl %esi
Chris@4 1361 popl %edi
Chris@4 1362 ret
Chris@4 1363
Chris@4 1364 #if defined( GAS_ELF )
Chris@4 1365 /* elf info */
Chris@4 1366 .type inflate_fast,@function
Chris@4 1367 .size inflate_fast,.-inflate_fast
Chris@4 1368 #endif