annotate src/zlib-1.2.7/contrib/masmx64/gvmat64.asm @ 148:b4bfdf10c4b3

Update Win64 capnp builds to v0.6
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 22 May 2017 18:56:49 +0100
parents 8a15ff55d9af
children
rev   line source
cannam@89 1 ;uInt longest_match_x64(
cannam@89 2 ; deflate_state *s,
cannam@89 3 ; IPos cur_match); /* current match */
cannam@89 4
cannam@89 5 ; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86_64
cannam@89 6 ; (AMD64 on Athlon 64, Opteron, Phenom
cannam@89 7 ; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7)
cannam@89 8 ; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
cannam@89 9 ;
cannam@89 10 ; File written by Gilles Vollant, by converting to assembly the longest_match
cannam@89 11 ; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.
cannam@89 12 ;
cannam@89 13 ; and by taking inspiration on asm686 with masm, optimised assembly code
cannam@89 14 ; from Brian Raiter, written 1998
cannam@89 15 ;
cannam@89 16 ; This software is provided 'as-is', without any express or implied
cannam@89 17 ; warranty. In no event will the authors be held liable for any damages
cannam@89 18 ; arising from the use of this software.
cannam@89 19 ;
cannam@89 20 ; Permission is granted to anyone to use this software for any purpose,
cannam@89 21 ; including commercial applications, and to alter it and redistribute it
cannam@89 22 ; freely, subject to the following restrictions:
cannam@89 23 ;
cannam@89 24 ; 1. The origin of this software must not be misrepresented; you must not
cannam@89 25 ; claim that you wrote the original software. If you use this software
cannam@89 26 ; in a product, an acknowledgment in the product documentation would be
cannam@89 27 ; appreciated but is not required.
cannam@89 28 ; 2. Altered source versions must be plainly marked as such, and must not be
cannam@89 29 ; misrepresented as being the original software
cannam@89 30 ; 3. This notice may not be removed or altered from any source distribution.
cannam@89 31 ;
cannam@89 32 ;
cannam@89 33 ;
cannam@89 34 ; http://www.zlib.net
cannam@89 35 ; http://www.winimage.com/zLibDll
cannam@89 36 ; http://www.muppetlabs.com/~breadbox/software/assembly.html
cannam@89 37 ;
cannam@89 38 ; to compile this file for infozip Zip, I use option:
cannam@89 39 ; ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm
cannam@89 40 ;
cannam@89 41 ; to compile this file for zLib, I use option:
cannam@89 42 ; ml64.exe /Flgvmat64 /c /Zi gvmat64.asm
cannam@89 43 ; Be careful to adapt zlib1222add below to your version of zLib
cannam@89 44 ; (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change
cannam@89 45 ; value of zlib1222add later)
cannam@89 46 ;
cannam@89 47 ; This file compiles with Microsoft Macro Assembler (x64) for AMD64
cannam@89 48 ;
cannam@89 49 ; ml64.exe is shipped with Visual Studio 2005/2008/2010 and Windows WDK
cannam@89 50 ;
cannam@89 51 ; (you can get Windows WDK with ml64 for AMD64 from
cannam@89 52 ; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)
cannam@89 53 ;
cannam@89 54
cannam@89 55
cannam@89 56 ;uInt longest_match(s, cur_match)
cannam@89 57 ; deflate_state *s;
cannam@89 58 ; IPos cur_match; /* current match */
cannam@89 59 .code
cannam@89 60 longest_match PROC
cannam@89 61 ; Follows the hash chain starting at cur_match and returns in eax the best match length found for the string at window + strstart; the position of each accepted match is stored in match_start.
cannam@89 62 ; zlib build: rcx = deflate_state *s, edx = cur_match. INFOZIP build: ecx = cur_match and the state is read from the COMM globals declared below.
cannam@89 63 ;LocalVarsSize equ 88
cannam@89 64 LocalVarsSize equ 72
cannam@89 65 ; NOTE(review): no instruction below adjusts rsp, so every "rsp + k - LocalVarsSize" slot (k = 8..64) sits at a negative offset from rsp
cannam@89 66 ; registers used : rax,rbx,rcx,rdx,rsi,rdi,rbp,r8,r9,r10,r11,r12,r13
cannam@89 67 ; free registers : r14,r15
cannam@89 68 ; rsp is never modified (locals are addressed relative to the entry value)
cannam@89 69
cannam@89 70 chainlenwmask equ rsp + 8 - LocalVarsSize ; high word: current chain len
cannam@89 71 ; low word: s->wmask
cannam@89 72 ;window equ rsp + xx - LocalVarsSize ; local copy of s->window ; stored in r10
cannam@89 73 ;windowbestlen equ rsp + xx - LocalVarsSize ; s->window + bestlen , kept in rsi (= r10+r11)
cannam@89 74 ;scanstart equ rsp + xx - LocalVarsSize ; first two bytes of string ; stored in r12w
cannam@89 75 ;scanend equ rsp + xx - LocalVarsSize ; last two bytes of string use ebx
cannam@89 76 ;scanalign equ rsp + xx - LocalVarsSize ; dword-misalignment of string r13
cannam@89 77 ;bestlen equ rsp + xx - LocalVarsSize ; size of best match so far -> r11d
cannam@89 78 ;scan equ rsp + xx - LocalVarsSize ; ptr to string wanting match -> r9
cannam@89 79 IFDEF INFOZIP
cannam@89 80 ELSE
cannam@89 81 nicematch equ (rsp + 16 - LocalVarsSize) ; a good enough match size
cannam@89 82 ENDIF
cannam@89 83
cannam@89 84 save_rdi equ rsp + 24 - LocalVarsSize
cannam@89 85 save_rsi equ rsp + 32 - LocalVarsSize
cannam@89 86 save_rbx equ rsp + 40 - LocalVarsSize
cannam@89 87 save_rbp equ rsp + 48 - LocalVarsSize
cannam@89 88 save_r12 equ rsp + 56 - LocalVarsSize
cannam@89 89 save_r13 equ rsp + 64 - LocalVarsSize
cannam@89 90 ;save_r14 equ rsp + 72 - LocalVarsSize
cannam@89 91 ;save_r15 equ rsp + 80 - LocalVarsSize
cannam@89 92 ; NOTE(review): the Microsoft x64 ABI defines no red zone, so data kept below rsp is not guaranteed to be preserved - confirm this is acceptable here
cannam@89 93
cannam@89 94 ; summary of register usage (as used by the code below)
cannam@89 95 ; scanend ebx
cannam@89 96 ; scanendw bx
cannam@89 97 ; chainlenwmask edx
cannam@89 98 ; curmatch r8
cannam@89 99 ; curmatchd r8d
cannam@89 100 ; windowbestlen rsi
cannam@89 101 ; scanalign r13
cannam@89 102 ; scanalignd r13d
cannam@89 103 ; window r10
cannam@89 104 ; bestlen r11
cannam@89 105 ; bestlend r11d
cannam@89 106 ; scanstart r12d
cannam@89 107 ; scanstartw r12w
cannam@89 108 ; scan r9
cannam@89 109 ; nicematch memory operand [nicematch]
cannam@89 110 ; limit rbp
cannam@89 111 ; limitd ebp
cannam@89 112 ; prev rdi (rcx keeps the deflate_state pointer in the zlib build)
cannam@89 113 ; (rsi, rdi and rdx are temporarily reused as compare pointers / index inside LoopCmps)
cannam@89 114 ; all the +4 offsets are due to the addition of pending_buf_size (in zlib,
cannam@89 115 ; in the deflate_state structure) since the asm code was first written
cannam@89 116 ; (if you compile with zlib 1.0.4 or older, remove the +4).
cannam@89 117 ; Note : these values are good with an 8 bytes boundary pack structure
cannam@89 118
cannam@89 119
cannam@89 120 MAX_MATCH equ 258
cannam@89 121 MIN_MATCH equ 3
cannam@89 122 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
cannam@89 123
cannam@89 124
cannam@89 125 ;;; Offsets for fields in the deflate_state structure. These numbers
cannam@89 126 ;;; are calculated from the definition of deflate_state, with the
cannam@89 127 ;;; assumption that the compiler will dword-align the fields. (Thus,
cannam@89 128 ;;; changing the definition of deflate_state could easily cause this
cannam@89 129 ;;; program to crash horribly, without so much as a warning at
cannam@89 130 ;;; compile time. Sigh.)
cannam@89 131
cannam@89 132 ; all the +zlib1222add offsets are due to the addition of fields
cannam@89 133 ; in zlib in the deflate_state structure since the asm code was first written
cannam@89 134 ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
cannam@89 135 ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
cannam@89 136 ; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
cannam@89 137
cannam@89 138
cannam@89 139 IFDEF INFOZIP
cannam@89 140
cannam@89 141 _DATA SEGMENT
cannam@89 142 COMM window_size:DWORD
cannam@89 143 ; WMask ; 7fff
cannam@89 144 COMM window:BYTE:010040H
cannam@89 145 COMM prev:WORD:08000H
cannam@89 146 ; MatchLen : unused
cannam@89 147 ; PrevMatch : unused
cannam@89 148 COMM strstart:DWORD
cannam@89 149 COMM match_start:DWORD
cannam@89 150 ; Lookahead : ignore
cannam@89 151 COMM prev_length:DWORD ; PrevLen
cannam@89 152 COMM max_chain_length:DWORD
cannam@89 153 COMM good_match:DWORD
cannam@89 154 COMM nice_match:DWORD
cannam@89 155 prev_ad equ OFFSET prev
cannam@89 156 window_ad equ OFFSET window
cannam@89 157 nicematch equ nice_match
cannam@89 158 _DATA ENDS
cannam@89 159 WMask equ 07fffh
cannam@89 160
cannam@89 161 ELSE
cannam@89 162
cannam@89 163 IFNDEF zlib1222add
cannam@89 164 zlib1222add equ 8
cannam@89 165 ENDIF
cannam@89 166 dsWSize equ 56+zlib1222add+(zlib1222add/2)
cannam@89 167 dsWMask equ 64+zlib1222add+(zlib1222add/2)
cannam@89 168 dsWindow equ 72+zlib1222add
cannam@89 169 dsPrev equ 88+zlib1222add
cannam@89 170 dsMatchLen equ 128+zlib1222add
cannam@89 171 dsPrevMatch equ 132+zlib1222add
cannam@89 172 dsStrStart equ 140+zlib1222add
cannam@89 173 dsMatchStart equ 144+zlib1222add
cannam@89 174 dsLookahead equ 148+zlib1222add
cannam@89 175 dsPrevLen equ 152+zlib1222add
cannam@89 176 dsMaxChainLen equ 156+zlib1222add
cannam@89 177 dsGoodMatch equ 172+zlib1222add
cannam@89 178 dsNiceMatch equ 176+zlib1222add
cannam@89 179
cannam@89 180 window_size equ [ rcx + dsWSize]
cannam@89 181 WMask equ [ rcx + dsWMask]
cannam@89 182 window_ad equ [ rcx + dsWindow]
cannam@89 183 prev_ad equ [ rcx + dsPrev]
cannam@89 184 strstart equ [ rcx + dsStrStart]
cannam@89 185 match_start equ [ rcx + dsMatchStart]
cannam@89 186 Lookahead equ [ rcx + dsLookahead] ; 0ffffffffh on infozip
cannam@89 187 prev_length equ [ rcx + dsPrevLen]
cannam@89 188 max_chain_length equ [ rcx + dsMaxChainLen]
cannam@89 189 good_match equ [ rcx + dsGoodMatch]
cannam@89 190 nice_match equ [ rcx + dsNiceMatch]
cannam@89 191 ENDIF
cannam@89 192
cannam@89 193 ; zlib build: parameter 1 in rcx (deflate_state* s), param 2 in edx (cur match)
cannam@89 194
cannam@89 195 ; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
cannam@89 196 ; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
cannam@89 197 ;
cannam@89 198 ; All registers must be preserved across the call, except for
cannam@89 199 ; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.
cannam@89 200
cannam@89 201
cannam@89 202
cannam@89 203 ;;; Save the callee-saved registers used below (rdi, rsi, rbx, rbp, r12,
cannam@89 204 ;;; r13) into the save_* slots; rsp itself is not adjusted.
cannam@89 205
cannam@89 206
cannam@89 207 ;;; Retrieve the function arguments. r8d will hold cur_match
cannam@89 208 ;;; throughout the lookup loop. In the zlib build rcx keeps the pointer to
cannam@89 209 ;;; the deflate_state structure for the whole function (every field equ
cannam@89 210 ;;; above is rcx-relative and rcx is never written).
cannam@89 211
cannam@89 212 ; parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)
cannam@89 213
cannam@89 214 ; the 32-bit mov into r8d below clears the high 32 bits of r8 (the upper half of the incoming argument register may be garbage)
cannam@89 215
cannam@89 216 mov [save_rdi],rdi
cannam@89 217 mov [save_rsi],rsi
cannam@89 218 mov [save_rbx],rbx
cannam@89 219 mov [save_rbp],rbp
cannam@89 220 IFDEF INFOZIP
cannam@89 221 mov r8d,ecx
cannam@89 222 ELSE
cannam@89 223 mov r8d,edx
cannam@89 224 ENDIF
cannam@89 225 mov [save_r12],r12
cannam@89 226 mov [save_r13],r13
cannam@89 227 ; mov [save_r14],r14
cannam@89 228 ; mov [save_r15],r15
cannam@89 229
cannam@89 230
cannam@89 231 ;;; uInt wmask = s->w_mask;
cannam@89 232 ;;; unsigned chain_length = s->max_chain_length;
cannam@89 233 ;;; if (s->prev_length >= s->good_match) {
cannam@89 234 ;;; chain_length >>= 2;
cannam@89 235 ;;; }
cannam@89 236
cannam@89 237 mov edi, prev_length
cannam@89 238 mov esi, good_match
cannam@89 239 mov eax, WMask
cannam@89 240 mov ebx, max_chain_length
cannam@89 241 cmp edi, esi
cannam@89 242 jl LastMatchGood ; prev_length < good_match (signed compare): keep the full chain length
cannam@89 243 shr ebx, 2
cannam@89 244 LastMatchGood:
cannam@89 245
cannam@89 246 ;;; chainlen is decremented once beforehand so that the function can
cannam@89 247 ;;; use the sign flag instead of the zero flag for the exit test.
cannam@89 248 ;;; It is then shifted into the high word, to make room for the wmask
cannam@89 249 ;;; value, which it will always accompany.
cannam@89 250
cannam@89 251 dec ebx
cannam@89 252 shl ebx, 16
cannam@89 253 or ebx, eax ; ebx = ((chain_length - 1) << 16) | wmask
cannam@89 254
cannam@89 255 ;;; on zlib only
cannam@89 256 ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
cannam@89 257
cannam@89 258 IFDEF INFOZIP
cannam@89 259 mov [chainlenwmask], ebx
cannam@89 260 ; on infozip nice_match = [nice_match]
cannam@89 261 ELSE
cannam@89 262 mov eax, nice_match
cannam@89 263 mov [chainlenwmask], ebx
cannam@89 264 mov r10d, Lookahead
cannam@89 265 cmp r10d, eax
cannam@89 266 cmovnl r10d, eax ; r10d = the smaller of lookahead and nice_match (signed compare)
cannam@89 267 mov [nicematch],r10d
cannam@89 268 ENDIF
cannam@89 269
cannam@89 270 ;;; register Bytef *scan = s->window + s->strstart;
cannam@89 271 mov r10, window_ad
cannam@89 272 mov ebp, strstart
cannam@89 273 lea r13, [r10 + rbp]
cannam@89 274
cannam@89 275 ;;; Determine how many bytes the scan ptr is off from being
cannam@89 276 ;;; dword-aligned.
cannam@89 277
cannam@89 278 mov r9,r13 ; r9 = scan
cannam@89 279 neg r13
cannam@89 280 and r13,3 ; r13 = (-scan) & 3 = scanalign
cannam@89 281
cannam@89 282 ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
cannam@89 283 ;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
cannam@89 284 IFDEF INFOZIP
cannam@89 285 mov eax,07efah ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(258+3+1))
cannam@89 286 ELSE
cannam@89 287 mov eax, window_size
cannam@89 288 sub eax, MIN_LOOKAHEAD
cannam@89 289 ENDIF
cannam@89 290 xor edi,edi
cannam@89 291 sub ebp, eax ; ebp = strstart - MAX_DIST; these flags feed the cmovng below
cannam@89 292
cannam@89 293 mov r11d, prev_length ; best_len = s->prev_length (mov leaves the flags intact)
cannam@89 294
cannam@89 295 cmovng ebp,edi ; limit = 0 when strstart - MAX_DIST <= 0 (signed)
cannam@89 296
cannam@89 297 ;;; int best_len = s->prev_length;
cannam@89 298
cannam@89 299
cannam@89 300 ;;; Store the sum of s->window + best_len in rsi.
cannam@89 301
cannam@89 302 lea rsi,[r10+r11]
cannam@89 303
cannam@89 304 ;;; register ush scan_start = *(ushf*)scan;
cannam@89 305 ;;; register ush scan_end = *(ushf*)(scan+best_len-1);
cannam@89 306 ;;; Posf *prev = s->prev;
cannam@89 307
cannam@89 308 movzx r12d,word ptr [r9]
cannam@89 309 movzx ebx, word ptr [r9 + r11 - 1]
cannam@89 310
cannam@89 311 mov rdi, prev_ad
cannam@89 312
cannam@89 313 ;;; Jump into the main loop.
cannam@89 314
cannam@89 315 mov edx, [chainlenwmask]
cannam@89 316
cannam@89 317 cmp bx,word ptr [rsi + r8 - 1]
cannam@89 318 jz LookupLoopIsZero
cannam@89 319 ; LookupLoop1/2/4 below repeat the same instruction sequence as LookupLoop (unrolled copies)
cannam@89 320 LookupLoop1:
cannam@89 321 and r8d, edx
cannam@89 322
cannam@89 323 movzx r8d, word ptr [rdi + r8*2]
cannam@89 324 cmp r8d, ebp
cannam@89 325 jbe LeaveNow
cannam@89 326 sub edx, 00010000h
cannam@89 327 js LeaveNow
cannam@89 328
cannam@89 329 LoopEntry1:
cannam@89 330 cmp bx,word ptr [rsi + r8 - 1]
cannam@89 331 jz LookupLoopIsZero
cannam@89 332
cannam@89 333 LookupLoop2:
cannam@89 334 and r8d, edx
cannam@89 335
cannam@89 336 movzx r8d, word ptr [rdi + r8*2]
cannam@89 337 cmp r8d, ebp
cannam@89 338 jbe LeaveNow
cannam@89 339 sub edx, 00010000h
cannam@89 340 js LeaveNow
cannam@89 341
cannam@89 342 LoopEntry2:
cannam@89 343 cmp bx,word ptr [rsi + r8 - 1]
cannam@89 344 jz LookupLoopIsZero
cannam@89 345
cannam@89 346 LookupLoop4:
cannam@89 347 and r8d, edx
cannam@89 348
cannam@89 349 movzx r8d, word ptr [rdi + r8*2]
cannam@89 350 cmp r8d, ebp
cannam@89 351 jbe LeaveNow
cannam@89 352 sub edx, 00010000h
cannam@89 353 js LeaveNow
cannam@89 354
cannam@89 355 LoopEntry4:
cannam@89 356
cannam@89 357 cmp bx,word ptr [rsi + r8 - 1]
cannam@89 358 jnz LookupLoop1
cannam@89 359 jmp LookupLoopIsZero
cannam@89 360
cannam@89 361
cannam@89 362 ;;; do {
cannam@89 363 ;;; match = s->window + cur_match;
cannam@89 364 ;;; if (*(ushf*)(match+best_len-1) != scan_end ||
cannam@89 365 ;;; *(ushf*)match != scan_start) continue;
cannam@89 366 ;;; [...]
cannam@89 367 ;;; } while ((cur_match = prev[cur_match & wmask]) > limit
cannam@89 368 ;;; && --chain_length != 0);
cannam@89 369 ;;;
cannam@89 370 ;;; Here is the inner loop of the function. The function will spend the
cannam@89 371 ;;; majority of its time in this loop, and majority of that time will
cannam@89 372 ;;; be spent in the first ten instructions.
cannam@89 373 ;;;
cannam@89 374 ;;; Within this loop:
cannam@89 375 ;;; ebx = scanend
cannam@89 376 ;;; r8d = curmatch
cannam@89 377 ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
cannam@89 378 ;;; rsi = windowbestlen - i.e., (window + bestlen)
cannam@89 379 ;;; rdi = prev
cannam@89 380 ;;; ebp = limit
cannam@89 381
cannam@89 382 LookupLoop:
cannam@89 383 and r8d, edx
cannam@89 384
cannam@89 385 movzx r8d, word ptr [rdi + r8*2]
cannam@89 386 cmp r8d, ebp
cannam@89 387 jbe LeaveNow
cannam@89 388 sub edx, 00010000h
cannam@89 389 js LeaveNow
cannam@89 390
cannam@89 391 LoopEntry:
cannam@89 392
cannam@89 393 cmp bx,word ptr [rsi + r8 - 1]
cannam@89 394 jnz LookupLoop1
cannam@89 395 LookupLoopIsZero:
cannam@89 396 cmp r12w, word ptr [r10 + r8]
cannam@89 397 jnz LookupLoop1
cannam@89 398
cannam@89 399
cannam@89 400 ;;; Store the current value of chainlen.
cannam@89 401 mov [chainlenwmask], edx
cannam@89 402
cannam@89 403 ;;; Point rdi to the string under scrutiny, and rsi to the string we
cannam@89 404 ;;; are hoping to match it up with. In actuality, rsi and rdi are
cannam@89 405 ;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and rdx is
cannam@89 406 ;;; initialized to -(MAX_MATCH_8).
cannam@89 407
cannam@89 408 lea rsi,[r8+r10]
cannam@89 409 mov rdx, 0fffffffffffffef8h; -(MAX_MATCH_8)
cannam@89 410 lea rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8]
cannam@89 411 lea rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8]
cannam@89 412
cannam@89 413 prefetcht1 [rsi+rdx]
cannam@89 414 prefetcht1 [rdi+rdx]
cannam@89 415
cannam@89 416
cannam@89 417 ;;; Test the strings for equality, 8 bytes at a time. At the end,
cannam@89 418 ;;; adjust rdx so that it is offset to the exact byte that mismatched.
cannam@89 419 ;;;
cannam@89 420 ;;; We already know at this point that the first three bytes of the
cannam@89 421 ;;; strings match each other, and they can be safely passed over before
cannam@89 422 ;;; starting the compare loop. So what this code does is skip over 0-3
cannam@89 423 ;;; bytes, as much as necessary in order to dword-align the rdi
cannam@89 424 ;;; pointer. (rsi will still be misaligned three times out of four.)
cannam@89 425 ;;;
cannam@89 426 ;;; It should be confessed that this loop usually does not represent
cannam@89 427 ;;; much of the total running time. Replacing it with a more
cannam@89 428 ;;; straightforward "rep cmpsb" would not drastically degrade
cannam@89 429 ;;; performance.
cannam@89 430
cannam@89 431
cannam@89 432 LoopCmps:
cannam@89 433 mov rax, [rsi + rdx]
cannam@89 434 xor rax, [rdi + rdx]
cannam@89 435 jnz LeaveLoopCmps
cannam@89 436
cannam@89 437 mov rax, [rsi + rdx + 8]
cannam@89 438 xor rax, [rdi + rdx + 8]
cannam@89 439 jnz LeaveLoopCmps8
cannam@89 440
cannam@89 441
cannam@89 442 mov rax, [rsi + rdx + 8+8]
cannam@89 443 xor rax, [rdi + rdx + 8+8]
cannam@89 444 jnz LeaveLoopCmps16
cannam@89 445
cannam@89 446 add rdx,8+8+8
cannam@89 447
cannam@89 448 jnz short LoopCmps ; rdx reaches zero once all MAX_MATCH_8 bytes compared equal
cannam@89 449 jmp short LenMaximum
cannam@89 450 LeaveLoopCmps16: add rdx,8
cannam@89 451 LeaveLoopCmps8: add rdx,8
cannam@89 452 LeaveLoopCmps:
cannam@89 453 ; rax = XOR of the two mismatching qwords; narrow down to the lowest non-zero byte
cannam@89 454 test eax, 0000FFFFh
cannam@89 455 jnz LenLower ; difference is within bytes 0-1
cannam@89 456
cannam@89 457 test eax,0ffffffffh
cannam@89 458
cannam@89 459 jnz LenLower32 ; difference is within bytes 2-3
cannam@89 460
cannam@89 461 add rdx,4
cannam@89 462 shr rax,32
cannam@89 463 or ax,ax
cannam@89 464 jnz LenLower ; difference is within bytes 4-5
cannam@89 465
cannam@89 466 LenLower32:
cannam@89 467 shr eax,16
cannam@89 468 add rdx,2
cannam@89 469 LenLower: sub al, 1 ; carry is set only when al was 0, i.e. the first byte of the pair still matched
cannam@89 470 adc rdx, 0 ; ...in which case step one byte further
cannam@89 471 ;;; Calculate the length of the match. If it is at least MAX_MATCH,
cannam@89 472 ;;; then automatically accept it as the best possible match and leave.
cannam@89 473
cannam@89 474 lea rax, [rdi + rdx]
cannam@89 475 sub rax, r9 ; rax = address of first mismatching byte - scan = match length
cannam@89 476 cmp eax, MAX_MATCH
cannam@89 477 jge LenMaximum
cannam@89 478
cannam@89 479 ;;; If the length of the match is not longer than the best match we
cannam@89 480 ;;; have so far, then forget it and return to the lookup loop.
cannam@89 481 ;///////////////////////////////////
cannam@89 482
cannam@89 483 cmp eax, r11d
cannam@89 484 jg LongerMatch
cannam@89 485
cannam@89 486 lea rsi,[r10+r11] ; rebuild windowbestlen: rsi was used as a compare pointer
cannam@89 487
cannam@89 488 mov rdi, prev_ad ; reload prev: rdi was used as a compare pointer
cannam@89 489 mov edx, [chainlenwmask] ; reload chainlenwmask: rdx was used as the compare index
cannam@89 490 jmp LookupLoop
cannam@89 491
cannam@89 492 ;;; s->match_start = cur_match;
cannam@89 493 ;;; best_len = len;
cannam@89 494 ;;; if (len >= nice_match) break;
cannam@89 495 ;;; scan_end = *(ushf*)(scan+best_len-1);
cannam@89 496
cannam@89 497 LongerMatch:
cannam@89 498 mov r11d, eax
cannam@89 499 mov match_start, r8d
cannam@89 500 cmp eax, [nicematch]
cannam@89 501 jge LeaveNow
cannam@89 502
cannam@89 503 lea rsi,[r10+rax]
cannam@89 504
cannam@89 505 movzx ebx, word ptr [r9 + rax - 1]
cannam@89 506 mov rdi, prev_ad
cannam@89 507 mov edx, [chainlenwmask]
cannam@89 508 jmp LookupLoop
cannam@89 509
cannam@89 510 ;;; Accept the current string, with the maximum possible length.
cannam@89 511
cannam@89 512 LenMaximum:
cannam@89 513 mov r11d,MAX_MATCH
cannam@89 514 mov match_start, r8d
cannam@89 515
cannam@89 516 ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
cannam@89 517 ;;; return s->lookahead;
cannam@89 518
cannam@89 519 LeaveNow:
cannam@89 520 IFDEF INFOZIP
cannam@89 521 mov eax,r11d
cannam@89 522 ELSE
cannam@89 523 mov eax, Lookahead
cannam@89 524 cmp r11d, eax
cannam@89 525 cmovng eax, r11d ; eax = best_len when best_len <= lookahead (signed), else lookahead
cannam@89 526 ENDIF
cannam@89 527
cannam@89 528 ;;; Restore the saved registers and return from whence we came.
cannam@89 529
cannam@89 530
cannam@89 531 mov rsi,[save_rsi]
cannam@89 532 mov rdi,[save_rdi]
cannam@89 533 mov rbx,[save_rbx]
cannam@89 534 mov rbp,[save_rbp]
cannam@89 535 mov r12,[save_r12]
cannam@89 536 mov r13,[save_r13]
cannam@89 537 ; mov r14,[save_r14]
cannam@89 538 ; mov r15,[save_r15]
cannam@89 539
cannam@89 540
cannam@89 541 ret 0
cannam@89 542 ; please don't remove this string !
cannam@89 543 ; Your can freely use gvmat64 in any free or commercial app
cannam@89 544 ; but it is far better don't remove the string in the binary!
cannam@89 545 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0
cannam@89 546 longest_match ENDP
cannam@89 547
cannam@89 548 match_init PROC ; initialisation entry point with nothing to set up: returns immediately
cannam@89 549 ret 0
cannam@89 550 match_init ENDP
cannam@89 551
cannam@89 552
cannam@89 553 END