cannam@128
|
1 ; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
|
cannam@128
|
2 ; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
|
cannam@128
|
3 ; File written by Gilles Vollant, by converting match686.S from Brian Raiter
|
cannam@128
|
4 ; for MASM. This is an assembly version of longest_match
|
cannam@128
|
5 ; from Jean-loup Gailly in deflate.c
|
cannam@128
|
6 ;
|
cannam@128
|
7 ; http://www.zlib.net
|
cannam@128
|
8 ; http://www.winimage.com/zLibDll
|
cannam@128
|
9 ; http://www.muppetlabs.com/~breadbox/software/assembly.html
|
cannam@128
|
10 ;
|
cannam@128
|
11 ; For Visual C++ 4.x and higher and ML 6.x and higher
|
cannam@128
|
12 ; ml.exe is distributed in
|
cannam@128
|
13 ; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
|
cannam@128
|
14 ;
|
cannam@128
|
15 ; this file contain two implementation of longest_match
|
cannam@128
|
16 ;
|
cannam@128
|
17 ; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro
|
cannam@128
|
18 ; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)
|
cannam@128
|
19 ;
|
cannam@128
|
20 ; for using an assembly version of longest_match, you need define ASMV in project
|
cannam@128
|
21 ;
|
cannam@128
|
22 ; compile the asm file running
|
cannam@128
|
23 ; ml /coff /Zi /c /Flmatch686.lst match686.asm
|
cannam@128
|
24 ; and do not include match686.obj in your project
|
cannam@128
|
25 ;
|
cannam@128
|
26 ; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
|
cannam@128
|
27 ; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor
|
cannam@128
|
28 ; with autoselect (with cpu detection code)
|
cannam@128
|
29 ; if you want to support the old Pentium optimization, you can still use those versions
|
cannam@128
|
30 ;
|
cannam@128
|
31 ; this file is not optimized for the old Pentium, but it is compatible with all 32-bit x86
|
cannam@128
|
32 ; processor (starting 80386)
|
cannam@128
|
33 ;
|
cannam@128
|
34 ;
|
cannam@128
|
35 ; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
|
cannam@128
|
36
|
cannam@128
|
37 ;uInt longest_match(s, cur_match)
|
cannam@128
|
38 ; deflate_state *s;
|
cannam@128
|
39 ; IPos cur_match; /* current match */
|
cannam@128
|
40
|
cannam@128
|
; Stack-slot equates expressed as offsets from esp, all relative to NbStack.
; Within this file only `window` and `scan` are referenced again, and both are
; redefined by the "stack frame offsets" equates further down before
; longest_match uses them.
; NOTE(review): the remaining names look like leftovers from another
; implementation -- confirm before relying on or removing them.
41 NbStack equ 76
|
cannam@128
|
42 cur_match equ dword ptr[esp+NbStack-0]
|
cannam@128
|
43 str_s equ dword ptr[esp+NbStack-4]
|
cannam@128
|
44 ; 5 dword on top (ret,ebp,esi,edi,ebx)
|
cannam@128
|
45 adrret equ dword ptr[esp+NbStack-8]
|
cannam@128
|
46 pushebp equ dword ptr[esp+NbStack-12]
|
cannam@128
|
47 pushedi equ dword ptr[esp+NbStack-16]
|
cannam@128
|
48 pushesi equ dword ptr[esp+NbStack-20]
|
cannam@128
|
49 pushebx equ dword ptr[esp+NbStack-24]
|
cannam@128
|
50
|
cannam@128
|
; Local-variable slots below the five dwords listed above.
51 chain_length equ dword ptr [esp+NbStack-28]
|
cannam@128
|
52 limit equ dword ptr [esp+NbStack-32]
|
cannam@128
|
53 best_len equ dword ptr [esp+NbStack-36]
|
cannam@128
|
54 window equ dword ptr [esp+NbStack-40]
|
cannam@128
|
55 prev equ dword ptr [esp+NbStack-44]
|
cannam@128
|
; scan_start is the only slot in this list declared as a word rather than a dword.
56 scan_start equ word ptr [esp+NbStack-48]
|
cannam@128
|
57 wmask equ dword ptr [esp+NbStack-52]
|
cannam@128
|
58 match_start_ptr equ dword ptr [esp+NbStack-56]
|
cannam@128
|
59 nice_match equ dword ptr [esp+NbStack-60]
|
cannam@128
|
60 scan equ dword ptr [esp+NbStack-64]
|
cannam@128
|
61
|
cannam@128
|
62 windowlen equ dword ptr [esp+NbStack-68]
|
cannam@128
|
63 match_start equ dword ptr [esp+NbStack-72]
|
cannam@128
|
64 strend equ dword ptr [esp+NbStack-76]
|
cannam@128
|
; NbStack minus the 24 bytes occupied by the two arguments and the
; return address / saved registers listed above (76 - 24 = 52).
65 NbStackAdd equ (NbStack-24)
|
cannam@128
|
66
|
cannam@128
|
; Assembler setup: 80386 instruction set (privileged forms enabled by the "p"
; suffix), module name gvmatch, flat memory model.
67 .386p
|
cannam@128
|
68
|
cannam@128
|
69 name gvmatch
|
cannam@128
|
70 .MODEL FLAT
|
cannam@128
|
71
|
cannam@128
|
72
|
cannam@128
|
73
|
cannam@128
|
74 ; all the +zlib1222add offsets are due to the addition of fields
|
cannam@128
|
75 ; in zlib in the deflate_state structure since the asm code was first written
|
cannam@128
|
76 ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
|
cannam@128
|
77 ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
|
cannam@128
|
78 ; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
|
cannam@128
|
79
|
cannam@128
|
; Added to every deflate_state field offset defined in this file
; (both the dep_* equates here and the ds* equates further down).
80 zlib1222add equ 8
|
cannam@128
|
81
|
cannam@128
|
82 ; Note : these values are good with an 8-byte boundary packed structure
; Each dep_* value equals the corresponding ds* offset defined further down
; in this file, written in hex (e.g. 74h = 116 = dsMaxChainLen's base).
; No instruction in this file references the dep_* names; longest_match
; uses the ds* names.
|
cannam@128
|
83 dep_chain_length equ 74h+zlib1222add
|
cannam@128
|
84 dep_window equ 30h+zlib1222add
|
cannam@128
|
85 dep_strstart equ 64h+zlib1222add
|
cannam@128
|
86 dep_prev_length equ 70h+zlib1222add
|
cannam@128
|
87 dep_nice_match equ 88h+zlib1222add
|
cannam@128
|
88 dep_w_size equ 24h+zlib1222add
|
cannam@128
|
89 dep_prev equ 38h+zlib1222add
|
cannam@128
|
90 dep_w_mask equ 2ch+zlib1222add
|
cannam@128
|
91 dep_good_match equ 84h+zlib1222add
|
cannam@128
|
92 dep_match_start equ 68h+zlib1222add
|
cannam@128
|
93 dep_lookahead equ 6ch+zlib1222add
|
cannam@128
|
94
|
cannam@128
|
95
|
cannam@128
|
; Open the code segment; it is closed by "_TEXT ends" at the end of the file.
96 _TEXT segment
|
cannam@128
|
97
|
cannam@128
|
; Export the two entry points. When NOUNDERLINE is defined the symbols are
; exported without a leading underscore; otherwise with one. The same
; conditional selects the matching proc names further down in this file.
98 IFDEF NOUNDERLINE
|
cannam@128
|
99 public longest_match
|
cannam@128
|
100 public match_init
|
cannam@128
|
101 ELSE
|
cannam@128
|
102 public _longest_match
|
cannam@128
|
103 public _match_init
|
cannam@128
|
104 ENDIF
|
cannam@128
|
105
|
cannam@128
|
; Match-length constants.
;   MAX_MATCH     - longest match length accepted (258).
;   MIN_MATCH     - shortest match length (3).
;   MIN_LOOKAHEAD - MAX_MATCH + MIN_MATCH + 1 (262).
106 MAX_MATCH equ 258
|
cannam@128
|
107 MIN_MATCH equ 3
|
cannam@128
|
108 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
|
cannam@128
|
109
|
cannam@128
|
110
|
cannam@128
|
111
|
cannam@128
|
; The next three equates repeat the definitions above with identical values.
112 MAX_MATCH equ 258
|
cannam@128
|
113 MIN_MATCH equ 3
|
cannam@128
|
114 MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
|
cannam@128
|
; MAX_MATCH rounded up to a multiple of 8: (258 + 7) AND 0FFF8h = 264 = 0108h.
; This is the value that longest_match hard-codes as 0108h / 0fffffef8h when
; it sets up its 8-bytes-per-iteration compare loop. The mask must clear
; exactly the low three bits; a mask of 0FFF0h would give 256 instead.
115 MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
|
cannam@128
|
116
|
cannam@128
|
117
|
cannam@128
|
118 ;;; stack frame offsets
;;; These are text equates used inside brackets, e.g. [bestlen]. They are
;;; only valid while esp holds the value it has after longest_match's
;;; prologue (four pushes followed by "sub esp, LocalVarsSize").
;;; `window` and `scan` replace the earlier equates of the same names.
|
cannam@128
|
119
|
cannam@128
|
120 chainlenwmask equ esp + 0 ; high word: current chain len
|
cannam@128
|
121 ; low word: s->wmask
|
cannam@128
|
122 window equ esp + 4 ; local copy of s->window
|
cannam@128
|
123 windowbestlen equ esp + 8 ; s->window + bestlen
|
cannam@128
|
124 scanstart equ esp + 16 ; first two bytes of string
|
cannam@128
|
125 scanend equ esp + 12 ; last two bytes of string
|
cannam@128
|
126 scanalign equ esp + 20 ; dword-misalignment of string
|
cannam@128
|
127 nicematch equ esp + 24 ; a good enough match size
|
cannam@128
|
128 bestlen equ esp + 28 ; size of best match so far
|
cannam@128
|
129 scan equ esp + 32 ; ptr to string wanting match
|
cannam@128
|
130
|
cannam@128
|
; Nine dword slots (esp+0 .. esp+32) = 36 bytes of locals.
131 LocalVarsSize equ 36
|
cannam@128
|
; Saved registers sit above the locals in reverse push order
; (the prologue pushes ebp, edi, esi, ebx).
132 ; saved ebx byte esp + 36
|
cannam@128
|
133 ; saved esi byte esp + 40
|
cannam@128
|
134 ; saved edi byte esp + 44
|
cannam@128
|
135 ; saved ebp byte esp + 48
|
cannam@128
|
136 ; return address byte esp + 52
|
cannam@128
|
137 deflatestate equ esp + 56 ; the function arguments
|
cannam@128
|
138 curmatch equ esp + 60
|
cannam@128
|
139
|
cannam@128
|
140 ;;; Offsets for fields in the deflate_state structure. These numbers
|
cannam@128
|
141 ;;; are calculated from the definition of deflate_state, with the
|
cannam@128
|
142 ;;; assumption that the compiler will dword-align the fields. (Thus,
|
cannam@128
|
143 ;;; changing the definition of deflate_state could easily cause this
|
cannam@128
|
144 ;;; program to crash horribly, without so much as a warning at
|
cannam@128
|
145 ;;; compile time. Sigh.)
;;;
;;; Every offset has zlib1222add added to it, so the whole table shifts
;;; together when that constant is changed. dsMatchLen and dsPrevMatch
;;; are defined here but not referenced by any instruction in this file.
|
cannam@128
|
146
|
cannam@128
|
147 dsWSize equ 36+zlib1222add
|
cannam@128
|
148 dsWMask equ 44+zlib1222add
|
cannam@128
|
149 dsWindow equ 48+zlib1222add
|
cannam@128
|
150 dsPrev equ 56+zlib1222add
|
cannam@128
|
151 dsMatchLen equ 88+zlib1222add
|
cannam@128
|
152 dsPrevMatch equ 92+zlib1222add
|
cannam@128
|
153 dsStrStart equ 100+zlib1222add
|
cannam@128
|
154 dsMatchStart equ 104+zlib1222add
|
cannam@128
|
155 dsLookahead equ 108+zlib1222add
|
cannam@128
|
156 dsPrevLen equ 112+zlib1222add
|
cannam@128
|
157 dsMaxChainLen equ 116+zlib1222add
|
cannam@128
|
158 dsGoodMatch equ 132+zlib1222add
|
cannam@128
|
159 dsNiceMatch equ 136+zlib1222add
|
cannam@128
|
160
|
cannam@128
|
161
|
cannam@128
|
162 ;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
|
cannam@128
|
163 ;;; Written for zlib 1.1.2
|
cannam@128
|
164 ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
|
cannam@128
|
165 ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
|
cannam@128
|
166 ;;;
|
cannam@128
|
167 ;;
|
cannam@128
|
168 ;; This software is provided 'as-is', without any express or implied
|
cannam@128
|
169 ;; warranty. In no event will the authors be held liable for any damages
|
cannam@128
|
170 ;; arising from the use of this software.
|
cannam@128
|
171 ;;
|
cannam@128
|
172 ;; Permission is granted to anyone to use this software for any purpose,
|
cannam@128
|
173 ;; including commercial applications, and to alter it and redistribute it
|
cannam@128
|
174 ;; freely, subject to the following restrictions:
|
cannam@128
|
175 ;;
|
cannam@128
|
176 ;; 1. The origin of this software must not be misrepresented; you must not
|
cannam@128
|
177 ;; claim that you wrote the original software. If you use this software
|
cannam@128
|
178 ;; in a product, an acknowledgment in the product documentation would be
|
cannam@128
|
179 ;; appreciated but is not required.
|
cannam@128
|
180 ;; 2. Altered source versions must be plainly marked as such, and must not be
|
cannam@128
|
181 ;; misrepresented as being the original software
|
cannam@128
|
182 ;; 3. This notice may not be removed or altered from any source distribution.
|
cannam@128
|
183 ;;
|
cannam@128
|
184
|
cannam@128
|
185 ;GLOBAL _longest_match, _match_init
|
cannam@128
|
186
|
cannam@128
|
187
|
cannam@128
|
188 ;SECTION .text
|
cannam@128
|
189
|
cannam@128
|
190 ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
|
cannam@128
|
191
|
cannam@128
|
192 ;_longest_match:
|
cannam@128
|
;;; ---------------------------------------------------------------------
;;; longest_match / _longest_match
;;; In:   two dword stack arguments: at entry [esp+4] is the deflate_state
;;;       pointer and [esp+8] is cur_match (read below as [deflatestate]
;;;       and [curmatch] once the frame has been built).
;;; Out:  eax = bestlen, or s->lookahead when bestlen is greater than it.
;;;       The position of each new best match is written to the
;;;       dsMatchStart field of the deflate_state.
;;; Regs: ebp, edi, esi, ebx are saved and restored; ecx and edx are
;;;       overwritten and not restored.
;;; The function ends with a plain "ret", so the arguments are left on
;;; the stack for the caller to remove.
;;; ---------------------------------------------------------------------
193 IFDEF NOUNDERLINE
|
cannam@128
|
194 longest_match proc near
|
cannam@128
|
195 ELSE
|
cannam@128
|
196 _longest_match proc near
|
cannam@128
|
197 ENDIF
|
cannam@128
|
198 .FPO (9, 4, 0, 0, 1, 0)
|
cannam@128
|
199
|
cannam@128
|
200 ;;; Save registers that the compiler may be using, and adjust esp to
|
cannam@128
|
201 ;;; make room for our stack frame.
|
cannam@128
|
202
|
cannam@128
|
203 push ebp
|
cannam@128
|
204 push edi
|
cannam@128
|
205 push esi
|
cannam@128
|
206 push ebx
|
cannam@128
|
207 sub esp, LocalVarsSize
|
cannam@128
|
208
|
cannam@128
|
209 ;;; Retrieve the function arguments. ecx will hold cur_match
|
cannam@128
|
210 ;;; throughout the entire function. edx will hold the pointer to the
|
cannam@128
|
211 ;;; deflate_state structure during the function's setup (before
|
cannam@128
|
212 ;;; entering the main loop).
|
cannam@128
|
213
|
cannam@128
|
214 mov edx, [deflatestate]
|
cannam@128
|
215 mov ecx, [curmatch]
|
cannam@128
|
216
|
cannam@128
|
217 ;;; uInt wmask = s->w_mask;
|
cannam@128
|
218 ;;; unsigned chain_length = s->max_chain_length;
|
cannam@128
|
219 ;;; if (s->prev_length >= s->good_match) {
|
cannam@128
|
220 ;;; chain_length >>= 2;
|
cannam@128
|
221 ;;; }
;;; The flags set by the cmp are consumed by the jl four instructions
;;; later; the two mov instructions in between do not modify flags, so
;;; eax and ebx can be reloaded with wmask and max_chain_length first.
|
cannam@128
|
222
|
cannam@128
|
223 mov eax, [edx + dsPrevLen]
|
cannam@128
|
224 mov ebx, [edx + dsGoodMatch]
|
cannam@128
|
225 cmp eax, ebx
|
cannam@128
|
226 mov eax, [edx + dsWMask]
|
cannam@128
|
227 mov ebx, [edx + dsMaxChainLen]
|
cannam@128
|
228 jl LastMatchGood
|
cannam@128
|
229 shr ebx, 2
|
cannam@128
|
230 LastMatchGood:
|
cannam@128
|
231
|
cannam@128
|
232 ;;; chainlen is decremented once beforehand so that the function can
|
cannam@128
|
233 ;;; use the sign flag instead of the zero flag for the exit test.
|
cannam@128
|
234 ;;; It is then shifted into the high word, to make room for the wmask
|
cannam@128
|
235 ;;; value, which it will always accompany.
;;; NOTE(review): this packing only works if wmask fits in 16 bits and
;;; chainlen - 1 fits in 15 bits -- confirm against the callers' limits.
|
cannam@128
|
236
|
cannam@128
|
237 dec ebx
|
cannam@128
|
238 shl ebx, 16
|
cannam@128
|
239 or ebx, eax
|
cannam@128
|
240 mov [chainlenwmask], ebx
|
cannam@128
|
241
|
cannam@128
|
242 ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
;;; i.e. nicematch = the smaller of nice_match and lookahead.
;;; NOTE(review): jl is a signed comparison while the C text above
;;; compares as uInt; the two agree only while both values are below 2^31.
|
cannam@128
|
243
|
cannam@128
|
244 mov eax, [edx + dsNiceMatch]
|
cannam@128
|
245 mov ebx, [edx + dsLookahead]
|
cannam@128
|
246 cmp ebx, eax
|
cannam@128
|
247 jl LookaheadLess
|
cannam@128
|
248 mov ebx, eax
|
cannam@128
|
249 LookaheadLess: mov [nicematch], ebx
|
cannam@128
|
250
|
cannam@128
|
251 ;;; register Bytef *scan = s->window + s->strstart;
;;; esi = window, ebp = strstart, edi = scan.
|
cannam@128
|
252
|
cannam@128
|
253 mov esi, [edx + dsWindow]
|
cannam@128
|
254 mov [window], esi
|
cannam@128
|
255 mov ebp, [edx + dsStrStart]
|
cannam@128
|
256 lea edi, [esi + ebp]
|
cannam@128
|
257 mov [scan], edi
|
cannam@128
|
258
|
cannam@128
|
259 ;;; Determine how many bytes the scan ptr is off from being
|
cannam@128
|
260 ;;; dword-aligned.
;;; scanalign = (-scan) AND 3, i.e. the number of bytes (0-3) from scan
;;; up to the next dword boundary.
|
cannam@128
|
261
|
cannam@128
|
262 mov eax, edi
|
cannam@128
|
263 neg eax
|
cannam@128
|
264 and eax, 3
|
cannam@128
|
265 mov [scanalign], eax
|
cannam@128
|
266
|
cannam@128
|
267 ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
|
cannam@128
|
268 ;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
;;; Computed here as ebp = strstart - (w_size - MIN_LOOKAHEAD), replaced
;;; by 0 when that difference is not positive (signed test).
|
cannam@128
|
269
|
cannam@128
|
270 mov eax, [edx + dsWSize]
|
cannam@128
|
271 sub eax, MIN_LOOKAHEAD
|
cannam@128
|
272 sub ebp, eax
|
cannam@128
|
273 jg LimitPositive
|
cannam@128
|
274 xor ebp, ebp
|
cannam@128
|
275 LimitPositive:
|
cannam@128
|
276
|
cannam@128
|
277 ;;; int best_len = s->prev_length;
|
cannam@128
|
278
|
cannam@128
|
279 mov eax, [edx + dsPrevLen]
|
cannam@128
|
280 mov [bestlen], eax
|
cannam@128
|
281
|
cannam@128
|
282 ;;; Store the sum of s->window + best_len locally (windowbestlen), and in esi.
|
cannam@128
|
283
|
cannam@128
|
284 add esi, eax
|
cannam@128
|
285 mov [windowbestlen], esi
|
cannam@128
|
286
|
cannam@128
|
287 ;;; register ush scan_start = *(ushf*)scan;
|
cannam@128
|
288 ;;; register ush scan_end = *(ushf*)(scan+best_len-1);
|
cannam@128
|
289 ;;; Posf *prev = s->prev;
;;; Both 16-bit values are zero-extended and stored as dwords; ebx keeps
;;; scanend and edi is then repurposed to hold prev for the lookup loop.
|
cannam@128
|
290
|
cannam@128
|
291 movzx ebx, word ptr [edi]
|
cannam@128
|
292 mov [scanstart], ebx
|
cannam@128
|
293 movzx ebx, word ptr [edi + eax - 1]
|
cannam@128
|
294 mov [scanend], ebx
|
cannam@128
|
295 mov edi, [edx + dsPrev]
|
cannam@128
|
296
|
cannam@128
|
297 ;;; Jump into the main loop.
;;; Entry is at LoopEntry, so the initial cur_match is tested before any
;;; chain link is followed.
|
cannam@128
|
298
|
cannam@128
|
299 mov edx, [chainlenwmask]
|
cannam@128
|
300 jmp short LoopEntry
|
cannam@128
|
301
|
cannam@128
|
302 align 4
|
cannam@128
|
303
|
cannam@128
|
304 ;;; do {
|
cannam@128
|
305 ;;; match = s->window + cur_match;
|
cannam@128
|
306 ;;; if (*(ushf*)(match+best_len-1) != scan_end ||
|
cannam@128
|
307 ;;; *(ushf*)match != scan_start) continue;
|
cannam@128
|
308 ;;; [...]
|
cannam@128
|
309 ;;; } while ((cur_match = prev[cur_match & wmask]) > limit
|
cannam@128
|
310 ;;; && --chain_length != 0);
|
cannam@128
|
311 ;;;
|
cannam@128
|
312 ;;; Here is the inner loop of the function. The function will spend the
|
cannam@128
|
313 ;;; majority of its time in this loop, and majority of that time will
|
cannam@128
|
314 ;;; be spent in the first ten instructions.
|
cannam@128
|
315 ;;;
|
cannam@128
|
316 ;;; Within this loop:
|
cannam@128
|
317 ;;; ebx = scanend
|
cannam@128
|
318 ;;; ecx = curmatch
|
cannam@128
|
319 ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
|
cannam@128
|
320 ;;; esi = windowbestlen - i.e., (window + bestlen)
|
cannam@128
|
321 ;;; edi = prev
|
cannam@128
|
322 ;;; ebp = limit
;;;
;;; "and ecx, edx" masks curmatch with the whole packed dword. After the
;;; first pass ecx always comes from a movzx of a 16-bit value, so its
;;; high word is zero and only the wmask half of edx takes effect.
;;; NOTE(review): on the first pass this relies on the caller's cur_match
;;; being below 65536 -- confirm.
;;; The chain counter lives in the high word of edx: subtracting
;;; 00010000h decrements it, and js leaves once it goes negative.
|
cannam@128
|
323
|
cannam@128
|
324 LookupLoop:
|
cannam@128
|
325 and ecx, edx
|
cannam@128
|
326 movzx ecx, word ptr [edi + ecx*2]
|
cannam@128
|
327 cmp ecx, ebp
|
cannam@128
|
328 jbe LeaveNow
|
cannam@128
|
329 sub edx, 00010000h
|
cannam@128
|
330 js LeaveNow
|
cannam@128
|
331 LoopEntry: movzx eax, word ptr [esi + ecx - 1]
|
cannam@128
|
332 cmp eax, ebx
|
cannam@128
|
333 jnz LookupLoop
|
cannam@128
|
334 mov eax, [window]
|
cannam@128
|
335 movzx eax, word ptr [eax + ecx]
|
cannam@128
|
336 cmp eax, [scanstart]
|
cannam@128
|
337 jnz LookupLoop
|
cannam@128
|
338
|
cannam@128
|
339 ;;; Store the current value of chainlen.
;;; (edx is about to be reused as the compare-loop index.)
|
cannam@128
|
340
|
cannam@128
|
341 mov [chainlenwmask], edx
|
cannam@128
|
342
|
cannam@128
|
343 ;;; Point edi to the string under scrutiny, and esi to the string we
|
cannam@128
|
344 ;;; are hoping to match it up with. In actuality, esi and edi are
|
cannam@128
|
345 ;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
|
cannam@128
|
346 ;;; initialized to -MAX_MATCH_8, so [edi + edx] starts at scan + scanalign.
;;; MAX_MATCH_8 is written out literally below as 0108h (264), and its
;;; negation as 0fffffef8h.
|
cannam@128
|
347
|
cannam@128
|
348 mov esi, [window]
|
cannam@128
|
349 mov edi, [scan]
|
cannam@128
|
350 add esi, ecx
|
cannam@128
|
351 mov eax, [scanalign]
|
cannam@128
|
352 mov edx, 0fffffef8h; -(MAX_MATCH_8)
|
cannam@128
|
353 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
|
cannam@128
|
354 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
|
cannam@128
|
355
|
cannam@128
|
356 ;;; Test the strings for equality, 8 bytes at a time. At the end,
|
cannam@128
|
357 ;;; adjust edx so that it is offset to the exact byte that mismatched.
|
cannam@128
|
358 ;;;
|
cannam@128
|
359 ;;; We already know at this point that the first three bytes of the
|
cannam@128
|
360 ;;; strings match each other, and they can be safely passed over before
|
cannam@128
|
361 ;;; starting the compare loop. So what this code does is skip over 0-3
|
cannam@128
|
362 ;;; bytes, as much as necessary in order to dword-align the edi
|
cannam@128
|
363 ;;; pointer. (esi will still be misaligned three times out of four.)
|
cannam@128
|
364 ;;;
|
cannam@128
|
365 ;;; It should be confessed that this loop usually does not represent
|
cannam@128
|
366 ;;; much of the total running time. Replacing it with a more
|
cannam@128
|
367 ;;; straightforward "rep cmpsb" would not drastically degrade
|
cannam@128
|
368 ;;; performance.
;;;
;;; edx counts up from -MAX_MATCH_8 in steps of 8; reaching zero means
;;; every dword compared equal, and control goes to LenMaximum.
;;; On a mismatch eax holds the XOR of the two differing dwords, so its
;;; lowest nonzero byte marks the first mismatching byte:
;;;   - if the low 16 bits are zero, the mismatch is in the upper half:
;;;     add 2 to edx and shift the upper half down;
;;;   - "sub al, 1" then sets carry only when al was zero (that byte
;;;     matched), and "adc edx, 0" adds that carry as one more byte.
|
cannam@128
|
369
|
cannam@128
|
370 LoopCmps:
|
cannam@128
|
371 mov eax, [esi + edx]
|
cannam@128
|
372 xor eax, [edi + edx]
|
cannam@128
|
373 jnz LeaveLoopCmps
|
cannam@128
|
374 mov eax, [esi + edx + 4]
|
cannam@128
|
375 xor eax, [edi + edx + 4]
|
cannam@128
|
376 jnz LeaveLoopCmps4
|
cannam@128
|
377 add edx, 8
|
cannam@128
|
378 jnz LoopCmps
|
cannam@128
|
379 jmp short LenMaximum
|
cannam@128
|
380 LeaveLoopCmps4: add edx, 4
|
cannam@128
|
381 LeaveLoopCmps: test eax, 0000FFFFh
|
cannam@128
|
382 jnz LenLower
|
cannam@128
|
383 add edx, 2
|
cannam@128
|
384 shr eax, 16
|
cannam@128
|
385 LenLower: sub al, 1
|
cannam@128
|
386 adc edx, 0
|
cannam@128
|
387
|
cannam@128
|
388 ;;; Calculate the length of the match. If it is MAX_MATCH or longer,
|
cannam@128
|
389 ;;; then automatically accept it as the best possible match and leave.
;;; eax = (address of first mismatching byte) - scan; edi is reloaded
;;; with scan here and still holds it at LongerMatch.
|
cannam@128
|
390
|
cannam@128
|
391 lea eax, [edi + edx]
|
cannam@128
|
392 mov edi, [scan]
|
cannam@128
|
393 sub eax, edi
|
cannam@128
|
394 cmp eax, MAX_MATCH
|
cannam@128
|
395 jge LenMaximum
|
cannam@128
|
396
|
cannam@128
|
397 ;;; If the length of the match is not longer than the best match we
|
cannam@128
|
398 ;;; have so far, then forget it and return to the lookup loop.
;;; Before looping, the registers listed above LookupLoop (esi, edi, ebx,
;;; edx) are reloaded from the stack frame and the deflate_state.
|
cannam@128
|
399
|
cannam@128
|
400 mov edx, [deflatestate]
|
cannam@128
|
401 mov ebx, [bestlen]
|
cannam@128
|
402 cmp eax, ebx
|
cannam@128
|
403 jg LongerMatch
|
cannam@128
|
404 mov esi, [windowbestlen]
|
cannam@128
|
405 mov edi, [edx + dsPrev]
|
cannam@128
|
406 mov ebx, [scanend]
|
cannam@128
|
407 mov edx, [chainlenwmask]
|
cannam@128
|
408 jmp LookupLoop
|
cannam@128
|
409
|
cannam@128
|
410 ;;; s->match_start = cur_match;
|
cannam@128
|
411 ;;; best_len = len;
|
cannam@128
|
412 ;;; if (len >= nice_match) break;
|
cannam@128
|
413 ;;; scan_end = *(ushf*)(scan+best_len-1);
;;; Here eax = new length, ecx = cur_match, edx = deflate_state pointer,
;;; edi = scan. windowbestlen and scanend are refreshed for the new
;;; length before returning to the lookup loop.
|
cannam@128
|
414
|
cannam@128
|
415 LongerMatch: mov ebx, [nicematch]
|
cannam@128
|
416 mov [bestlen], eax
|
cannam@128
|
417 mov [edx + dsMatchStart], ecx
|
cannam@128
|
418 cmp eax, ebx
|
cannam@128
|
419 jge LeaveNow
|
cannam@128
|
420 mov esi, [window]
|
cannam@128
|
421 add esi, eax
|
cannam@128
|
422 mov [windowbestlen], esi
|
cannam@128
|
423 movzx ebx, word ptr [edi + eax - 1]
|
cannam@128
|
424 mov edi, [edx + dsPrev]
|
cannam@128
|
425 mov [scanend], ebx
|
cannam@128
|
426 mov edx, [chainlenwmask]
|
cannam@128
|
427 jmp LookupLoop
|
cannam@128
|
428
|
cannam@128
|
429 ;;; Accept the current string, with the maximum possible length.
;;; Reached when the compare loop ran to completion or the computed
;;; length was at least MAX_MATCH; falls through into LeaveNow.
|
cannam@128
|
430
|
cannam@128
|
431 LenMaximum: mov edx, [deflatestate]
|
cannam@128
|
432 mov dword ptr [bestlen], MAX_MATCH
|
cannam@128
|
433 mov [edx + dsMatchStart], ecx
|
cannam@128
|
434
|
cannam@128
|
435 ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
|
cannam@128
|
436 ;;; return s->lookahead;
;;; eax is preloaded with lookahead and overwritten with bestlen unless
;;; bestlen is greater (signed jg).
|
cannam@128
|
437
|
cannam@128
|
438 LeaveNow:
|
cannam@128
|
439 mov edx, [deflatestate]
|
cannam@128
|
440 mov ebx, [bestlen]
|
cannam@128
|
441 mov eax, [edx + dsLookahead]
|
cannam@128
|
442 cmp ebx, eax
|
cannam@128
|
443 jg LookaheadRet
|
cannam@128
|
444 mov eax, ebx
|
cannam@128
|
445 LookaheadRet:
|
cannam@128
|
446
|
cannam@128
|
447 ;;; Restore the stack and return from whence we came.
;;; Pops mirror the prologue's pushes in reverse order.
|
cannam@128
|
448
|
cannam@128
|
449 add esp, LocalVarsSize
|
cannam@128
|
450 pop ebx
|
cannam@128
|
451 pop esi
|
cannam@128
|
452 pop edi
|
cannam@128
|
453 pop ebp
|
cannam@128
|
454
|
cannam@128
|
455 ret
|
cannam@128
|
456 ; please don't remove this string !
|
cannam@128
|
457 ; You can freely use match686 in any free or commercial app if you don't remove the string in the binary!
; The bytes below are emitted into the code segment directly after the ret,
; so execution never reaches them.
|
cannam@128
|
458 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
|
cannam@128
|
459
|
cannam@128
|
460
|
cannam@128
|
461 IFDEF NOUNDERLINE
|
cannam@128
|
462 longest_match endp
|
cannam@128
|
463 ELSE
|
cannam@128
|
464 _longest_match endp
|
cannam@128
|
465 ENDIF
|
cannam@128
|
466
|
cannam@128
|
;;; match_init / _match_init
;;; Exported alongside longest_match. The body is a single "ret": it takes
;;; nothing from the stack, changes no registers, and returns immediately.
;;; The name follows the same NOUNDERLINE convention as longest_match.
467 IFDEF NOUNDERLINE
|
cannam@128
|
468 match_init proc near
|
cannam@128
|
469 ret
|
cannam@128
|
470 match_init endp
|
cannam@128
|
471 ELSE
|
cannam@128
|
472 _match_init proc near
|
cannam@128
|
473 ret
|
cannam@128
|
474 _match_init endp
|
cannam@128
|
475 ENDIF
|
cannam@128
|
476
|
cannam@128
|
477
|
cannam@128
|
478 _TEXT ends
|
cannam@128
|
479 end
|