cannam@89
|
1
|
cannam@89
|
2 /*-------------------------------------------------------------*/
|
cannam@89
|
3 /*--- Block sorting machinery ---*/
|
cannam@89
|
4 /*--- blocksort.c ---*/
|
cannam@89
|
5 /*-------------------------------------------------------------*/
|
cannam@89
|
6
|
cannam@89
|
7 /* ------------------------------------------------------------------
|
cannam@89
|
8 This file is part of bzip2/libbzip2, a program and library for
|
cannam@89
|
9 lossless, block-sorting data compression.
|
cannam@89
|
10
|
cannam@89
|
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
|
cannam@89
|
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
|
cannam@89
|
13
|
cannam@89
|
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
cannam@89
|
15 README file.
|
cannam@89
|
16
|
cannam@89
|
17 This program is released under the terms of the license contained
|
cannam@89
|
18 in the file LICENSE.
|
cannam@89
|
19 ------------------------------------------------------------------ */
|
cannam@89
|
20
|
cannam@89
|
21
|
cannam@89
|
22 #include "bzlib_private.h"
|
cannam@89
|
23
|
cannam@89
|
24 /*---------------------------------------------*/
|
cannam@89
|
25 /*--- Fallback O(N log(N)^2) sorting ---*/
|
cannam@89
|
26 /*--- algorithm, for repetitive blocks ---*/
|
cannam@89
|
27 /*---------------------------------------------*/
|
cannam@89
|
28
|
cannam@89
|
29 /*---------------------------------------------*/
|
cannam@89
|
30 static
|
cannam@89
|
31 __inline__
|
cannam@89
|
32 void fallbackSimpleSort ( UInt32* fmap,
|
cannam@89
|
33 UInt32* eclass,
|
cannam@89
|
34 Int32 lo,
|
cannam@89
|
35 Int32 hi )
|
cannam@89
|
36 {
|
cannam@89
|
37 Int32 i, j, tmp;
|
cannam@89
|
38 UInt32 ec_tmp;
|
cannam@89
|
39
|
cannam@89
|
40 if (lo == hi) return;
|
cannam@89
|
41
|
cannam@89
|
42 if (hi - lo > 3) {
|
cannam@89
|
43 for ( i = hi-4; i >= lo; i-- ) {
|
cannam@89
|
44 tmp = fmap[i];
|
cannam@89
|
45 ec_tmp = eclass[tmp];
|
cannam@89
|
46 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
|
cannam@89
|
47 fmap[j-4] = fmap[j];
|
cannam@89
|
48 fmap[j-4] = tmp;
|
cannam@89
|
49 }
|
cannam@89
|
50 }
|
cannam@89
|
51
|
cannam@89
|
52 for ( i = hi-1; i >= lo; i-- ) {
|
cannam@89
|
53 tmp = fmap[i];
|
cannam@89
|
54 ec_tmp = eclass[tmp];
|
cannam@89
|
55 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
|
cannam@89
|
56 fmap[j-1] = fmap[j];
|
cannam@89
|
57 fmap[j-1] = tmp;
|
cannam@89
|
58 }
|
cannam@89
|
59 }
|
cannam@89
|
60
|
cannam@89
|
61
|
cannam@89
|
62 /*---------------------------------------------*/
|
cannam@89
|
63 #define fswap(zz1, zz2) \
|
cannam@89
|
64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
|
cannam@89
|
65
|
cannam@89
|
66 #define fvswap(zzp1, zzp2, zzn) \
|
cannam@89
|
67 { \
|
cannam@89
|
68 Int32 yyp1 = (zzp1); \
|
cannam@89
|
69 Int32 yyp2 = (zzp2); \
|
cannam@89
|
70 Int32 yyn = (zzn); \
|
cannam@89
|
71 while (yyn > 0) { \
|
cannam@89
|
72 fswap(fmap[yyp1], fmap[yyp2]); \
|
cannam@89
|
73 yyp1++; yyp2++; yyn--; \
|
cannam@89
|
74 } \
|
cannam@89
|
75 }
|
cannam@89
|
76
|
cannam@89
|
77
|
cannam@89
|
78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
|
cannam@89
|
79
|
cannam@89
|
80 #define fpush(lz,hz) { stackLo[sp] = lz; \
|
cannam@89
|
81 stackHi[sp] = hz; \
|
cannam@89
|
82 sp++; }
|
cannam@89
|
83
|
cannam@89
|
84 #define fpop(lz,hz) { sp--; \
|
cannam@89
|
85 lz = stackLo[sp]; \
|
cannam@89
|
86 hz = stackHi[sp]; }
|
cannam@89
|
87
|
cannam@89
|
88 #define FALLBACK_QSORT_SMALL_THRESH 10
|
cannam@89
|
89 #define FALLBACK_QSORT_STACK_SIZE 100
|
cannam@89
|
90
|
cannam@89
|
91
|
cannam@89
|
92 static
|
cannam@89
|
93 void fallbackQSort3 ( UInt32* fmap,
|
cannam@89
|
94 UInt32* eclass,
|
cannam@89
|
95 Int32 loSt,
|
cannam@89
|
96 Int32 hiSt )
|
cannam@89
|
97 {
|
cannam@89
|
98 Int32 unLo, unHi, ltLo, gtHi, n, m;
|
cannam@89
|
99 Int32 sp, lo, hi;
|
cannam@89
|
100 UInt32 med, r, r3;
|
cannam@89
|
101 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
|
cannam@89
|
102 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
|
cannam@89
|
103
|
cannam@89
|
104 r = 0;
|
cannam@89
|
105
|
cannam@89
|
106 sp = 0;
|
cannam@89
|
107 fpush ( loSt, hiSt );
|
cannam@89
|
108
|
cannam@89
|
109 while (sp > 0) {
|
cannam@89
|
110
|
cannam@89
|
111 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
|
cannam@89
|
112
|
cannam@89
|
113 fpop ( lo, hi );
|
cannam@89
|
114 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
|
cannam@89
|
115 fallbackSimpleSort ( fmap, eclass, lo, hi );
|
cannam@89
|
116 continue;
|
cannam@89
|
117 }
|
cannam@89
|
118
|
cannam@89
|
119 /* Random partitioning. Median of 3 sometimes fails to
|
cannam@89
|
120 avoid bad cases. Median of 9 seems to help but
|
cannam@89
|
121 looks rather expensive. This too seems to work but
|
cannam@89
|
122 is cheaper. Guidance for the magic constants
|
cannam@89
|
123 7621 and 32768 is taken from Sedgewick's algorithms
|
cannam@89
|
124 book, chapter 35.
|
cannam@89
|
125 */
|
cannam@89
|
126 r = ((r * 7621) + 1) % 32768;
|
cannam@89
|
127 r3 = r % 3;
|
cannam@89
|
128 if (r3 == 0) med = eclass[fmap[lo]]; else
|
cannam@89
|
129 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
|
cannam@89
|
130 med = eclass[fmap[hi]];
|
cannam@89
|
131
|
cannam@89
|
132 unLo = ltLo = lo;
|
cannam@89
|
133 unHi = gtHi = hi;
|
cannam@89
|
134
|
cannam@89
|
135 while (1) {
|
cannam@89
|
136 while (1) {
|
cannam@89
|
137 if (unLo > unHi) break;
|
cannam@89
|
138 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
|
cannam@89
|
139 if (n == 0) {
|
cannam@89
|
140 fswap(fmap[unLo], fmap[ltLo]);
|
cannam@89
|
141 ltLo++; unLo++;
|
cannam@89
|
142 continue;
|
cannam@89
|
143 };
|
cannam@89
|
144 if (n > 0) break;
|
cannam@89
|
145 unLo++;
|
cannam@89
|
146 }
|
cannam@89
|
147 while (1) {
|
cannam@89
|
148 if (unLo > unHi) break;
|
cannam@89
|
149 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
|
cannam@89
|
150 if (n == 0) {
|
cannam@89
|
151 fswap(fmap[unHi], fmap[gtHi]);
|
cannam@89
|
152 gtHi--; unHi--;
|
cannam@89
|
153 continue;
|
cannam@89
|
154 };
|
cannam@89
|
155 if (n < 0) break;
|
cannam@89
|
156 unHi--;
|
cannam@89
|
157 }
|
cannam@89
|
158 if (unLo > unHi) break;
|
cannam@89
|
159 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
|
cannam@89
|
160 }
|
cannam@89
|
161
|
cannam@89
|
162 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
|
cannam@89
|
163
|
cannam@89
|
164 if (gtHi < ltLo) continue;
|
cannam@89
|
165
|
cannam@89
|
166 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
|
cannam@89
|
167 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
|
cannam@89
|
168
|
cannam@89
|
169 n = lo + unLo - ltLo - 1;
|
cannam@89
|
170 m = hi - (gtHi - unHi) + 1;
|
cannam@89
|
171
|
cannam@89
|
172 if (n - lo > hi - m) {
|
cannam@89
|
173 fpush ( lo, n );
|
cannam@89
|
174 fpush ( m, hi );
|
cannam@89
|
175 } else {
|
cannam@89
|
176 fpush ( m, hi );
|
cannam@89
|
177 fpush ( lo, n );
|
cannam@89
|
178 }
|
cannam@89
|
179 }
|
cannam@89
|
180 }
|
cannam@89
|
181
|
cannam@89
|
182 #undef fmin
|
cannam@89
|
183 #undef fpush
|
cannam@89
|
184 #undef fpop
|
cannam@89
|
185 #undef fswap
|
cannam@89
|
186 #undef fvswap
|
cannam@89
|
187 #undef FALLBACK_QSORT_SMALL_THRESH
|
cannam@89
|
188 #undef FALLBACK_QSORT_STACK_SIZE
|
cannam@89
|
189
|
cannam@89
|
190
|
cannam@89
|
191 /*---------------------------------------------*/
|
cannam@89
|
192 /* Pre:
|
cannam@89
|
193 nblock > 0
|
cannam@89
|
194 eclass exists for [0 .. nblock-1]
|
cannam@89
|
195 ((UChar*)eclass) [0 .. nblock-1] holds block
|
cannam@89
|
196 ptr exists for [0 .. nblock-1]
|
cannam@89
|
197
|
cannam@89
|
198 Post:
|
cannam@89
|
199 ((UChar*)eclass) [0 .. nblock-1] holds block
|
cannam@89
|
200 All other areas of eclass destroyed
|
cannam@89
|
201 fmap [0 .. nblock-1] holds sorted order
|
cannam@89
|
202 bhtab [ 0 .. 2+(nblock/32) ] destroyed
|
cannam@89
|
203 */
|
cannam@89
|
204
|
cannam@89
|
205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
|
cannam@89
|
206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
|
cannam@89
|
207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
|
cannam@89
|
208 #define WORD_BH(zz) bhtab[(zz) >> 5]
|
cannam@89
|
209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
|
cannam@89
|
210
|
cannam@89
|
211 static
|
cannam@89
|
212 void fallbackSort ( UInt32* fmap,
|
cannam@89
|
213 UInt32* eclass,
|
cannam@89
|
214 UInt32* bhtab,
|
cannam@89
|
215 Int32 nblock,
|
cannam@89
|
216 Int32 verb )
|
cannam@89
|
217 {
|
cannam@89
|
218 Int32 ftab[257];
|
cannam@89
|
219 Int32 ftabCopy[256];
|
cannam@89
|
220 Int32 H, i, j, k, l, r, cc, cc1;
|
cannam@89
|
221 Int32 nNotDone;
|
cannam@89
|
222 Int32 nBhtab;
|
cannam@89
|
223 UChar* eclass8 = (UChar*)eclass;
|
cannam@89
|
224
|
cannam@89
|
225 /*--
|
cannam@89
|
226 Initial 1-char radix sort to generate
|
cannam@89
|
227 initial fmap and initial BH bits.
|
cannam@89
|
228 --*/
|
cannam@89
|
229 if (verb >= 4)
|
cannam@89
|
230 VPrintf0 ( " bucket sorting ...\n" );
|
cannam@89
|
231 for (i = 0; i < 257; i++) ftab[i] = 0;
|
cannam@89
|
232 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
|
cannam@89
|
233 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
|
cannam@89
|
234 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
|
cannam@89
|
235
|
cannam@89
|
236 for (i = 0; i < nblock; i++) {
|
cannam@89
|
237 j = eclass8[i];
|
cannam@89
|
238 k = ftab[j] - 1;
|
cannam@89
|
239 ftab[j] = k;
|
cannam@89
|
240 fmap[k] = i;
|
cannam@89
|
241 }
|
cannam@89
|
242
|
cannam@89
|
243 nBhtab = 2 + (nblock / 32);
|
cannam@89
|
244 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
|
cannam@89
|
245 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
|
cannam@89
|
246
|
cannam@89
|
247 /*--
|
cannam@89
|
248 Inductively refine the buckets. Kind-of an
|
cannam@89
|
249 "exponential radix sort" (!), inspired by the
|
cannam@89
|
250 Manber-Myers suffix array construction algorithm.
|
cannam@89
|
251 --*/
|
cannam@89
|
252
|
cannam@89
|
253 /*-- set sentinel bits for block-end detection --*/
|
cannam@89
|
254 for (i = 0; i < 32; i++) {
|
cannam@89
|
255 SET_BH(nblock + 2*i);
|
cannam@89
|
256 CLEAR_BH(nblock + 2*i + 1);
|
cannam@89
|
257 }
|
cannam@89
|
258
|
cannam@89
|
259 /*-- the log(N) loop --*/
|
cannam@89
|
260 H = 1;
|
cannam@89
|
261 while (1) {
|
cannam@89
|
262
|
cannam@89
|
263 if (verb >= 4)
|
cannam@89
|
264 VPrintf1 ( " depth %6d has ", H );
|
cannam@89
|
265
|
cannam@89
|
266 j = 0;
|
cannam@89
|
267 for (i = 0; i < nblock; i++) {
|
cannam@89
|
268 if (ISSET_BH(i)) j = i;
|
cannam@89
|
269 k = fmap[i] - H; if (k < 0) k += nblock;
|
cannam@89
|
270 eclass[k] = j;
|
cannam@89
|
271 }
|
cannam@89
|
272
|
cannam@89
|
273 nNotDone = 0;
|
cannam@89
|
274 r = -1;
|
cannam@89
|
275 while (1) {
|
cannam@89
|
276
|
cannam@89
|
277 /*-- find the next non-singleton bucket --*/
|
cannam@89
|
278 k = r + 1;
|
cannam@89
|
279 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
|
cannam@89
|
280 if (ISSET_BH(k)) {
|
cannam@89
|
281 while (WORD_BH(k) == 0xffffffff) k += 32;
|
cannam@89
|
282 while (ISSET_BH(k)) k++;
|
cannam@89
|
283 }
|
cannam@89
|
284 l = k - 1;
|
cannam@89
|
285 if (l >= nblock) break;
|
cannam@89
|
286 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
|
cannam@89
|
287 if (!ISSET_BH(k)) {
|
cannam@89
|
288 while (WORD_BH(k) == 0x00000000) k += 32;
|
cannam@89
|
289 while (!ISSET_BH(k)) k++;
|
cannam@89
|
290 }
|
cannam@89
|
291 r = k - 1;
|
cannam@89
|
292 if (r >= nblock) break;
|
cannam@89
|
293
|
cannam@89
|
294 /*-- now [l, r] bracket current bucket --*/
|
cannam@89
|
295 if (r > l) {
|
cannam@89
|
296 nNotDone += (r - l + 1);
|
cannam@89
|
297 fallbackQSort3 ( fmap, eclass, l, r );
|
cannam@89
|
298
|
cannam@89
|
299 /*-- scan bucket and generate header bits-- */
|
cannam@89
|
300 cc = -1;
|
cannam@89
|
301 for (i = l; i <= r; i++) {
|
cannam@89
|
302 cc1 = eclass[fmap[i]];
|
cannam@89
|
303 if (cc != cc1) { SET_BH(i); cc = cc1; };
|
cannam@89
|
304 }
|
cannam@89
|
305 }
|
cannam@89
|
306 }
|
cannam@89
|
307
|
cannam@89
|
308 if (verb >= 4)
|
cannam@89
|
309 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
|
cannam@89
|
310
|
cannam@89
|
311 H *= 2;
|
cannam@89
|
312 if (H > nblock || nNotDone == 0) break;
|
cannam@89
|
313 }
|
cannam@89
|
314
|
cannam@89
|
315 /*--
|
cannam@89
|
316 Reconstruct the original block in
|
cannam@89
|
317 eclass8 [0 .. nblock-1], since the
|
cannam@89
|
318 previous phase destroyed it.
|
cannam@89
|
319 --*/
|
cannam@89
|
320 if (verb >= 4)
|
cannam@89
|
321 VPrintf0 ( " reconstructing block ...\n" );
|
cannam@89
|
322 j = 0;
|
cannam@89
|
323 for (i = 0; i < nblock; i++) {
|
cannam@89
|
324 while (ftabCopy[j] == 0) j++;
|
cannam@89
|
325 ftabCopy[j]--;
|
cannam@89
|
326 eclass8[fmap[i]] = (UChar)j;
|
cannam@89
|
327 }
|
cannam@89
|
328 AssertH ( j < 256, 1005 );
|
cannam@89
|
329 }
|
cannam@89
|
330
|
cannam@89
|
331 #undef SET_BH
|
cannam@89
|
332 #undef CLEAR_BH
|
cannam@89
|
333 #undef ISSET_BH
|
cannam@89
|
334 #undef WORD_BH
|
cannam@89
|
335 #undef UNALIGNED_BH
|
cannam@89
|
336
|
cannam@89
|
337
|
cannam@89
|
338 /*---------------------------------------------*/
|
cannam@89
|
339 /*--- The main, O(N^2 log(N)) sorting ---*/
|
cannam@89
|
340 /*--- algorithm. Faster for "normal" ---*/
|
cannam@89
|
341 /*--- non-repetitive blocks. ---*/
|
cannam@89
|
342 /*---------------------------------------------*/
|
cannam@89
|
343
|
cannam@89
|
344 /*---------------------------------------------*/
|
cannam@89
|
345 static
|
cannam@89
|
346 __inline__
|
cannam@89
|
347 Bool mainGtU ( UInt32 i1,
|
cannam@89
|
348 UInt32 i2,
|
cannam@89
|
349 UChar* block,
|
cannam@89
|
350 UInt16* quadrant,
|
cannam@89
|
351 UInt32 nblock,
|
cannam@89
|
352 Int32* budget )
|
cannam@89
|
353 {
|
cannam@89
|
354 Int32 k;
|
cannam@89
|
355 UChar c1, c2;
|
cannam@89
|
356 UInt16 s1, s2;
|
cannam@89
|
357
|
cannam@89
|
358 AssertD ( i1 != i2, "mainGtU" );
|
cannam@89
|
359 /* 1 */
|
cannam@89
|
360 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
361 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
362 i1++; i2++;
|
cannam@89
|
363 /* 2 */
|
cannam@89
|
364 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
365 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
366 i1++; i2++;
|
cannam@89
|
367 /* 3 */
|
cannam@89
|
368 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
369 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
370 i1++; i2++;
|
cannam@89
|
371 /* 4 */
|
cannam@89
|
372 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
373 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
374 i1++; i2++;
|
cannam@89
|
375 /* 5 */
|
cannam@89
|
376 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
377 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
378 i1++; i2++;
|
cannam@89
|
379 /* 6 */
|
cannam@89
|
380 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
381 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
382 i1++; i2++;
|
cannam@89
|
383 /* 7 */
|
cannam@89
|
384 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
385 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
386 i1++; i2++;
|
cannam@89
|
387 /* 8 */
|
cannam@89
|
388 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
389 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
390 i1++; i2++;
|
cannam@89
|
391 /* 9 */
|
cannam@89
|
392 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
393 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
394 i1++; i2++;
|
cannam@89
|
395 /* 10 */
|
cannam@89
|
396 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
397 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
398 i1++; i2++;
|
cannam@89
|
399 /* 11 */
|
cannam@89
|
400 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
401 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
402 i1++; i2++;
|
cannam@89
|
403 /* 12 */
|
cannam@89
|
404 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
405 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
406 i1++; i2++;
|
cannam@89
|
407
|
cannam@89
|
408 k = nblock + 8;
|
cannam@89
|
409
|
cannam@89
|
410 do {
|
cannam@89
|
411 /* 1 */
|
cannam@89
|
412 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
413 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
414 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
415 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
416 i1++; i2++;
|
cannam@89
|
417 /* 2 */
|
cannam@89
|
418 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
419 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
420 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
421 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
422 i1++; i2++;
|
cannam@89
|
423 /* 3 */
|
cannam@89
|
424 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
425 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
426 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
427 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
428 i1++; i2++;
|
cannam@89
|
429 /* 4 */
|
cannam@89
|
430 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
431 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
432 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
433 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
434 i1++; i2++;
|
cannam@89
|
435 /* 5 */
|
cannam@89
|
436 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
437 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
438 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
439 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
440 i1++; i2++;
|
cannam@89
|
441 /* 6 */
|
cannam@89
|
442 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
443 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
444 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
445 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
446 i1++; i2++;
|
cannam@89
|
447 /* 7 */
|
cannam@89
|
448 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
449 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
450 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
451 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
452 i1++; i2++;
|
cannam@89
|
453 /* 8 */
|
cannam@89
|
454 c1 = block[i1]; c2 = block[i2];
|
cannam@89
|
455 if (c1 != c2) return (c1 > c2);
|
cannam@89
|
456 s1 = quadrant[i1]; s2 = quadrant[i2];
|
cannam@89
|
457 if (s1 != s2) return (s1 > s2);
|
cannam@89
|
458 i1++; i2++;
|
cannam@89
|
459
|
cannam@89
|
460 if (i1 >= nblock) i1 -= nblock;
|
cannam@89
|
461 if (i2 >= nblock) i2 -= nblock;
|
cannam@89
|
462
|
cannam@89
|
463 k -= 8;
|
cannam@89
|
464 (*budget)--;
|
cannam@89
|
465 }
|
cannam@89
|
466 while (k >= 0);
|
cannam@89
|
467
|
cannam@89
|
468 return False;
|
cannam@89
|
469 }
|
cannam@89
|
470
|
cannam@89
|
471
|
cannam@89
|
472 /*---------------------------------------------*/
|
cannam@89
|
473 /*--
|
cannam@89
|
474 Knuth's increments seem to work better
|
cannam@89
|
475 than Incerpi-Sedgewick here. Possibly
|
cannam@89
|
476 because the number of elems to sort is
|
cannam@89
|
477 usually small, typically <= 20.
|
cannam@89
|
478 --*/
|
cannam@89
|
479 static
|
cannam@89
|
480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
|
cannam@89
|
481 9841, 29524, 88573, 265720,
|
cannam@89
|
482 797161, 2391484 };
|
cannam@89
|
483
|
cannam@89
|
484 static
|
cannam@89
|
485 void mainSimpleSort ( UInt32* ptr,
|
cannam@89
|
486 UChar* block,
|
cannam@89
|
487 UInt16* quadrant,
|
cannam@89
|
488 Int32 nblock,
|
cannam@89
|
489 Int32 lo,
|
cannam@89
|
490 Int32 hi,
|
cannam@89
|
491 Int32 d,
|
cannam@89
|
492 Int32* budget )
|
cannam@89
|
493 {
|
cannam@89
|
494 Int32 i, j, h, bigN, hp;
|
cannam@89
|
495 UInt32 v;
|
cannam@89
|
496
|
cannam@89
|
497 bigN = hi - lo + 1;
|
cannam@89
|
498 if (bigN < 2) return;
|
cannam@89
|
499
|
cannam@89
|
500 hp = 0;
|
cannam@89
|
501 while (incs[hp] < bigN) hp++;
|
cannam@89
|
502 hp--;
|
cannam@89
|
503
|
cannam@89
|
504 for (; hp >= 0; hp--) {
|
cannam@89
|
505 h = incs[hp];
|
cannam@89
|
506
|
cannam@89
|
507 i = lo + h;
|
cannam@89
|
508 while (True) {
|
cannam@89
|
509
|
cannam@89
|
510 /*-- copy 1 --*/
|
cannam@89
|
511 if (i > hi) break;
|
cannam@89
|
512 v = ptr[i];
|
cannam@89
|
513 j = i;
|
cannam@89
|
514 while ( mainGtU (
|
cannam@89
|
515 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
cannam@89
|
516 ) ) {
|
cannam@89
|
517 ptr[j] = ptr[j-h];
|
cannam@89
|
518 j = j - h;
|
cannam@89
|
519 if (j <= (lo + h - 1)) break;
|
cannam@89
|
520 }
|
cannam@89
|
521 ptr[j] = v;
|
cannam@89
|
522 i++;
|
cannam@89
|
523
|
cannam@89
|
524 /*-- copy 2 --*/
|
cannam@89
|
525 if (i > hi) break;
|
cannam@89
|
526 v = ptr[i];
|
cannam@89
|
527 j = i;
|
cannam@89
|
528 while ( mainGtU (
|
cannam@89
|
529 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
cannam@89
|
530 ) ) {
|
cannam@89
|
531 ptr[j] = ptr[j-h];
|
cannam@89
|
532 j = j - h;
|
cannam@89
|
533 if (j <= (lo + h - 1)) break;
|
cannam@89
|
534 }
|
cannam@89
|
535 ptr[j] = v;
|
cannam@89
|
536 i++;
|
cannam@89
|
537
|
cannam@89
|
538 /*-- copy 3 --*/
|
cannam@89
|
539 if (i > hi) break;
|
cannam@89
|
540 v = ptr[i];
|
cannam@89
|
541 j = i;
|
cannam@89
|
542 while ( mainGtU (
|
cannam@89
|
543 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
|
cannam@89
|
544 ) ) {
|
cannam@89
|
545 ptr[j] = ptr[j-h];
|
cannam@89
|
546 j = j - h;
|
cannam@89
|
547 if (j <= (lo + h - 1)) break;
|
cannam@89
|
548 }
|
cannam@89
|
549 ptr[j] = v;
|
cannam@89
|
550 i++;
|
cannam@89
|
551
|
cannam@89
|
552 if (*budget < 0) return;
|
cannam@89
|
553 }
|
cannam@89
|
554 }
|
cannam@89
|
555 }
|
cannam@89
|
556
|
cannam@89
|
557
|
cannam@89
|
558 /*---------------------------------------------*/
|
cannam@89
|
559 /*--
|
cannam@89
|
560 The following is an implementation of
|
cannam@89
|
561 an elegant 3-way quicksort for strings,
|
cannam@89
|
562 described in a paper "Fast Algorithms for
|
cannam@89
|
563 Sorting and Searching Strings", by Robert
|
cannam@89
|
564 Sedgewick and Jon L. Bentley.
|
cannam@89
|
565 --*/
|
cannam@89
|
566
|
cannam@89
|
567 #define mswap(zz1, zz2) \
|
cannam@89
|
568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
|
cannam@89
|
569
|
cannam@89
|
570 #define mvswap(zzp1, zzp2, zzn) \
|
cannam@89
|
571 { \
|
cannam@89
|
572 Int32 yyp1 = (zzp1); \
|
cannam@89
|
573 Int32 yyp2 = (zzp2); \
|
cannam@89
|
574 Int32 yyn = (zzn); \
|
cannam@89
|
575 while (yyn > 0) { \
|
cannam@89
|
576 mswap(ptr[yyp1], ptr[yyp2]); \
|
cannam@89
|
577 yyp1++; yyp2++; yyn--; \
|
cannam@89
|
578 } \
|
cannam@89
|
579 }
|
cannam@89
|
580
|
cannam@89
|
581 static
|
cannam@89
|
582 __inline__
|
cannam@89
|
583 UChar mmed3 ( UChar a, UChar b, UChar c )
|
cannam@89
|
584 {
|
cannam@89
|
585 UChar t;
|
cannam@89
|
586 if (a > b) { t = a; a = b; b = t; };
|
cannam@89
|
587 if (b > c) {
|
cannam@89
|
588 b = c;
|
cannam@89
|
589 if (a > b) b = a;
|
cannam@89
|
590 }
|
cannam@89
|
591 return b;
|
cannam@89
|
592 }
|
cannam@89
|
593
|
cannam@89
|
594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
|
cannam@89
|
595
|
cannam@89
|
596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
|
cannam@89
|
597 stackHi[sp] = hz; \
|
cannam@89
|
598 stackD [sp] = dz; \
|
cannam@89
|
599 sp++; }
|
cannam@89
|
600
|
cannam@89
|
601 #define mpop(lz,hz,dz) { sp--; \
|
cannam@89
|
602 lz = stackLo[sp]; \
|
cannam@89
|
603 hz = stackHi[sp]; \
|
cannam@89
|
604 dz = stackD [sp]; }
|
cannam@89
|
605
|
cannam@89
|
606
|
cannam@89
|
607 #define mnextsize(az) (nextHi[az]-nextLo[az])
|
cannam@89
|
608
|
cannam@89
|
609 #define mnextswap(az,bz) \
|
cannam@89
|
610 { Int32 tz; \
|
cannam@89
|
611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
|
cannam@89
|
612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
|
cannam@89
|
613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
|
cannam@89
|
614
|
cannam@89
|
615
|
cannam@89
|
616 #define MAIN_QSORT_SMALL_THRESH 20
|
cannam@89
|
617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
|
cannam@89
|
618 #define MAIN_QSORT_STACK_SIZE 100
|
cannam@89
|
619
|
cannam@89
|
620 static
|
cannam@89
|
621 void mainQSort3 ( UInt32* ptr,
|
cannam@89
|
622 UChar* block,
|
cannam@89
|
623 UInt16* quadrant,
|
cannam@89
|
624 Int32 nblock,
|
cannam@89
|
625 Int32 loSt,
|
cannam@89
|
626 Int32 hiSt,
|
cannam@89
|
627 Int32 dSt,
|
cannam@89
|
628 Int32* budget )
|
cannam@89
|
629 {
|
cannam@89
|
630 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
|
cannam@89
|
631 Int32 sp, lo, hi, d;
|
cannam@89
|
632
|
cannam@89
|
633 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
|
cannam@89
|
634 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
|
cannam@89
|
635 Int32 stackD [MAIN_QSORT_STACK_SIZE];
|
cannam@89
|
636
|
cannam@89
|
637 Int32 nextLo[3];
|
cannam@89
|
638 Int32 nextHi[3];
|
cannam@89
|
639 Int32 nextD [3];
|
cannam@89
|
640
|
cannam@89
|
641 sp = 0;
|
cannam@89
|
642 mpush ( loSt, hiSt, dSt );
|
cannam@89
|
643
|
cannam@89
|
644 while (sp > 0) {
|
cannam@89
|
645
|
cannam@89
|
646 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
|
cannam@89
|
647
|
cannam@89
|
648 mpop ( lo, hi, d );
|
cannam@89
|
649 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
|
cannam@89
|
650 d > MAIN_QSORT_DEPTH_THRESH) {
|
cannam@89
|
651 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
|
cannam@89
|
652 if (*budget < 0) return;
|
cannam@89
|
653 continue;
|
cannam@89
|
654 }
|
cannam@89
|
655
|
cannam@89
|
656 med = (Int32)
|
cannam@89
|
657 mmed3 ( block[ptr[ lo ]+d],
|
cannam@89
|
658 block[ptr[ hi ]+d],
|
cannam@89
|
659 block[ptr[ (lo+hi)>>1 ]+d] );
|
cannam@89
|
660
|
cannam@89
|
661 unLo = ltLo = lo;
|
cannam@89
|
662 unHi = gtHi = hi;
|
cannam@89
|
663
|
cannam@89
|
664 while (True) {
|
cannam@89
|
665 while (True) {
|
cannam@89
|
666 if (unLo > unHi) break;
|
cannam@89
|
667 n = ((Int32)block[ptr[unLo]+d]) - med;
|
cannam@89
|
668 if (n == 0) {
|
cannam@89
|
669 mswap(ptr[unLo], ptr[ltLo]);
|
cannam@89
|
670 ltLo++; unLo++; continue;
|
cannam@89
|
671 };
|
cannam@89
|
672 if (n > 0) break;
|
cannam@89
|
673 unLo++;
|
cannam@89
|
674 }
|
cannam@89
|
675 while (True) {
|
cannam@89
|
676 if (unLo > unHi) break;
|
cannam@89
|
677 n = ((Int32)block[ptr[unHi]+d]) - med;
|
cannam@89
|
678 if (n == 0) {
|
cannam@89
|
679 mswap(ptr[unHi], ptr[gtHi]);
|
cannam@89
|
680 gtHi--; unHi--; continue;
|
cannam@89
|
681 };
|
cannam@89
|
682 if (n < 0) break;
|
cannam@89
|
683 unHi--;
|
cannam@89
|
684 }
|
cannam@89
|
685 if (unLo > unHi) break;
|
cannam@89
|
686 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
|
cannam@89
|
687 }
|
cannam@89
|
688
|
cannam@89
|
689 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
|
cannam@89
|
690
|
cannam@89
|
691 if (gtHi < ltLo) {
|
cannam@89
|
692 mpush(lo, hi, d+1 );
|
cannam@89
|
693 continue;
|
cannam@89
|
694 }
|
cannam@89
|
695
|
cannam@89
|
696 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
|
cannam@89
|
697 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
|
cannam@89
|
698
|
cannam@89
|
699 n = lo + unLo - ltLo - 1;
|
cannam@89
|
700 m = hi - (gtHi - unHi) + 1;
|
cannam@89
|
701
|
cannam@89
|
702 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
|
cannam@89
|
703 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
|
cannam@89
|
704 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
|
cannam@89
|
705
|
cannam@89
|
706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
|
cannam@89
|
707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
|
cannam@89
|
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
|
cannam@89
|
709
|
cannam@89
|
710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
|
cannam@89
|
711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
|
cannam@89
|
712
|
cannam@89
|
713 mpush (nextLo[0], nextHi[0], nextD[0]);
|
cannam@89
|
714 mpush (nextLo[1], nextHi[1], nextD[1]);
|
cannam@89
|
715 mpush (nextLo[2], nextHi[2], nextD[2]);
|
cannam@89
|
716 }
|
cannam@89
|
717 }
|
cannam@89
|
718
|
cannam@89
|
719 #undef mswap
|
cannam@89
|
720 #undef mvswap
|
cannam@89
|
721 #undef mpush
|
cannam@89
|
722 #undef mpop
|
cannam@89
|
723 #undef mmin
|
cannam@89
|
724 #undef mnextsize
|
cannam@89
|
725 #undef mnextswap
|
cannam@89
|
726 #undef MAIN_QSORT_SMALL_THRESH
|
cannam@89
|
727 #undef MAIN_QSORT_DEPTH_THRESH
|
cannam@89
|
728 #undef MAIN_QSORT_STACK_SIZE
|
cannam@89
|
729
|
cannam@89
|
730
|
cannam@89
|
731 /*---------------------------------------------*/
|
cannam@89
|
732 /* Pre:
|
cannam@89
|
733 nblock > N_OVERSHOOT
|
cannam@89
|
734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
cannam@89
|
735 ((UChar*)block32) [0 .. nblock-1] holds block
|
cannam@89
|
736 ptr exists for [0 .. nblock-1]
|
cannam@89
|
737
|
cannam@89
|
738 Post:
|
cannam@89
|
739 ((UChar*)block32) [0 .. nblock-1] holds block
|
cannam@89
|
740 All other areas of block32 destroyed
|
cannam@89
|
741 ftab [0 .. 65536 ] destroyed
|
cannam@89
|
742 ptr [0 .. nblock-1] holds sorted order
|
cannam@89
|
743 if (*budget < 0), sorting was abandoned
|
cannam@89
|
744 */
|
cannam@89
|
745
|
cannam@89
|
746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
|
cannam@89
|
747 #define SETMASK (1 << 21)
|
cannam@89
|
748 #define CLEARMASK (~(SETMASK))
|
cannam@89
|
749
|
cannam@89
|
750 static
|
cannam@89
|
751 void mainSort ( UInt32* ptr,
|
cannam@89
|
752 UChar* block,
|
cannam@89
|
753 UInt16* quadrant,
|
cannam@89
|
754 UInt32* ftab,
|
cannam@89
|
755 Int32 nblock,
|
cannam@89
|
756 Int32 verb,
|
cannam@89
|
757 Int32* budget )
|
cannam@89
|
758 {
|
cannam@89
|
759 Int32 i, j, k, ss, sb;
|
cannam@89
|
760 Int32 runningOrder[256];
|
cannam@89
|
761 Bool bigDone[256];
|
cannam@89
|
762 Int32 copyStart[256];
|
cannam@89
|
763 Int32 copyEnd [256];
|
cannam@89
|
764 UChar c1;
|
cannam@89
|
765 Int32 numQSorted;
|
cannam@89
|
766 UInt16 s;
|
cannam@89
|
767 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
|
cannam@89
|
768
|
cannam@89
|
769 /*-- set up the 2-byte frequency table --*/
|
cannam@89
|
770 for (i = 65536; i >= 0; i--) ftab[i] = 0;
|
cannam@89
|
771
|
cannam@89
|
772 j = block[0] << 8;
|
cannam@89
|
773 i = nblock-1;
|
cannam@89
|
774 for (; i >= 3; i -= 4) {
|
cannam@89
|
775 quadrant[i] = 0;
|
cannam@89
|
776 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
|
cannam@89
|
777 ftab[j]++;
|
cannam@89
|
778 quadrant[i-1] = 0;
|
cannam@89
|
779 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
|
cannam@89
|
780 ftab[j]++;
|
cannam@89
|
781 quadrant[i-2] = 0;
|
cannam@89
|
782 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
|
cannam@89
|
783 ftab[j]++;
|
cannam@89
|
784 quadrant[i-3] = 0;
|
cannam@89
|
785 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
|
cannam@89
|
786 ftab[j]++;
|
cannam@89
|
787 }
|
cannam@89
|
788 for (; i >= 0; i--) {
|
cannam@89
|
789 quadrant[i] = 0;
|
cannam@89
|
790 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
|
cannam@89
|
791 ftab[j]++;
|
cannam@89
|
792 }
|
cannam@89
|
793
|
cannam@89
|
794 /*-- (emphasises close relationship of block & quadrant) --*/
|
cannam@89
|
795 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
|
cannam@89
|
796 block [nblock+i] = block[i];
|
cannam@89
|
797 quadrant[nblock+i] = 0;
|
cannam@89
|
798 }
|
cannam@89
|
799
|
cannam@89
|
800 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
|
cannam@89
|
801
|
cannam@89
|
802 /*-- Complete the initial radix sort --*/
|
cannam@89
|
803 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
|
cannam@89
|
804
|
cannam@89
|
805 s = block[0] << 8;
|
cannam@89
|
806 i = nblock-1;
|
cannam@89
|
807 for (; i >= 3; i -= 4) {
|
cannam@89
|
808 s = (s >> 8) | (block[i] << 8);
|
cannam@89
|
809 j = ftab[s] -1;
|
cannam@89
|
810 ftab[s] = j;
|
cannam@89
|
811 ptr[j] = i;
|
cannam@89
|
812 s = (s >> 8) | (block[i-1] << 8);
|
cannam@89
|
813 j = ftab[s] -1;
|
cannam@89
|
814 ftab[s] = j;
|
cannam@89
|
815 ptr[j] = i-1;
|
cannam@89
|
816 s = (s >> 8) | (block[i-2] << 8);
|
cannam@89
|
817 j = ftab[s] -1;
|
cannam@89
|
818 ftab[s] = j;
|
cannam@89
|
819 ptr[j] = i-2;
|
cannam@89
|
820 s = (s >> 8) | (block[i-3] << 8);
|
cannam@89
|
821 j = ftab[s] -1;
|
cannam@89
|
822 ftab[s] = j;
|
cannam@89
|
823 ptr[j] = i-3;
|
cannam@89
|
824 }
|
cannam@89
|
825 for (; i >= 0; i--) {
|
cannam@89
|
826 s = (s >> 8) | (block[i] << 8);
|
cannam@89
|
827 j = ftab[s] -1;
|
cannam@89
|
828 ftab[s] = j;
|
cannam@89
|
829 ptr[j] = i;
|
cannam@89
|
830 }
|
cannam@89
|
831
|
cannam@89
|
832 /*--
|
cannam@89
|
833 Now ftab contains the first loc of every small bucket.
|
cannam@89
|
834 Calculate the running order, from smallest to largest
|
cannam@89
|
835 big bucket.
|
cannam@89
|
836 --*/
|
cannam@89
|
837 for (i = 0; i <= 255; i++) {
|
cannam@89
|
838 bigDone [i] = False;
|
cannam@89
|
839 runningOrder[i] = i;
|
cannam@89
|
840 }
|
cannam@89
|
841
|
cannam@89
|
842 {
|
cannam@89
|
843 Int32 vv;
|
cannam@89
|
844 Int32 h = 1;
|
cannam@89
|
845 do h = 3 * h + 1; while (h <= 256);
|
cannam@89
|
846 do {
|
cannam@89
|
847 h = h / 3;
|
cannam@89
|
848 for (i = h; i <= 255; i++) {
|
cannam@89
|
849 vv = runningOrder[i];
|
cannam@89
|
850 j = i;
|
cannam@89
|
851 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
|
cannam@89
|
852 runningOrder[j] = runningOrder[j-h];
|
cannam@89
|
853 j = j - h;
|
cannam@89
|
854 if (j <= (h - 1)) goto zero;
|
cannam@89
|
855 }
|
cannam@89
|
856 zero:
|
cannam@89
|
857 runningOrder[j] = vv;
|
cannam@89
|
858 }
|
cannam@89
|
859 } while (h != 1);
|
cannam@89
|
860 }
|
cannam@89
|
861
|
cannam@89
|
862 /*--
|
cannam@89
|
863 The main sorting loop.
|
cannam@89
|
864 --*/
|
cannam@89
|
865
|
cannam@89
|
866 numQSorted = 0;
|
cannam@89
|
867
|
cannam@89
|
868 for (i = 0; i <= 255; i++) {
|
cannam@89
|
869
|
cannam@89
|
870 /*--
|
cannam@89
|
871 Process big buckets, starting with the least full.
|
cannam@89
|
872 Basically this is a 3-step process in which we call
|
cannam@89
|
873 mainQSort3 to sort the small buckets [ss, j], but
|
cannam@89
|
874 also make a big effort to avoid the calls if we can.
|
cannam@89
|
875 --*/
|
cannam@89
|
876 ss = runningOrder[i];
|
cannam@89
|
877
|
cannam@89
|
878 /*--
|
cannam@89
|
879 Step 1:
|
cannam@89
|
880 Complete the big bucket [ss] by quicksorting
|
cannam@89
|
881 any unsorted small buckets [ss, j], for j != ss.
|
cannam@89
|
882 Hopefully previous pointer-scanning phases have already
|
cannam@89
|
883 completed many of the small buckets [ss, j], so
|
cannam@89
|
884 we don't have to sort them at all.
|
cannam@89
|
885 --*/
|
cannam@89
|
886 for (j = 0; j <= 255; j++) {
|
cannam@89
|
887 if (j != ss) {
|
cannam@89
|
888 sb = (ss << 8) + j;
|
cannam@89
|
889 if ( ! (ftab[sb] & SETMASK) ) {
|
cannam@89
|
890 Int32 lo = ftab[sb] & CLEARMASK;
|
cannam@89
|
891 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
|
cannam@89
|
892 if (hi > lo) {
|
cannam@89
|
893 if (verb >= 4)
|
cannam@89
|
894 VPrintf4 ( " qsort [0x%x, 0x%x] "
|
cannam@89
|
895 "done %d this %d\n",
|
cannam@89
|
896 ss, j, numQSorted, hi - lo + 1 );
|
cannam@89
|
897 mainQSort3 (
|
cannam@89
|
898 ptr, block, quadrant, nblock,
|
cannam@89
|
899 lo, hi, BZ_N_RADIX, budget
|
cannam@89
|
900 );
|
cannam@89
|
901 numQSorted += (hi - lo + 1);
|
cannam@89
|
902 if (*budget < 0) return;
|
cannam@89
|
903 }
|
cannam@89
|
904 }
|
cannam@89
|
905 ftab[sb] |= SETMASK;
|
cannam@89
|
906 }
|
cannam@89
|
907 }
|
cannam@89
|
908
|
cannam@89
|
909 AssertH ( !bigDone[ss], 1006 );
|
cannam@89
|
910
|
cannam@89
|
911 /*--
|
cannam@89
|
912 Step 2:
|
cannam@89
|
913 Now scan this big bucket [ss] so as to synthesise the
|
cannam@89
|
914 sorted order for small buckets [t, ss] for all t,
|
cannam@89
|
915 including, magically, the bucket [ss,ss] too.
|
cannam@89
|
916 This will avoid doing Real Work in subsequent Step 1's.
|
cannam@89
|
917 --*/
|
cannam@89
|
918 {
|
cannam@89
|
919 for (j = 0; j <= 255; j++) {
|
cannam@89
|
920 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
|
cannam@89
|
921 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
|
cannam@89
|
922 }
|
cannam@89
|
923 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
|
cannam@89
|
924 k = ptr[j]-1; if (k < 0) k += nblock;
|
cannam@89
|
925 c1 = block[k];
|
cannam@89
|
926 if (!bigDone[c1])
|
cannam@89
|
927 ptr[ copyStart[c1]++ ] = k;
|
cannam@89
|
928 }
|
cannam@89
|
929 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
|
cannam@89
|
930 k = ptr[j]-1; if (k < 0) k += nblock;
|
cannam@89
|
931 c1 = block[k];
|
cannam@89
|
932 if (!bigDone[c1])
|
cannam@89
|
933 ptr[ copyEnd[c1]-- ] = k;
|
cannam@89
|
934 }
|
cannam@89
|
935 }
|
cannam@89
|
936
|
cannam@89
|
937 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
|
cannam@89
|
938 ||
|
cannam@89
|
939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
|
cannam@89
|
940 Necessity for this case is demonstrated by compressing
|
cannam@89
|
941 a sequence of approximately 48.5 million of character
|
cannam@89
|
942 251; 1.0.0/1.0.1 will then die here. */
|
cannam@89
|
943 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
|
cannam@89
|
944 1007 )
|
cannam@89
|
945
|
cannam@89
|
946 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
|
cannam@89
|
947
|
cannam@89
|
948 /*--
|
cannam@89
|
949 Step 3:
|
cannam@89
|
950 The [ss] big bucket is now done. Record this fact,
|
cannam@89
|
951 and update the quadrant descriptors. Remember to
|
cannam@89
|
952 update quadrants in the overshoot area too, if
|
cannam@89
|
953 necessary. The "if (i < 255)" test merely skips
|
cannam@89
|
954 this updating for the last bucket processed, since
|
cannam@89
|
955 updating for the last bucket is pointless.
|
cannam@89
|
956
|
cannam@89
|
957 The quadrant array provides a way to incrementally
|
cannam@89
|
958 cache sort orderings, as they appear, so as to
|
cannam@89
|
959 make subsequent comparisons in fullGtU() complete
|
cannam@89
|
960 faster. For repetitive blocks this makes a big
|
cannam@89
|
961 difference (but not big enough to be able to avoid
|
cannam@89
|
962 the fallback sorting mechanism, exponential radix sort).
|
cannam@89
|
963
|
cannam@89
|
964 The precise meaning is: at all times:
|
cannam@89
|
965
|
cannam@89
|
966 for 0 <= i < nblock and 0 <= j <= nblock
|
cannam@89
|
967
|
cannam@89
|
968 if block[i] != block[j],
|
cannam@89
|
969
|
cannam@89
|
970 then the relative values of quadrant[i] and
|
cannam@89
|
971 quadrant[j] are meaningless.
|
cannam@89
|
972
|
cannam@89
|
973 else {
|
cannam@89
|
974 if quadrant[i] < quadrant[j]
|
cannam@89
|
975 then the string starting at i lexicographically
|
cannam@89
|
976 precedes the string starting at j
|
cannam@89
|
977
|
cannam@89
|
978 else if quadrant[i] > quadrant[j]
|
cannam@89
|
979 then the string starting at j lexicographically
|
cannam@89
|
980 precedes the string starting at i
|
cannam@89
|
981
|
cannam@89
|
982 else
|
cannam@89
|
983 the relative ordering of the strings starting
|
cannam@89
|
984 at i and j has not yet been determined.
|
cannam@89
|
985 }
|
cannam@89
|
986 --*/
|
cannam@89
|
987 bigDone[ss] = True;
|
cannam@89
|
988
|
cannam@89
|
989 if (i < 255) {
|
cannam@89
|
990 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
|
cannam@89
|
991 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
|
cannam@89
|
992 Int32 shifts = 0;
|
cannam@89
|
993
|
cannam@89
|
994 while ((bbSize >> shifts) > 65534) shifts++;
|
cannam@89
|
995
|
cannam@89
|
996 for (j = bbSize-1; j >= 0; j--) {
|
cannam@89
|
997 Int32 a2update = ptr[bbStart + j];
|
cannam@89
|
998 UInt16 qVal = (UInt16)(j >> shifts);
|
cannam@89
|
999 quadrant[a2update] = qVal;
|
cannam@89
|
1000 if (a2update < BZ_N_OVERSHOOT)
|
cannam@89
|
1001 quadrant[a2update + nblock] = qVal;
|
cannam@89
|
1002 }
|
cannam@89
|
1003 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
|
cannam@89
|
1004 }
|
cannam@89
|
1005
|
cannam@89
|
1006 }
|
cannam@89
|
1007
|
cannam@89
|
1008 if (verb >= 4)
|
cannam@89
|
1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
|
cannam@89
|
1010 nblock, numQSorted, nblock - numQSorted );
|
cannam@89
|
1011 }
|
cannam@89
|
1012
|
cannam@89
|
1013 #undef BIGFREQ
|
cannam@89
|
1014 #undef SETMASK
|
cannam@89
|
1015 #undef CLEARMASK
|
cannam@89
|
1016
|
cannam@89
|
1017
|
cannam@89
|
1018 /*---------------------------------------------*/
|
cannam@89
|
1019 /* Pre:
|
cannam@89
|
1020 nblock > 0
|
cannam@89
|
1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
|
cannam@89
|
1022 ((UChar*)arr2) [0 .. nblock-1] holds block
|
cannam@89
|
1023 arr1 exists for [0 .. nblock-1]
|
cannam@89
|
1024
|
cannam@89
|
1025 Post:
|
cannam@89
|
1026 ((UChar*)arr2) [0 .. nblock-1] holds block
|
cannam@89
|
1027 All other areas of block destroyed
|
cannam@89
|
1028 ftab [ 0 .. 65536 ] destroyed
|
cannam@89
|
1029 arr1 [0 .. nblock-1] holds sorted order
|
cannam@89
|
1030 */
|
cannam@89
|
1031 void BZ2_blockSort ( EState* s )
|
cannam@89
|
1032 {
|
cannam@89
|
1033 UInt32* ptr = s->ptr;
|
cannam@89
|
1034 UChar* block = s->block;
|
cannam@89
|
1035 UInt32* ftab = s->ftab;
|
cannam@89
|
1036 Int32 nblock = s->nblock;
|
cannam@89
|
1037 Int32 verb = s->verbosity;
|
cannam@89
|
1038 Int32 wfact = s->workFactor;
|
cannam@89
|
1039 UInt16* quadrant;
|
cannam@89
|
1040 Int32 budget;
|
cannam@89
|
1041 Int32 budgetInit;
|
cannam@89
|
1042 Int32 i;
|
cannam@89
|
1043
|
cannam@89
|
1044 if (nblock < 10000) {
|
cannam@89
|
1045 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
|
cannam@89
|
1046 } else {
|
cannam@89
|
1047 /* Calculate the location for quadrant, remembering to get
|
cannam@89
|
1048 the alignment right. Assumes that &(block[0]) is at least
|
cannam@89
|
1049 2-byte aligned -- this should be ok since block is really
|
cannam@89
|
1050 the first section of arr2.
|
cannam@89
|
1051 */
|
cannam@89
|
1052 i = nblock+BZ_N_OVERSHOOT;
|
cannam@89
|
1053 if (i & 1) i++;
|
cannam@89
|
1054 quadrant = (UInt16*)(&(block[i]));
|
cannam@89
|
1055
|
cannam@89
|
1056 /* (wfact-1) / 3 puts the default-factor-30
|
cannam@89
|
1057 transition point at very roughly the same place as
|
cannam@89
|
1058 with v0.1 and v0.9.0.
|
cannam@89
|
1059 Not that it particularly matters any more, since the
|
cannam@89
|
1060 resulting compressed stream is now the same regardless
|
cannam@89
|
1061 of whether or not we use the main sort or fallback sort.
|
cannam@89
|
1062 */
|
cannam@89
|
1063 if (wfact < 1 ) wfact = 1;
|
cannam@89
|
1064 if (wfact > 100) wfact = 100;
|
cannam@89
|
1065 budgetInit = nblock * ((wfact-1) / 3);
|
cannam@89
|
1066 budget = budgetInit;
|
cannam@89
|
1067
|
cannam@89
|
1068 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
|
cannam@89
|
1069 if (verb >= 3)
|
cannam@89
|
1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
|
cannam@89
|
1071 budgetInit - budget,
|
cannam@89
|
1072 nblock,
|
cannam@89
|
1073 (float)(budgetInit - budget) /
|
cannam@89
|
1074 (float)(nblock==0 ? 1 : nblock) );
|
cannam@89
|
1075 if (budget < 0) {
|
cannam@89
|
1076 if (verb >= 2)
|
cannam@89
|
1077 VPrintf0 ( " too repetitive; using fallback"
|
cannam@89
|
1078 " sorting algorithm\n" );
|
cannam@89
|
1079 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
|
cannam@89
|
1080 }
|
cannam@89
|
1081 }
|
cannam@89
|
1082
|
cannam@89
|
1083 s->origPtr = -1;
|
cannam@89
|
1084 for (i = 0; i < s->nblock; i++)
|
cannam@89
|
1085 if (ptr[i] == 0)
|
cannam@89
|
1086 { s->origPtr = i; break; };
|
cannam@89
|
1087
|
cannam@89
|
1088 AssertH( s->origPtr != -1, 1003 );
|
cannam@89
|
1089 }
|
cannam@89
|
1090
|
cannam@89
|
1091
|
cannam@89
|
1092 /*-------------------------------------------------------------*/
|
cannam@89
|
1093 /*--- end blocksort.c ---*/
|
cannam@89
|
1094 /*-------------------------------------------------------------*/
|