annotate src/bzip2-1.0.6/blocksort.c @ 169:223a55898ab9 tip default

Add null config files
author Chris Cannam <cannam@all-day-breakfast.com>
date Mon, 02 Mar 2020 14:03:47 +0000
parents 8a15ff55d9af
children
rev   line source
cannam@89 1
cannam@89 2 /*-------------------------------------------------------------*/
cannam@89 3 /*--- Block sorting machinery ---*/
cannam@89 4 /*--- blocksort.c ---*/
cannam@89 5 /*-------------------------------------------------------------*/
cannam@89 6
cannam@89 7 /* ------------------------------------------------------------------
cannam@89 8 This file is part of bzip2/libbzip2, a program and library for
cannam@89 9 lossless, block-sorting data compression.
cannam@89 10
cannam@89 11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
cannam@89 12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
cannam@89 13
cannam@89 14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
cannam@89 15 README file.
cannam@89 16
cannam@89 17 This program is released under the terms of the license contained
cannam@89 18 in the file LICENSE.
cannam@89 19 ------------------------------------------------------------------ */
cannam@89 20
cannam@89 21
cannam@89 22 #include "bzlib_private.h"
cannam@89 23
cannam@89 24 /*---------------------------------------------*/
cannam@89 25 /*--- Fallback O(N log(N)^2) sorting ---*/
cannam@89 26 /*--- algorithm, for repetitive blocks ---*/
cannam@89 27 /*---------------------------------------------*/
cannam@89 28
cannam@89 29 /*---------------------------------------------*/
cannam@89 30 static
cannam@89 31 __inline__
cannam@89 32 void fallbackSimpleSort ( UInt32* fmap,
cannam@89 33 UInt32* eclass,
cannam@89 34 Int32 lo,
cannam@89 35 Int32 hi )
cannam@89 36 {
cannam@89 37 Int32 i, j, tmp;
cannam@89 38 UInt32 ec_tmp;
cannam@89 39
cannam@89 40 if (lo == hi) return;
cannam@89 41
cannam@89 42 if (hi - lo > 3) {
cannam@89 43 for ( i = hi-4; i >= lo; i-- ) {
cannam@89 44 tmp = fmap[i];
cannam@89 45 ec_tmp = eclass[tmp];
cannam@89 46 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
cannam@89 47 fmap[j-4] = fmap[j];
cannam@89 48 fmap[j-4] = tmp;
cannam@89 49 }
cannam@89 50 }
cannam@89 51
cannam@89 52 for ( i = hi-1; i >= lo; i-- ) {
cannam@89 53 tmp = fmap[i];
cannam@89 54 ec_tmp = eclass[tmp];
cannam@89 55 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
cannam@89 56 fmap[j-1] = fmap[j];
cannam@89 57 fmap[j-1] = tmp;
cannam@89 58 }
cannam@89 59 }
cannam@89 60
cannam@89 61
cannam@89 62 /*---------------------------------------------*/
cannam@89 63 #define fswap(zz1, zz2) \
cannam@89 64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
cannam@89 65
cannam@89 66 #define fvswap(zzp1, zzp2, zzn) \
cannam@89 67 { \
cannam@89 68 Int32 yyp1 = (zzp1); \
cannam@89 69 Int32 yyp2 = (zzp2); \
cannam@89 70 Int32 yyn = (zzn); \
cannam@89 71 while (yyn > 0) { \
cannam@89 72 fswap(fmap[yyp1], fmap[yyp2]); \
cannam@89 73 yyp1++; yyp2++; yyn--; \
cannam@89 74 } \
cannam@89 75 }
cannam@89 76
cannam@89 77
cannam@89 78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
cannam@89 79
cannam@89 80 #define fpush(lz,hz) { stackLo[sp] = lz; \
cannam@89 81 stackHi[sp] = hz; \
cannam@89 82 sp++; }
cannam@89 83
cannam@89 84 #define fpop(lz,hz) { sp--; \
cannam@89 85 lz = stackLo[sp]; \
cannam@89 86 hz = stackHi[sp]; }
cannam@89 87
cannam@89 88 #define FALLBACK_QSORT_SMALL_THRESH 10
cannam@89 89 #define FALLBACK_QSORT_STACK_SIZE 100
cannam@89 90
cannam@89 91
cannam@89 92 static
cannam@89 93 void fallbackQSort3 ( UInt32* fmap,
cannam@89 94 UInt32* eclass,
cannam@89 95 Int32 loSt,
cannam@89 96 Int32 hiSt )
cannam@89 97 {
cannam@89 98 Int32 unLo, unHi, ltLo, gtHi, n, m;
cannam@89 99 Int32 sp, lo, hi;
cannam@89 100 UInt32 med, r, r3;
cannam@89 101 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
cannam@89 102 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
cannam@89 103
cannam@89 104 r = 0;
cannam@89 105
cannam@89 106 sp = 0;
cannam@89 107 fpush ( loSt, hiSt );
cannam@89 108
cannam@89 109 while (sp > 0) {
cannam@89 110
cannam@89 111 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
cannam@89 112
cannam@89 113 fpop ( lo, hi );
cannam@89 114 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
cannam@89 115 fallbackSimpleSort ( fmap, eclass, lo, hi );
cannam@89 116 continue;
cannam@89 117 }
cannam@89 118
cannam@89 119 /* Random partitioning. Median of 3 sometimes fails to
cannam@89 120 avoid bad cases. Median of 9 seems to help but
cannam@89 121 looks rather expensive. This too seems to work but
cannam@89 122 is cheaper. Guidance for the magic constants
cannam@89 123 7621 and 32768 is taken from Sedgewick's algorithms
cannam@89 124 book, chapter 35.
cannam@89 125 */
cannam@89 126 r = ((r * 7621) + 1) % 32768;
cannam@89 127 r3 = r % 3;
cannam@89 128 if (r3 == 0) med = eclass[fmap[lo]]; else
cannam@89 129 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
cannam@89 130 med = eclass[fmap[hi]];
cannam@89 131
cannam@89 132 unLo = ltLo = lo;
cannam@89 133 unHi = gtHi = hi;
cannam@89 134
cannam@89 135 while (1) {
cannam@89 136 while (1) {
cannam@89 137 if (unLo > unHi) break;
cannam@89 138 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
cannam@89 139 if (n == 0) {
cannam@89 140 fswap(fmap[unLo], fmap[ltLo]);
cannam@89 141 ltLo++; unLo++;
cannam@89 142 continue;
cannam@89 143 };
cannam@89 144 if (n > 0) break;
cannam@89 145 unLo++;
cannam@89 146 }
cannam@89 147 while (1) {
cannam@89 148 if (unLo > unHi) break;
cannam@89 149 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
cannam@89 150 if (n == 0) {
cannam@89 151 fswap(fmap[unHi], fmap[gtHi]);
cannam@89 152 gtHi--; unHi--;
cannam@89 153 continue;
cannam@89 154 };
cannam@89 155 if (n < 0) break;
cannam@89 156 unHi--;
cannam@89 157 }
cannam@89 158 if (unLo > unHi) break;
cannam@89 159 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
cannam@89 160 }
cannam@89 161
cannam@89 162 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
cannam@89 163
cannam@89 164 if (gtHi < ltLo) continue;
cannam@89 165
cannam@89 166 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
cannam@89 167 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
cannam@89 168
cannam@89 169 n = lo + unLo - ltLo - 1;
cannam@89 170 m = hi - (gtHi - unHi) + 1;
cannam@89 171
cannam@89 172 if (n - lo > hi - m) {
cannam@89 173 fpush ( lo, n );
cannam@89 174 fpush ( m, hi );
cannam@89 175 } else {
cannam@89 176 fpush ( m, hi );
cannam@89 177 fpush ( lo, n );
cannam@89 178 }
cannam@89 179 }
cannam@89 180 }
cannam@89 181
cannam@89 182 #undef fmin
cannam@89 183 #undef fpush
cannam@89 184 #undef fpop
cannam@89 185 #undef fswap
cannam@89 186 #undef fvswap
cannam@89 187 #undef FALLBACK_QSORT_SMALL_THRESH
cannam@89 188 #undef FALLBACK_QSORT_STACK_SIZE
cannam@89 189
cannam@89 190
cannam@89 191 /*---------------------------------------------*/
cannam@89 192 /* Pre:
cannam@89 193 nblock > 0
cannam@89 194 eclass exists for [0 .. nblock-1]
cannam@89 195 ((UChar*)eclass) [0 .. nblock-1] holds block
cannam@89 196 ptr exists for [0 .. nblock-1]
cannam@89 197
cannam@89 198 Post:
cannam@89 199 ((UChar*)eclass) [0 .. nblock-1] holds block
cannam@89 200 All other areas of eclass destroyed
cannam@89 201 fmap [0 .. nblock-1] holds sorted order
cannam@89 202 bhtab [ 0 .. 2+(nblock/32) ] destroyed
cannam@89 203 */
cannam@89 204
cannam@89 205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
cannam@89 206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
cannam@89 207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
cannam@89 208 #define WORD_BH(zz) bhtab[(zz) >> 5]
cannam@89 209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
cannam@89 210
cannam@89 211 static
cannam@89 212 void fallbackSort ( UInt32* fmap,
cannam@89 213 UInt32* eclass,
cannam@89 214 UInt32* bhtab,
cannam@89 215 Int32 nblock,
cannam@89 216 Int32 verb )
cannam@89 217 {
cannam@89 218 Int32 ftab[257];
cannam@89 219 Int32 ftabCopy[256];
cannam@89 220 Int32 H, i, j, k, l, r, cc, cc1;
cannam@89 221 Int32 nNotDone;
cannam@89 222 Int32 nBhtab;
cannam@89 223 UChar* eclass8 = (UChar*)eclass;
cannam@89 224
cannam@89 225 /*--
cannam@89 226 Initial 1-char radix sort to generate
cannam@89 227 initial fmap and initial BH bits.
cannam@89 228 --*/
cannam@89 229 if (verb >= 4)
cannam@89 230 VPrintf0 ( " bucket sorting ...\n" );
cannam@89 231 for (i = 0; i < 257; i++) ftab[i] = 0;
cannam@89 232 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
cannam@89 233 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
cannam@89 234 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
cannam@89 235
cannam@89 236 for (i = 0; i < nblock; i++) {
cannam@89 237 j = eclass8[i];
cannam@89 238 k = ftab[j] - 1;
cannam@89 239 ftab[j] = k;
cannam@89 240 fmap[k] = i;
cannam@89 241 }
cannam@89 242
cannam@89 243 nBhtab = 2 + (nblock / 32);
cannam@89 244 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
cannam@89 245 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
cannam@89 246
cannam@89 247 /*--
cannam@89 248 Inductively refine the buckets. Kind-of an
cannam@89 249 "exponential radix sort" (!), inspired by the
cannam@89 250 Manber-Myers suffix array construction algorithm.
cannam@89 251 --*/
cannam@89 252
cannam@89 253 /*-- set sentinel bits for block-end detection --*/
cannam@89 254 for (i = 0; i < 32; i++) {
cannam@89 255 SET_BH(nblock + 2*i);
cannam@89 256 CLEAR_BH(nblock + 2*i + 1);
cannam@89 257 }
cannam@89 258
cannam@89 259 /*-- the log(N) loop --*/
cannam@89 260 H = 1;
cannam@89 261 while (1) {
cannam@89 262
cannam@89 263 if (verb >= 4)
cannam@89 264 VPrintf1 ( " depth %6d has ", H );
cannam@89 265
cannam@89 266 j = 0;
cannam@89 267 for (i = 0; i < nblock; i++) {
cannam@89 268 if (ISSET_BH(i)) j = i;
cannam@89 269 k = fmap[i] - H; if (k < 0) k += nblock;
cannam@89 270 eclass[k] = j;
cannam@89 271 }
cannam@89 272
cannam@89 273 nNotDone = 0;
cannam@89 274 r = -1;
cannam@89 275 while (1) {
cannam@89 276
cannam@89 277 /*-- find the next non-singleton bucket --*/
cannam@89 278 k = r + 1;
cannam@89 279 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
cannam@89 280 if (ISSET_BH(k)) {
cannam@89 281 while (WORD_BH(k) == 0xffffffff) k += 32;
cannam@89 282 while (ISSET_BH(k)) k++;
cannam@89 283 }
cannam@89 284 l = k - 1;
cannam@89 285 if (l >= nblock) break;
cannam@89 286 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
cannam@89 287 if (!ISSET_BH(k)) {
cannam@89 288 while (WORD_BH(k) == 0x00000000) k += 32;
cannam@89 289 while (!ISSET_BH(k)) k++;
cannam@89 290 }
cannam@89 291 r = k - 1;
cannam@89 292 if (r >= nblock) break;
cannam@89 293
cannam@89 294 /*-- now [l, r] bracket current bucket --*/
cannam@89 295 if (r > l) {
cannam@89 296 nNotDone += (r - l + 1);
cannam@89 297 fallbackQSort3 ( fmap, eclass, l, r );
cannam@89 298
cannam@89 299 /*-- scan bucket and generate header bits-- */
cannam@89 300 cc = -1;
cannam@89 301 for (i = l; i <= r; i++) {
cannam@89 302 cc1 = eclass[fmap[i]];
cannam@89 303 if (cc != cc1) { SET_BH(i); cc = cc1; };
cannam@89 304 }
cannam@89 305 }
cannam@89 306 }
cannam@89 307
cannam@89 308 if (verb >= 4)
cannam@89 309 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
cannam@89 310
cannam@89 311 H *= 2;
cannam@89 312 if (H > nblock || nNotDone == 0) break;
cannam@89 313 }
cannam@89 314
cannam@89 315 /*--
cannam@89 316 Reconstruct the original block in
cannam@89 317 eclass8 [0 .. nblock-1], since the
cannam@89 318 previous phase destroyed it.
cannam@89 319 --*/
cannam@89 320 if (verb >= 4)
cannam@89 321 VPrintf0 ( " reconstructing block ...\n" );
cannam@89 322 j = 0;
cannam@89 323 for (i = 0; i < nblock; i++) {
cannam@89 324 while (ftabCopy[j] == 0) j++;
cannam@89 325 ftabCopy[j]--;
cannam@89 326 eclass8[fmap[i]] = (UChar)j;
cannam@89 327 }
cannam@89 328 AssertH ( j < 256, 1005 );
cannam@89 329 }
cannam@89 330
cannam@89 331 #undef SET_BH
cannam@89 332 #undef CLEAR_BH
cannam@89 333 #undef ISSET_BH
cannam@89 334 #undef WORD_BH
cannam@89 335 #undef UNALIGNED_BH
cannam@89 336
cannam@89 337
cannam@89 338 /*---------------------------------------------*/
cannam@89 339 /*--- The main, O(N^2 log(N)) sorting ---*/
cannam@89 340 /*--- algorithm. Faster for "normal" ---*/
cannam@89 341 /*--- non-repetitive blocks. ---*/
cannam@89 342 /*---------------------------------------------*/
cannam@89 343
cannam@89 344 /*---------------------------------------------*/
cannam@89 345 static
cannam@89 346 __inline__
cannam@89 347 Bool mainGtU ( UInt32 i1,
cannam@89 348 UInt32 i2,
cannam@89 349 UChar* block,
cannam@89 350 UInt16* quadrant,
cannam@89 351 UInt32 nblock,
cannam@89 352 Int32* budget )
cannam@89 353 {
cannam@89 354 Int32 k;
cannam@89 355 UChar c1, c2;
cannam@89 356 UInt16 s1, s2;
cannam@89 357
cannam@89 358 AssertD ( i1 != i2, "mainGtU" );
cannam@89 359 /* 1 */
cannam@89 360 c1 = block[i1]; c2 = block[i2];
cannam@89 361 if (c1 != c2) return (c1 > c2);
cannam@89 362 i1++; i2++;
cannam@89 363 /* 2 */
cannam@89 364 c1 = block[i1]; c2 = block[i2];
cannam@89 365 if (c1 != c2) return (c1 > c2);
cannam@89 366 i1++; i2++;
cannam@89 367 /* 3 */
cannam@89 368 c1 = block[i1]; c2 = block[i2];
cannam@89 369 if (c1 != c2) return (c1 > c2);
cannam@89 370 i1++; i2++;
cannam@89 371 /* 4 */
cannam@89 372 c1 = block[i1]; c2 = block[i2];
cannam@89 373 if (c1 != c2) return (c1 > c2);
cannam@89 374 i1++; i2++;
cannam@89 375 /* 5 */
cannam@89 376 c1 = block[i1]; c2 = block[i2];
cannam@89 377 if (c1 != c2) return (c1 > c2);
cannam@89 378 i1++; i2++;
cannam@89 379 /* 6 */
cannam@89 380 c1 = block[i1]; c2 = block[i2];
cannam@89 381 if (c1 != c2) return (c1 > c2);
cannam@89 382 i1++; i2++;
cannam@89 383 /* 7 */
cannam@89 384 c1 = block[i1]; c2 = block[i2];
cannam@89 385 if (c1 != c2) return (c1 > c2);
cannam@89 386 i1++; i2++;
cannam@89 387 /* 8 */
cannam@89 388 c1 = block[i1]; c2 = block[i2];
cannam@89 389 if (c1 != c2) return (c1 > c2);
cannam@89 390 i1++; i2++;
cannam@89 391 /* 9 */
cannam@89 392 c1 = block[i1]; c2 = block[i2];
cannam@89 393 if (c1 != c2) return (c1 > c2);
cannam@89 394 i1++; i2++;
cannam@89 395 /* 10 */
cannam@89 396 c1 = block[i1]; c2 = block[i2];
cannam@89 397 if (c1 != c2) return (c1 > c2);
cannam@89 398 i1++; i2++;
cannam@89 399 /* 11 */
cannam@89 400 c1 = block[i1]; c2 = block[i2];
cannam@89 401 if (c1 != c2) return (c1 > c2);
cannam@89 402 i1++; i2++;
cannam@89 403 /* 12 */
cannam@89 404 c1 = block[i1]; c2 = block[i2];
cannam@89 405 if (c1 != c2) return (c1 > c2);
cannam@89 406 i1++; i2++;
cannam@89 407
cannam@89 408 k = nblock + 8;
cannam@89 409
cannam@89 410 do {
cannam@89 411 /* 1 */
cannam@89 412 c1 = block[i1]; c2 = block[i2];
cannam@89 413 if (c1 != c2) return (c1 > c2);
cannam@89 414 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 415 if (s1 != s2) return (s1 > s2);
cannam@89 416 i1++; i2++;
cannam@89 417 /* 2 */
cannam@89 418 c1 = block[i1]; c2 = block[i2];
cannam@89 419 if (c1 != c2) return (c1 > c2);
cannam@89 420 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 421 if (s1 != s2) return (s1 > s2);
cannam@89 422 i1++; i2++;
cannam@89 423 /* 3 */
cannam@89 424 c1 = block[i1]; c2 = block[i2];
cannam@89 425 if (c1 != c2) return (c1 > c2);
cannam@89 426 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 427 if (s1 != s2) return (s1 > s2);
cannam@89 428 i1++; i2++;
cannam@89 429 /* 4 */
cannam@89 430 c1 = block[i1]; c2 = block[i2];
cannam@89 431 if (c1 != c2) return (c1 > c2);
cannam@89 432 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 433 if (s1 != s2) return (s1 > s2);
cannam@89 434 i1++; i2++;
cannam@89 435 /* 5 */
cannam@89 436 c1 = block[i1]; c2 = block[i2];
cannam@89 437 if (c1 != c2) return (c1 > c2);
cannam@89 438 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 439 if (s1 != s2) return (s1 > s2);
cannam@89 440 i1++; i2++;
cannam@89 441 /* 6 */
cannam@89 442 c1 = block[i1]; c2 = block[i2];
cannam@89 443 if (c1 != c2) return (c1 > c2);
cannam@89 444 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 445 if (s1 != s2) return (s1 > s2);
cannam@89 446 i1++; i2++;
cannam@89 447 /* 7 */
cannam@89 448 c1 = block[i1]; c2 = block[i2];
cannam@89 449 if (c1 != c2) return (c1 > c2);
cannam@89 450 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 451 if (s1 != s2) return (s1 > s2);
cannam@89 452 i1++; i2++;
cannam@89 453 /* 8 */
cannam@89 454 c1 = block[i1]; c2 = block[i2];
cannam@89 455 if (c1 != c2) return (c1 > c2);
cannam@89 456 s1 = quadrant[i1]; s2 = quadrant[i2];
cannam@89 457 if (s1 != s2) return (s1 > s2);
cannam@89 458 i1++; i2++;
cannam@89 459
cannam@89 460 if (i1 >= nblock) i1 -= nblock;
cannam@89 461 if (i2 >= nblock) i2 -= nblock;
cannam@89 462
cannam@89 463 k -= 8;
cannam@89 464 (*budget)--;
cannam@89 465 }
cannam@89 466 while (k >= 0);
cannam@89 467
cannam@89 468 return False;
cannam@89 469 }
cannam@89 470
cannam@89 471
cannam@89 472 /*---------------------------------------------*/
cannam@89 473 /*--
cannam@89 474 Knuth's increments seem to work better
cannam@89 475 than Incerpi-Sedgewick here. Possibly
cannam@89 476 because the number of elems to sort is
cannam@89 477 usually small, typically <= 20.
cannam@89 478 --*/
cannam@89 479 static
cannam@89 480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
cannam@89 481 9841, 29524, 88573, 265720,
cannam@89 482 797161, 2391484 };
cannam@89 483
cannam@89 484 static
cannam@89 485 void mainSimpleSort ( UInt32* ptr,
cannam@89 486 UChar* block,
cannam@89 487 UInt16* quadrant,
cannam@89 488 Int32 nblock,
cannam@89 489 Int32 lo,
cannam@89 490 Int32 hi,
cannam@89 491 Int32 d,
cannam@89 492 Int32* budget )
cannam@89 493 {
cannam@89 494 Int32 i, j, h, bigN, hp;
cannam@89 495 UInt32 v;
cannam@89 496
cannam@89 497 bigN = hi - lo + 1;
cannam@89 498 if (bigN < 2) return;
cannam@89 499
cannam@89 500 hp = 0;
cannam@89 501 while (incs[hp] < bigN) hp++;
cannam@89 502 hp--;
cannam@89 503
cannam@89 504 for (; hp >= 0; hp--) {
cannam@89 505 h = incs[hp];
cannam@89 506
cannam@89 507 i = lo + h;
cannam@89 508 while (True) {
cannam@89 509
cannam@89 510 /*-- copy 1 --*/
cannam@89 511 if (i > hi) break;
cannam@89 512 v = ptr[i];
cannam@89 513 j = i;
cannam@89 514 while ( mainGtU (
cannam@89 515 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
cannam@89 516 ) ) {
cannam@89 517 ptr[j] = ptr[j-h];
cannam@89 518 j = j - h;
cannam@89 519 if (j <= (lo + h - 1)) break;
cannam@89 520 }
cannam@89 521 ptr[j] = v;
cannam@89 522 i++;
cannam@89 523
cannam@89 524 /*-- copy 2 --*/
cannam@89 525 if (i > hi) break;
cannam@89 526 v = ptr[i];
cannam@89 527 j = i;
cannam@89 528 while ( mainGtU (
cannam@89 529 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
cannam@89 530 ) ) {
cannam@89 531 ptr[j] = ptr[j-h];
cannam@89 532 j = j - h;
cannam@89 533 if (j <= (lo + h - 1)) break;
cannam@89 534 }
cannam@89 535 ptr[j] = v;
cannam@89 536 i++;
cannam@89 537
cannam@89 538 /*-- copy 3 --*/
cannam@89 539 if (i > hi) break;
cannam@89 540 v = ptr[i];
cannam@89 541 j = i;
cannam@89 542 while ( mainGtU (
cannam@89 543 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
cannam@89 544 ) ) {
cannam@89 545 ptr[j] = ptr[j-h];
cannam@89 546 j = j - h;
cannam@89 547 if (j <= (lo + h - 1)) break;
cannam@89 548 }
cannam@89 549 ptr[j] = v;
cannam@89 550 i++;
cannam@89 551
cannam@89 552 if (*budget < 0) return;
cannam@89 553 }
cannam@89 554 }
cannam@89 555 }
cannam@89 556
cannam@89 557
cannam@89 558 /*---------------------------------------------*/
cannam@89 559 /*--
cannam@89 560 The following is an implementation of
cannam@89 561 an elegant 3-way quicksort for strings,
cannam@89 562 described in a paper "Fast Algorithms for
cannam@89 563 Sorting and Searching Strings", by Robert
cannam@89 564 Sedgewick and Jon L. Bentley.
cannam@89 565 --*/
cannam@89 566
cannam@89 567 #define mswap(zz1, zz2) \
cannam@89 568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
cannam@89 569
cannam@89 570 #define mvswap(zzp1, zzp2, zzn) \
cannam@89 571 { \
cannam@89 572 Int32 yyp1 = (zzp1); \
cannam@89 573 Int32 yyp2 = (zzp2); \
cannam@89 574 Int32 yyn = (zzn); \
cannam@89 575 while (yyn > 0) { \
cannam@89 576 mswap(ptr[yyp1], ptr[yyp2]); \
cannam@89 577 yyp1++; yyp2++; yyn--; \
cannam@89 578 } \
cannam@89 579 }
cannam@89 580
cannam@89 581 static
cannam@89 582 __inline__
cannam@89 583 UChar mmed3 ( UChar a, UChar b, UChar c )
cannam@89 584 {
cannam@89 585 UChar t;
cannam@89 586 if (a > b) { t = a; a = b; b = t; };
cannam@89 587 if (b > c) {
cannam@89 588 b = c;
cannam@89 589 if (a > b) b = a;
cannam@89 590 }
cannam@89 591 return b;
cannam@89 592 }
cannam@89 593
cannam@89 594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
cannam@89 595
cannam@89 596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
cannam@89 597 stackHi[sp] = hz; \
cannam@89 598 stackD [sp] = dz; \
cannam@89 599 sp++; }
cannam@89 600
cannam@89 601 #define mpop(lz,hz,dz) { sp--; \
cannam@89 602 lz = stackLo[sp]; \
cannam@89 603 hz = stackHi[sp]; \
cannam@89 604 dz = stackD [sp]; }
cannam@89 605
cannam@89 606
cannam@89 607 #define mnextsize(az) (nextHi[az]-nextLo[az])
cannam@89 608
cannam@89 609 #define mnextswap(az,bz) \
cannam@89 610 { Int32 tz; \
cannam@89 611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
cannam@89 612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
cannam@89 613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
cannam@89 614
cannam@89 615
cannam@89 616 #define MAIN_QSORT_SMALL_THRESH 20
cannam@89 617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
cannam@89 618 #define MAIN_QSORT_STACK_SIZE 100
cannam@89 619
cannam@89 620 static
cannam@89 621 void mainQSort3 ( UInt32* ptr,
cannam@89 622 UChar* block,
cannam@89 623 UInt16* quadrant,
cannam@89 624 Int32 nblock,
cannam@89 625 Int32 loSt,
cannam@89 626 Int32 hiSt,
cannam@89 627 Int32 dSt,
cannam@89 628 Int32* budget )
cannam@89 629 {
cannam@89 630 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
cannam@89 631 Int32 sp, lo, hi, d;
cannam@89 632
cannam@89 633 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
cannam@89 634 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
cannam@89 635 Int32 stackD [MAIN_QSORT_STACK_SIZE];
cannam@89 636
cannam@89 637 Int32 nextLo[3];
cannam@89 638 Int32 nextHi[3];
cannam@89 639 Int32 nextD [3];
cannam@89 640
cannam@89 641 sp = 0;
cannam@89 642 mpush ( loSt, hiSt, dSt );
cannam@89 643
cannam@89 644 while (sp > 0) {
cannam@89 645
cannam@89 646 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
cannam@89 647
cannam@89 648 mpop ( lo, hi, d );
cannam@89 649 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
cannam@89 650 d > MAIN_QSORT_DEPTH_THRESH) {
cannam@89 651 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
cannam@89 652 if (*budget < 0) return;
cannam@89 653 continue;
cannam@89 654 }
cannam@89 655
cannam@89 656 med = (Int32)
cannam@89 657 mmed3 ( block[ptr[ lo ]+d],
cannam@89 658 block[ptr[ hi ]+d],
cannam@89 659 block[ptr[ (lo+hi)>>1 ]+d] );
cannam@89 660
cannam@89 661 unLo = ltLo = lo;
cannam@89 662 unHi = gtHi = hi;
cannam@89 663
cannam@89 664 while (True) {
cannam@89 665 while (True) {
cannam@89 666 if (unLo > unHi) break;
cannam@89 667 n = ((Int32)block[ptr[unLo]+d]) - med;
cannam@89 668 if (n == 0) {
cannam@89 669 mswap(ptr[unLo], ptr[ltLo]);
cannam@89 670 ltLo++; unLo++; continue;
cannam@89 671 };
cannam@89 672 if (n > 0) break;
cannam@89 673 unLo++;
cannam@89 674 }
cannam@89 675 while (True) {
cannam@89 676 if (unLo > unHi) break;
cannam@89 677 n = ((Int32)block[ptr[unHi]+d]) - med;
cannam@89 678 if (n == 0) {
cannam@89 679 mswap(ptr[unHi], ptr[gtHi]);
cannam@89 680 gtHi--; unHi--; continue;
cannam@89 681 };
cannam@89 682 if (n < 0) break;
cannam@89 683 unHi--;
cannam@89 684 }
cannam@89 685 if (unLo > unHi) break;
cannam@89 686 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
cannam@89 687 }
cannam@89 688
cannam@89 689 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
cannam@89 690
cannam@89 691 if (gtHi < ltLo) {
cannam@89 692 mpush(lo, hi, d+1 );
cannam@89 693 continue;
cannam@89 694 }
cannam@89 695
cannam@89 696 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
cannam@89 697 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
cannam@89 698
cannam@89 699 n = lo + unLo - ltLo - 1;
cannam@89 700 m = hi - (gtHi - unHi) + 1;
cannam@89 701
cannam@89 702 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
cannam@89 703 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
cannam@89 704 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
cannam@89 705
cannam@89 706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
cannam@89 707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
cannam@89 708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
cannam@89 709
cannam@89 710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
cannam@89 711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
cannam@89 712
cannam@89 713 mpush (nextLo[0], nextHi[0], nextD[0]);
cannam@89 714 mpush (nextLo[1], nextHi[1], nextD[1]);
cannam@89 715 mpush (nextLo[2], nextHi[2], nextD[2]);
cannam@89 716 }
cannam@89 717 }
cannam@89 718
cannam@89 719 #undef mswap
cannam@89 720 #undef mvswap
cannam@89 721 #undef mpush
cannam@89 722 #undef mpop
cannam@89 723 #undef mmin
cannam@89 724 #undef mnextsize
cannam@89 725 #undef mnextswap
cannam@89 726 #undef MAIN_QSORT_SMALL_THRESH
cannam@89 727 #undef MAIN_QSORT_DEPTH_THRESH
cannam@89 728 #undef MAIN_QSORT_STACK_SIZE
cannam@89 729
cannam@89 730
cannam@89 731 /*---------------------------------------------*/
cannam@89 732 /* Pre:
cannam@89 733 nblock > N_OVERSHOOT
cannam@89 734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
cannam@89 735 ((UChar*)block32) [0 .. nblock-1] holds block
cannam@89 736 ptr exists for [0 .. nblock-1]
cannam@89 737
cannam@89 738 Post:
cannam@89 739 ((UChar*)block32) [0 .. nblock-1] holds block
cannam@89 740 All other areas of block32 destroyed
cannam@89 741 ftab [0 .. 65536 ] destroyed
cannam@89 742 ptr [0 .. nblock-1] holds sorted order
cannam@89 743 if (*budget < 0), sorting was abandoned
cannam@89 744 */
cannam@89 745
cannam@89 746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
cannam@89 747 #define SETMASK (1 << 21)
cannam@89 748 #define CLEARMASK (~(SETMASK))
cannam@89 749
cannam@89 750 static
cannam@89 751 void mainSort ( UInt32* ptr,
cannam@89 752 UChar* block,
cannam@89 753 UInt16* quadrant,
cannam@89 754 UInt32* ftab,
cannam@89 755 Int32 nblock,
cannam@89 756 Int32 verb,
cannam@89 757 Int32* budget )
cannam@89 758 {
cannam@89 759 Int32 i, j, k, ss, sb;
cannam@89 760 Int32 runningOrder[256];
cannam@89 761 Bool bigDone[256];
cannam@89 762 Int32 copyStart[256];
cannam@89 763 Int32 copyEnd [256];
cannam@89 764 UChar c1;
cannam@89 765 Int32 numQSorted;
cannam@89 766 UInt16 s;
cannam@89 767 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
cannam@89 768
cannam@89 769 /*-- set up the 2-byte frequency table --*/
cannam@89 770 for (i = 65536; i >= 0; i--) ftab[i] = 0;
cannam@89 771
cannam@89 772 j = block[0] << 8;
cannam@89 773 i = nblock-1;
cannam@89 774 for (; i >= 3; i -= 4) {
cannam@89 775 quadrant[i] = 0;
cannam@89 776 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
cannam@89 777 ftab[j]++;
cannam@89 778 quadrant[i-1] = 0;
cannam@89 779 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
cannam@89 780 ftab[j]++;
cannam@89 781 quadrant[i-2] = 0;
cannam@89 782 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
cannam@89 783 ftab[j]++;
cannam@89 784 quadrant[i-3] = 0;
cannam@89 785 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
cannam@89 786 ftab[j]++;
cannam@89 787 }
cannam@89 788 for (; i >= 0; i--) {
cannam@89 789 quadrant[i] = 0;
cannam@89 790 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
cannam@89 791 ftab[j]++;
cannam@89 792 }
cannam@89 793
cannam@89 794 /*-- (emphasises close relationship of block & quadrant) --*/
cannam@89 795 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
cannam@89 796 block [nblock+i] = block[i];
cannam@89 797 quadrant[nblock+i] = 0;
cannam@89 798 }
cannam@89 799
cannam@89 800 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
cannam@89 801
cannam@89 802 /*-- Complete the initial radix sort --*/
cannam@89 803 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
cannam@89 804
cannam@89 805 s = block[0] << 8;
cannam@89 806 i = nblock-1;
cannam@89 807 for (; i >= 3; i -= 4) {
cannam@89 808 s = (s >> 8) | (block[i] << 8);
cannam@89 809 j = ftab[s] -1;
cannam@89 810 ftab[s] = j;
cannam@89 811 ptr[j] = i;
cannam@89 812 s = (s >> 8) | (block[i-1] << 8);
cannam@89 813 j = ftab[s] -1;
cannam@89 814 ftab[s] = j;
cannam@89 815 ptr[j] = i-1;
cannam@89 816 s = (s >> 8) | (block[i-2] << 8);
cannam@89 817 j = ftab[s] -1;
cannam@89 818 ftab[s] = j;
cannam@89 819 ptr[j] = i-2;
cannam@89 820 s = (s >> 8) | (block[i-3] << 8);
cannam@89 821 j = ftab[s] -1;
cannam@89 822 ftab[s] = j;
cannam@89 823 ptr[j] = i-3;
cannam@89 824 }
cannam@89 825 for (; i >= 0; i--) {
cannam@89 826 s = (s >> 8) | (block[i] << 8);
cannam@89 827 j = ftab[s] -1;
cannam@89 828 ftab[s] = j;
cannam@89 829 ptr[j] = i;
cannam@89 830 }
cannam@89 831
cannam@89 832 /*--
cannam@89 833 Now ftab contains the first loc of every small bucket.
cannam@89 834 Calculate the running order, from smallest to largest
cannam@89 835 big bucket.
cannam@89 836 --*/
cannam@89 837 for (i = 0; i <= 255; i++) {
cannam@89 838 bigDone [i] = False;
cannam@89 839 runningOrder[i] = i;
cannam@89 840 }
cannam@89 841
cannam@89 842 {
cannam@89 843 Int32 vv;
cannam@89 844 Int32 h = 1;
cannam@89 845 do h = 3 * h + 1; while (h <= 256);
cannam@89 846 do {
cannam@89 847 h = h / 3;
cannam@89 848 for (i = h; i <= 255; i++) {
cannam@89 849 vv = runningOrder[i];
cannam@89 850 j = i;
cannam@89 851 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
cannam@89 852 runningOrder[j] = runningOrder[j-h];
cannam@89 853 j = j - h;
cannam@89 854 if (j <= (h - 1)) goto zero;
cannam@89 855 }
cannam@89 856 zero:
cannam@89 857 runningOrder[j] = vv;
cannam@89 858 }
cannam@89 859 } while (h != 1);
cannam@89 860 }
cannam@89 861
cannam@89 862 /*--
cannam@89 863 The main sorting loop.
cannam@89 864 --*/
cannam@89 865
cannam@89 866 numQSorted = 0;
cannam@89 867
cannam@89 868 for (i = 0; i <= 255; i++) {
cannam@89 869
cannam@89 870 /*--
cannam@89 871 Process big buckets, starting with the least full.
cannam@89 872 Basically this is a 3-step process in which we call
cannam@89 873 mainQSort3 to sort the small buckets [ss, j], but
cannam@89 874 also make a big effort to avoid the calls if we can.
cannam@89 875 --*/
cannam@89 876 ss = runningOrder[i];
cannam@89 877
cannam@89 878 /*--
cannam@89 879 Step 1:
cannam@89 880 Complete the big bucket [ss] by quicksorting
cannam@89 881 any unsorted small buckets [ss, j], for j != ss.
cannam@89 882 Hopefully previous pointer-scanning phases have already
cannam@89 883 completed many of the small buckets [ss, j], so
cannam@89 884 we don't have to sort them at all.
cannam@89 885 --*/
cannam@89 886 for (j = 0; j <= 255; j++) {
cannam@89 887 if (j != ss) {
cannam@89 888 sb = (ss << 8) + j;
cannam@89 889 if ( ! (ftab[sb] & SETMASK) ) {
cannam@89 890 Int32 lo = ftab[sb] & CLEARMASK;
cannam@89 891 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
cannam@89 892 if (hi > lo) {
cannam@89 893 if (verb >= 4)
cannam@89 894 VPrintf4 ( " qsort [0x%x, 0x%x] "
cannam@89 895 "done %d this %d\n",
cannam@89 896 ss, j, numQSorted, hi - lo + 1 );
cannam@89 897 mainQSort3 (
cannam@89 898 ptr, block, quadrant, nblock,
cannam@89 899 lo, hi, BZ_N_RADIX, budget
cannam@89 900 );
cannam@89 901 numQSorted += (hi - lo + 1);
cannam@89 902 if (*budget < 0) return;
cannam@89 903 }
cannam@89 904 }
cannam@89 905 ftab[sb] |= SETMASK;
cannam@89 906 }
cannam@89 907 }
cannam@89 908
cannam@89 909 AssertH ( !bigDone[ss], 1006 );
cannam@89 910
cannam@89 911 /*--
cannam@89 912 Step 2:
cannam@89 913 Now scan this big bucket [ss] so as to synthesise the
cannam@89 914 sorted order for small buckets [t, ss] for all t,
cannam@89 915 including, magically, the bucket [ss,ss] too.
cannam@89 916 This will avoid doing Real Work in subsequent Step 1's.
cannam@89 917 --*/
cannam@89 918 {
cannam@89 919 for (j = 0; j <= 255; j++) {
cannam@89 920 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
cannam@89 921 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
cannam@89 922 }
cannam@89 923 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
cannam@89 924 k = ptr[j]-1; if (k < 0) k += nblock;
cannam@89 925 c1 = block[k];
cannam@89 926 if (!bigDone[c1])
cannam@89 927 ptr[ copyStart[c1]++ ] = k;
cannam@89 928 }
cannam@89 929 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
cannam@89 930 k = ptr[j]-1; if (k < 0) k += nblock;
cannam@89 931 c1 = block[k];
cannam@89 932 if (!bigDone[c1])
cannam@89 933 ptr[ copyEnd[c1]-- ] = k;
cannam@89 934 }
cannam@89 935 }
cannam@89 936
cannam@89 937 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
cannam@89 938 ||
cannam@89 939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
cannam@89 940 Necessity for this case is demonstrated by compressing
cannam@89 941 a sequence of approximately 48.5 million of character
cannam@89 942 251; 1.0.0/1.0.1 will then die here. */
cannam@89 943 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
cannam@89 944 1007 )
cannam@89 945
cannam@89 946 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
cannam@89 947
cannam@89 948 /*--
cannam@89 949 Step 3:
cannam@89 950 The [ss] big bucket is now done. Record this fact,
cannam@89 951 and update the quadrant descriptors. Remember to
cannam@89 952 update quadrants in the overshoot area too, if
cannam@89 953 necessary. The "if (i < 255)" test merely skips
cannam@89 954 this updating for the last bucket processed, since
cannam@89 955 updating for the last bucket is pointless.
cannam@89 956
cannam@89 957 The quadrant array provides a way to incrementally
cannam@89 958 cache sort orderings, as they appear, so as to
cannam@89 959 make subsequent comparisons in fullGtU() complete
cannam@89 960 faster. For repetitive blocks this makes a big
cannam@89 961 difference (but not big enough to be able to avoid
cannam@89 962 the fallback sorting mechanism, exponential radix sort).
cannam@89 963
cannam@89 964 The precise meaning is: at all times:
cannam@89 965
cannam@89 966 for 0 <= i < nblock and 0 <= j <= nblock
cannam@89 967
cannam@89 968 if block[i] != block[j],
cannam@89 969
cannam@89 970 then the relative values of quadrant[i] and
cannam@89 971 quadrant[j] are meaningless.
cannam@89 972
cannam@89 973 else {
cannam@89 974 if quadrant[i] < quadrant[j]
cannam@89 975 then the string starting at i lexicographically
cannam@89 976 precedes the string starting at j
cannam@89 977
cannam@89 978 else if quadrant[i] > quadrant[j]
cannam@89 979 then the string starting at j lexicographically
cannam@89 980 precedes the string starting at i
cannam@89 981
cannam@89 982 else
cannam@89 983 the relative ordering of the strings starting
cannam@89 984 at i and j has not yet been determined.
cannam@89 985 }
cannam@89 986 --*/
cannam@89 987 bigDone[ss] = True;
cannam@89 988
cannam@89 989 if (i < 255) {
cannam@89 990 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
cannam@89 991 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
cannam@89 992 Int32 shifts = 0;
cannam@89 993
cannam@89 994 while ((bbSize >> shifts) > 65534) shifts++;
cannam@89 995
cannam@89 996 for (j = bbSize-1; j >= 0; j--) {
cannam@89 997 Int32 a2update = ptr[bbStart + j];
cannam@89 998 UInt16 qVal = (UInt16)(j >> shifts);
cannam@89 999 quadrant[a2update] = qVal;
cannam@89 1000 if (a2update < BZ_N_OVERSHOOT)
cannam@89 1001 quadrant[a2update + nblock] = qVal;
cannam@89 1002 }
cannam@89 1003 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
cannam@89 1004 }
cannam@89 1005
cannam@89 1006 }
cannam@89 1007
cannam@89 1008 if (verb >= 4)
cannam@89 1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
cannam@89 1010 nblock, numQSorted, nblock - numQSorted );
cannam@89 1011 }
cannam@89 1012
cannam@89 1013 #undef BIGFREQ
cannam@89 1014 #undef SETMASK
cannam@89 1015 #undef CLEARMASK
cannam@89 1016
cannam@89 1017
cannam@89 1018 /*---------------------------------------------*/
cannam@89 1019 /* Pre:
cannam@89 1020 nblock > 0
cannam@89 1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
cannam@89 1022 ((UChar*)arr2) [0 .. nblock-1] holds block
cannam@89 1023 arr1 exists for [0 .. nblock-1]
cannam@89 1024
cannam@89 1025 Post:
cannam@89 1026 ((UChar*)arr2) [0 .. nblock-1] holds block
cannam@89 1027 All other areas of block destroyed
cannam@89 1028 ftab [ 0 .. 65536 ] destroyed
cannam@89 1029 arr1 [0 .. nblock-1] holds sorted order
cannam@89 1030 */
cannam@89 1031 void BZ2_blockSort ( EState* s )
cannam@89 1032 {
cannam@89 1033 UInt32* ptr = s->ptr;
cannam@89 1034 UChar* block = s->block;
cannam@89 1035 UInt32* ftab = s->ftab;
cannam@89 1036 Int32 nblock = s->nblock;
cannam@89 1037 Int32 verb = s->verbosity;
cannam@89 1038 Int32 wfact = s->workFactor;
cannam@89 1039 UInt16* quadrant;
cannam@89 1040 Int32 budget;
cannam@89 1041 Int32 budgetInit;
cannam@89 1042 Int32 i;
cannam@89 1043
cannam@89 1044 if (nblock < 10000) {
cannam@89 1045 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
cannam@89 1046 } else {
cannam@89 1047 /* Calculate the location for quadrant, remembering to get
cannam@89 1048 the alignment right. Assumes that &(block[0]) is at least
cannam@89 1049 2-byte aligned -- this should be ok since block is really
cannam@89 1050 the first section of arr2.
cannam@89 1051 */
cannam@89 1052 i = nblock+BZ_N_OVERSHOOT;
cannam@89 1053 if (i & 1) i++;
cannam@89 1054 quadrant = (UInt16*)(&(block[i]));
cannam@89 1055
cannam@89 1056 /* (wfact-1) / 3 puts the default-factor-30
cannam@89 1057 transition point at very roughly the same place as
cannam@89 1058 with v0.1 and v0.9.0.
cannam@89 1059 Not that it particularly matters any more, since the
cannam@89 1060 resulting compressed stream is now the same regardless
cannam@89 1061 of whether or not we use the main sort or fallback sort.
cannam@89 1062 */
cannam@89 1063 if (wfact < 1 ) wfact = 1;
cannam@89 1064 if (wfact > 100) wfact = 100;
cannam@89 1065 budgetInit = nblock * ((wfact-1) / 3);
cannam@89 1066 budget = budgetInit;
cannam@89 1067
cannam@89 1068 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
cannam@89 1069 if (verb >= 3)
cannam@89 1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
cannam@89 1071 budgetInit - budget,
cannam@89 1072 nblock,
cannam@89 1073 (float)(budgetInit - budget) /
cannam@89 1074 (float)(nblock==0 ? 1 : nblock) );
cannam@89 1075 if (budget < 0) {
cannam@89 1076 if (verb >= 2)
cannam@89 1077 VPrintf0 ( " too repetitive; using fallback"
cannam@89 1078 " sorting algorithm\n" );
cannam@89 1079 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
cannam@89 1080 }
cannam@89 1081 }
cannam@89 1082
cannam@89 1083 s->origPtr = -1;
cannam@89 1084 for (i = 0; i < s->nblock; i++)
cannam@89 1085 if (ptr[i] == 0)
cannam@89 1086 { s->origPtr = i; break; };
cannam@89 1087
cannam@89 1088 AssertH( s->origPtr != -1, 1003 );
cannam@89 1089 }
cannam@89 1090
cannam@89 1091
cannam@89 1092 /*-------------------------------------------------------------*/
cannam@89 1093 /*--- end blocksort.c ---*/
cannam@89 1094 /*-------------------------------------------------------------*/