annotate src/bzip2-1.0.6/blocksort.c @ 83:ae30d91d2ffe

Replace these with versions built using an older toolset (so as to avoid ABI compatibilities when linking on Ubuntu 14.04 for packaging purposes)
author Chris Cannam
date Fri, 07 Feb 2020 11:51:13 +0000
parents e13257ea84a4
children
rev   line source
Chris@4 1
Chris@4 2 /*-------------------------------------------------------------*/
Chris@4 3 /*--- Block sorting machinery ---*/
Chris@4 4 /*--- blocksort.c ---*/
Chris@4 5 /*-------------------------------------------------------------*/
Chris@4 6
Chris@4 7 /* ------------------------------------------------------------------
Chris@4 8 This file is part of bzip2/libbzip2, a program and library for
Chris@4 9 lossless, block-sorting data compression.
Chris@4 10
Chris@4 11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
Chris@4 12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
Chris@4 13
Chris@4 14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
Chris@4 15 README file.
Chris@4 16
Chris@4 17 This program is released under the terms of the license contained
Chris@4 18 in the file LICENSE.
Chris@4 19 ------------------------------------------------------------------ */
Chris@4 20
Chris@4 21
Chris@4 22 #include "bzlib_private.h"
Chris@4 23
Chris@4 24 /*---------------------------------------------*/
Chris@4 25 /*--- Fallback O(N log(N)^2) sorting ---*/
Chris@4 26 /*--- algorithm, for repetitive blocks ---*/
Chris@4 27 /*---------------------------------------------*/
Chris@4 28
Chris@4 29 /*---------------------------------------------*/
Chris@4 30 static
Chris@4 31 __inline__
Chris@4 32 void fallbackSimpleSort ( UInt32* fmap,
Chris@4 33 UInt32* eclass,
Chris@4 34 Int32 lo,
Chris@4 35 Int32 hi )
Chris@4 36 {
Chris@4 37 Int32 i, j, tmp;
Chris@4 38 UInt32 ec_tmp;
Chris@4 39
Chris@4 40 if (lo == hi) return;
Chris@4 41
Chris@4 42 if (hi - lo > 3) {
Chris@4 43 for ( i = hi-4; i >= lo; i-- ) {
Chris@4 44 tmp = fmap[i];
Chris@4 45 ec_tmp = eclass[tmp];
Chris@4 46 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
Chris@4 47 fmap[j-4] = fmap[j];
Chris@4 48 fmap[j-4] = tmp;
Chris@4 49 }
Chris@4 50 }
Chris@4 51
Chris@4 52 for ( i = hi-1; i >= lo; i-- ) {
Chris@4 53 tmp = fmap[i];
Chris@4 54 ec_tmp = eclass[tmp];
Chris@4 55 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
Chris@4 56 fmap[j-1] = fmap[j];
Chris@4 57 fmap[j-1] = tmp;
Chris@4 58 }
Chris@4 59 }
Chris@4 60
Chris@4 61
Chris@4 62 /*---------------------------------------------*/
Chris@4 63 #define fswap(zz1, zz2) \
Chris@4 64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
Chris@4 65
Chris@4 66 #define fvswap(zzp1, zzp2, zzn) \
Chris@4 67 { \
Chris@4 68 Int32 yyp1 = (zzp1); \
Chris@4 69 Int32 yyp2 = (zzp2); \
Chris@4 70 Int32 yyn = (zzn); \
Chris@4 71 while (yyn > 0) { \
Chris@4 72 fswap(fmap[yyp1], fmap[yyp2]); \
Chris@4 73 yyp1++; yyp2++; yyn--; \
Chris@4 74 } \
Chris@4 75 }
Chris@4 76
Chris@4 77
Chris@4 78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
Chris@4 79
Chris@4 80 #define fpush(lz,hz) { stackLo[sp] = lz; \
Chris@4 81 stackHi[sp] = hz; \
Chris@4 82 sp++; }
Chris@4 83
Chris@4 84 #define fpop(lz,hz) { sp--; \
Chris@4 85 lz = stackLo[sp]; \
Chris@4 86 hz = stackHi[sp]; }
Chris@4 87
Chris@4 88 #define FALLBACK_QSORT_SMALL_THRESH 10
Chris@4 89 #define FALLBACK_QSORT_STACK_SIZE 100
Chris@4 90
Chris@4 91
Chris@4 92 static
Chris@4 93 void fallbackQSort3 ( UInt32* fmap,
Chris@4 94 UInt32* eclass,
Chris@4 95 Int32 loSt,
Chris@4 96 Int32 hiSt )
Chris@4 97 {
Chris@4 98 Int32 unLo, unHi, ltLo, gtHi, n, m;
Chris@4 99 Int32 sp, lo, hi;
Chris@4 100 UInt32 med, r, r3;
Chris@4 101 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
Chris@4 102 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
Chris@4 103
Chris@4 104 r = 0;
Chris@4 105
Chris@4 106 sp = 0;
Chris@4 107 fpush ( loSt, hiSt );
Chris@4 108
Chris@4 109 while (sp > 0) {
Chris@4 110
Chris@4 111 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
Chris@4 112
Chris@4 113 fpop ( lo, hi );
Chris@4 114 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
Chris@4 115 fallbackSimpleSort ( fmap, eclass, lo, hi );
Chris@4 116 continue;
Chris@4 117 }
Chris@4 118
Chris@4 119 /* Random partitioning. Median of 3 sometimes fails to
Chris@4 120 avoid bad cases. Median of 9 seems to help but
Chris@4 121 looks rather expensive. This too seems to work but
Chris@4 122 is cheaper. Guidance for the magic constants
Chris@4 123 7621 and 32768 is taken from Sedgewick's algorithms
Chris@4 124 book, chapter 35.
Chris@4 125 */
Chris@4 126 r = ((r * 7621) + 1) % 32768;
Chris@4 127 r3 = r % 3;
Chris@4 128 if (r3 == 0) med = eclass[fmap[lo]]; else
Chris@4 129 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
Chris@4 130 med = eclass[fmap[hi]];
Chris@4 131
Chris@4 132 unLo = ltLo = lo;
Chris@4 133 unHi = gtHi = hi;
Chris@4 134
Chris@4 135 while (1) {
Chris@4 136 while (1) {
Chris@4 137 if (unLo > unHi) break;
Chris@4 138 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
Chris@4 139 if (n == 0) {
Chris@4 140 fswap(fmap[unLo], fmap[ltLo]);
Chris@4 141 ltLo++; unLo++;
Chris@4 142 continue;
Chris@4 143 };
Chris@4 144 if (n > 0) break;
Chris@4 145 unLo++;
Chris@4 146 }
Chris@4 147 while (1) {
Chris@4 148 if (unLo > unHi) break;
Chris@4 149 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
Chris@4 150 if (n == 0) {
Chris@4 151 fswap(fmap[unHi], fmap[gtHi]);
Chris@4 152 gtHi--; unHi--;
Chris@4 153 continue;
Chris@4 154 };
Chris@4 155 if (n < 0) break;
Chris@4 156 unHi--;
Chris@4 157 }
Chris@4 158 if (unLo > unHi) break;
Chris@4 159 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
Chris@4 160 }
Chris@4 161
Chris@4 162 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
Chris@4 163
Chris@4 164 if (gtHi < ltLo) continue;
Chris@4 165
Chris@4 166 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
Chris@4 167 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
Chris@4 168
Chris@4 169 n = lo + unLo - ltLo - 1;
Chris@4 170 m = hi - (gtHi - unHi) + 1;
Chris@4 171
Chris@4 172 if (n - lo > hi - m) {
Chris@4 173 fpush ( lo, n );
Chris@4 174 fpush ( m, hi );
Chris@4 175 } else {
Chris@4 176 fpush ( m, hi );
Chris@4 177 fpush ( lo, n );
Chris@4 178 }
Chris@4 179 }
Chris@4 180 }
Chris@4 181
Chris@4 182 #undef fmin
Chris@4 183 #undef fpush
Chris@4 184 #undef fpop
Chris@4 185 #undef fswap
Chris@4 186 #undef fvswap
Chris@4 187 #undef FALLBACK_QSORT_SMALL_THRESH
Chris@4 188 #undef FALLBACK_QSORT_STACK_SIZE
Chris@4 189
Chris@4 190
Chris@4 191 /*---------------------------------------------*/
Chris@4 192 /* Pre:
Chris@4 193 nblock > 0
Chris@4 194 eclass exists for [0 .. nblock-1]
Chris@4 195 ((UChar*)eclass) [0 .. nblock-1] holds block
Chris@4 196 ptr exists for [0 .. nblock-1]
Chris@4 197
Chris@4 198 Post:
Chris@4 199 ((UChar*)eclass) [0 .. nblock-1] holds block
Chris@4 200 All other areas of eclass destroyed
Chris@4 201 fmap [0 .. nblock-1] holds sorted order
Chris@4 202 bhtab [ 0 .. 2+(nblock/32) ] destroyed
Chris@4 203 */
Chris@4 204
Chris@4 205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
Chris@4 206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
Chris@4 207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
Chris@4 208 #define WORD_BH(zz) bhtab[(zz) >> 5]
Chris@4 209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
Chris@4 210
Chris@4 211 static
Chris@4 212 void fallbackSort ( UInt32* fmap,
Chris@4 213 UInt32* eclass,
Chris@4 214 UInt32* bhtab,
Chris@4 215 Int32 nblock,
Chris@4 216 Int32 verb )
Chris@4 217 {
Chris@4 218 Int32 ftab[257];
Chris@4 219 Int32 ftabCopy[256];
Chris@4 220 Int32 H, i, j, k, l, r, cc, cc1;
Chris@4 221 Int32 nNotDone;
Chris@4 222 Int32 nBhtab;
Chris@4 223 UChar* eclass8 = (UChar*)eclass;
Chris@4 224
Chris@4 225 /*--
Chris@4 226 Initial 1-char radix sort to generate
Chris@4 227 initial fmap and initial BH bits.
Chris@4 228 --*/
Chris@4 229 if (verb >= 4)
Chris@4 230 VPrintf0 ( " bucket sorting ...\n" );
Chris@4 231 for (i = 0; i < 257; i++) ftab[i] = 0;
Chris@4 232 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
Chris@4 233 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
Chris@4 234 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
Chris@4 235
Chris@4 236 for (i = 0; i < nblock; i++) {
Chris@4 237 j = eclass8[i];
Chris@4 238 k = ftab[j] - 1;
Chris@4 239 ftab[j] = k;
Chris@4 240 fmap[k] = i;
Chris@4 241 }
Chris@4 242
Chris@4 243 nBhtab = 2 + (nblock / 32);
Chris@4 244 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
Chris@4 245 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
Chris@4 246
Chris@4 247 /*--
Chris@4 248 Inductively refine the buckets. Kind-of an
Chris@4 249 "exponential radix sort" (!), inspired by the
Chris@4 250 Manber-Myers suffix array construction algorithm.
Chris@4 251 --*/
Chris@4 252
Chris@4 253 /*-- set sentinel bits for block-end detection --*/
Chris@4 254 for (i = 0; i < 32; i++) {
Chris@4 255 SET_BH(nblock + 2*i);
Chris@4 256 CLEAR_BH(nblock + 2*i + 1);
Chris@4 257 }
Chris@4 258
Chris@4 259 /*-- the log(N) loop --*/
Chris@4 260 H = 1;
Chris@4 261 while (1) {
Chris@4 262
Chris@4 263 if (verb >= 4)
Chris@4 264 VPrintf1 ( " depth %6d has ", H );
Chris@4 265
Chris@4 266 j = 0;
Chris@4 267 for (i = 0; i < nblock; i++) {
Chris@4 268 if (ISSET_BH(i)) j = i;
Chris@4 269 k = fmap[i] - H; if (k < 0) k += nblock;
Chris@4 270 eclass[k] = j;
Chris@4 271 }
Chris@4 272
Chris@4 273 nNotDone = 0;
Chris@4 274 r = -1;
Chris@4 275 while (1) {
Chris@4 276
Chris@4 277 /*-- find the next non-singleton bucket --*/
Chris@4 278 k = r + 1;
Chris@4 279 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
Chris@4 280 if (ISSET_BH(k)) {
Chris@4 281 while (WORD_BH(k) == 0xffffffff) k += 32;
Chris@4 282 while (ISSET_BH(k)) k++;
Chris@4 283 }
Chris@4 284 l = k - 1;
Chris@4 285 if (l >= nblock) break;
Chris@4 286 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
Chris@4 287 if (!ISSET_BH(k)) {
Chris@4 288 while (WORD_BH(k) == 0x00000000) k += 32;
Chris@4 289 while (!ISSET_BH(k)) k++;
Chris@4 290 }
Chris@4 291 r = k - 1;
Chris@4 292 if (r >= nblock) break;
Chris@4 293
Chris@4 294 /*-- now [l, r] bracket current bucket --*/
Chris@4 295 if (r > l) {
Chris@4 296 nNotDone += (r - l + 1);
Chris@4 297 fallbackQSort3 ( fmap, eclass, l, r );
Chris@4 298
Chris@4 299 /*-- scan bucket and generate header bits-- */
Chris@4 300 cc = -1;
Chris@4 301 for (i = l; i <= r; i++) {
Chris@4 302 cc1 = eclass[fmap[i]];
Chris@4 303 if (cc != cc1) { SET_BH(i); cc = cc1; };
Chris@4 304 }
Chris@4 305 }
Chris@4 306 }
Chris@4 307
Chris@4 308 if (verb >= 4)
Chris@4 309 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
Chris@4 310
Chris@4 311 H *= 2;
Chris@4 312 if (H > nblock || nNotDone == 0) break;
Chris@4 313 }
Chris@4 314
Chris@4 315 /*--
Chris@4 316 Reconstruct the original block in
Chris@4 317 eclass8 [0 .. nblock-1], since the
Chris@4 318 previous phase destroyed it.
Chris@4 319 --*/
Chris@4 320 if (verb >= 4)
Chris@4 321 VPrintf0 ( " reconstructing block ...\n" );
Chris@4 322 j = 0;
Chris@4 323 for (i = 0; i < nblock; i++) {
Chris@4 324 while (ftabCopy[j] == 0) j++;
Chris@4 325 ftabCopy[j]--;
Chris@4 326 eclass8[fmap[i]] = (UChar)j;
Chris@4 327 }
Chris@4 328 AssertH ( j < 256, 1005 );
Chris@4 329 }
Chris@4 330
Chris@4 331 #undef SET_BH
Chris@4 332 #undef CLEAR_BH
Chris@4 333 #undef ISSET_BH
Chris@4 334 #undef WORD_BH
Chris@4 335 #undef UNALIGNED_BH
Chris@4 336
Chris@4 337
Chris@4 338 /*---------------------------------------------*/
Chris@4 339 /*--- The main, O(N^2 log(N)) sorting ---*/
Chris@4 340 /*--- algorithm. Faster for "normal" ---*/
Chris@4 341 /*--- non-repetitive blocks. ---*/
Chris@4 342 /*---------------------------------------------*/
Chris@4 343
Chris@4 344 /*---------------------------------------------*/
Chris@4 345 static
Chris@4 346 __inline__
Chris@4 347 Bool mainGtU ( UInt32 i1,
Chris@4 348 UInt32 i2,
Chris@4 349 UChar* block,
Chris@4 350 UInt16* quadrant,
Chris@4 351 UInt32 nblock,
Chris@4 352 Int32* budget )
Chris@4 353 {
Chris@4 354 Int32 k;
Chris@4 355 UChar c1, c2;
Chris@4 356 UInt16 s1, s2;
Chris@4 357
Chris@4 358 AssertD ( i1 != i2, "mainGtU" );
Chris@4 359 /* 1 */
Chris@4 360 c1 = block[i1]; c2 = block[i2];
Chris@4 361 if (c1 != c2) return (c1 > c2);
Chris@4 362 i1++; i2++;
Chris@4 363 /* 2 */
Chris@4 364 c1 = block[i1]; c2 = block[i2];
Chris@4 365 if (c1 != c2) return (c1 > c2);
Chris@4 366 i1++; i2++;
Chris@4 367 /* 3 */
Chris@4 368 c1 = block[i1]; c2 = block[i2];
Chris@4 369 if (c1 != c2) return (c1 > c2);
Chris@4 370 i1++; i2++;
Chris@4 371 /* 4 */
Chris@4 372 c1 = block[i1]; c2 = block[i2];
Chris@4 373 if (c1 != c2) return (c1 > c2);
Chris@4 374 i1++; i2++;
Chris@4 375 /* 5 */
Chris@4 376 c1 = block[i1]; c2 = block[i2];
Chris@4 377 if (c1 != c2) return (c1 > c2);
Chris@4 378 i1++; i2++;
Chris@4 379 /* 6 */
Chris@4 380 c1 = block[i1]; c2 = block[i2];
Chris@4 381 if (c1 != c2) return (c1 > c2);
Chris@4 382 i1++; i2++;
Chris@4 383 /* 7 */
Chris@4 384 c1 = block[i1]; c2 = block[i2];
Chris@4 385 if (c1 != c2) return (c1 > c2);
Chris@4 386 i1++; i2++;
Chris@4 387 /* 8 */
Chris@4 388 c1 = block[i1]; c2 = block[i2];
Chris@4 389 if (c1 != c2) return (c1 > c2);
Chris@4 390 i1++; i2++;
Chris@4 391 /* 9 */
Chris@4 392 c1 = block[i1]; c2 = block[i2];
Chris@4 393 if (c1 != c2) return (c1 > c2);
Chris@4 394 i1++; i2++;
Chris@4 395 /* 10 */
Chris@4 396 c1 = block[i1]; c2 = block[i2];
Chris@4 397 if (c1 != c2) return (c1 > c2);
Chris@4 398 i1++; i2++;
Chris@4 399 /* 11 */
Chris@4 400 c1 = block[i1]; c2 = block[i2];
Chris@4 401 if (c1 != c2) return (c1 > c2);
Chris@4 402 i1++; i2++;
Chris@4 403 /* 12 */
Chris@4 404 c1 = block[i1]; c2 = block[i2];
Chris@4 405 if (c1 != c2) return (c1 > c2);
Chris@4 406 i1++; i2++;
Chris@4 407
Chris@4 408 k = nblock + 8;
Chris@4 409
Chris@4 410 do {
Chris@4 411 /* 1 */
Chris@4 412 c1 = block[i1]; c2 = block[i2];
Chris@4 413 if (c1 != c2) return (c1 > c2);
Chris@4 414 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 415 if (s1 != s2) return (s1 > s2);
Chris@4 416 i1++; i2++;
Chris@4 417 /* 2 */
Chris@4 418 c1 = block[i1]; c2 = block[i2];
Chris@4 419 if (c1 != c2) return (c1 > c2);
Chris@4 420 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 421 if (s1 != s2) return (s1 > s2);
Chris@4 422 i1++; i2++;
Chris@4 423 /* 3 */
Chris@4 424 c1 = block[i1]; c2 = block[i2];
Chris@4 425 if (c1 != c2) return (c1 > c2);
Chris@4 426 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 427 if (s1 != s2) return (s1 > s2);
Chris@4 428 i1++; i2++;
Chris@4 429 /* 4 */
Chris@4 430 c1 = block[i1]; c2 = block[i2];
Chris@4 431 if (c1 != c2) return (c1 > c2);
Chris@4 432 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 433 if (s1 != s2) return (s1 > s2);
Chris@4 434 i1++; i2++;
Chris@4 435 /* 5 */
Chris@4 436 c1 = block[i1]; c2 = block[i2];
Chris@4 437 if (c1 != c2) return (c1 > c2);
Chris@4 438 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 439 if (s1 != s2) return (s1 > s2);
Chris@4 440 i1++; i2++;
Chris@4 441 /* 6 */
Chris@4 442 c1 = block[i1]; c2 = block[i2];
Chris@4 443 if (c1 != c2) return (c1 > c2);
Chris@4 444 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 445 if (s1 != s2) return (s1 > s2);
Chris@4 446 i1++; i2++;
Chris@4 447 /* 7 */
Chris@4 448 c1 = block[i1]; c2 = block[i2];
Chris@4 449 if (c1 != c2) return (c1 > c2);
Chris@4 450 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 451 if (s1 != s2) return (s1 > s2);
Chris@4 452 i1++; i2++;
Chris@4 453 /* 8 */
Chris@4 454 c1 = block[i1]; c2 = block[i2];
Chris@4 455 if (c1 != c2) return (c1 > c2);
Chris@4 456 s1 = quadrant[i1]; s2 = quadrant[i2];
Chris@4 457 if (s1 != s2) return (s1 > s2);
Chris@4 458 i1++; i2++;
Chris@4 459
Chris@4 460 if (i1 >= nblock) i1 -= nblock;
Chris@4 461 if (i2 >= nblock) i2 -= nblock;
Chris@4 462
Chris@4 463 k -= 8;
Chris@4 464 (*budget)--;
Chris@4 465 }
Chris@4 466 while (k >= 0);
Chris@4 467
Chris@4 468 return False;
Chris@4 469 }
Chris@4 470
Chris@4 471
Chris@4 472 /*---------------------------------------------*/
Chris@4 473 /*--
Chris@4 474 Knuth's increments seem to work better
Chris@4 475 than Incerpi-Sedgewick here. Possibly
Chris@4 476 because the number of elems to sort is
Chris@4 477 usually small, typically <= 20.
Chris@4 478 --*/
Chris@4 479 static
Chris@4 480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
Chris@4 481 9841, 29524, 88573, 265720,
Chris@4 482 797161, 2391484 };
Chris@4 483
Chris@4 484 static
Chris@4 485 void mainSimpleSort ( UInt32* ptr,
Chris@4 486 UChar* block,
Chris@4 487 UInt16* quadrant,
Chris@4 488 Int32 nblock,
Chris@4 489 Int32 lo,
Chris@4 490 Int32 hi,
Chris@4 491 Int32 d,
Chris@4 492 Int32* budget )
Chris@4 493 {
Chris@4 494 Int32 i, j, h, bigN, hp;
Chris@4 495 UInt32 v;
Chris@4 496
Chris@4 497 bigN = hi - lo + 1;
Chris@4 498 if (bigN < 2) return;
Chris@4 499
Chris@4 500 hp = 0;
Chris@4 501 while (incs[hp] < bigN) hp++;
Chris@4 502 hp--;
Chris@4 503
Chris@4 504 for (; hp >= 0; hp--) {
Chris@4 505 h = incs[hp];
Chris@4 506
Chris@4 507 i = lo + h;
Chris@4 508 while (True) {
Chris@4 509
Chris@4 510 /*-- copy 1 --*/
Chris@4 511 if (i > hi) break;
Chris@4 512 v = ptr[i];
Chris@4 513 j = i;
Chris@4 514 while ( mainGtU (
Chris@4 515 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 516 ) ) {
Chris@4 517 ptr[j] = ptr[j-h];
Chris@4 518 j = j - h;
Chris@4 519 if (j <= (lo + h - 1)) break;
Chris@4 520 }
Chris@4 521 ptr[j] = v;
Chris@4 522 i++;
Chris@4 523
Chris@4 524 /*-- copy 2 --*/
Chris@4 525 if (i > hi) break;
Chris@4 526 v = ptr[i];
Chris@4 527 j = i;
Chris@4 528 while ( mainGtU (
Chris@4 529 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 530 ) ) {
Chris@4 531 ptr[j] = ptr[j-h];
Chris@4 532 j = j - h;
Chris@4 533 if (j <= (lo + h - 1)) break;
Chris@4 534 }
Chris@4 535 ptr[j] = v;
Chris@4 536 i++;
Chris@4 537
Chris@4 538 /*-- copy 3 --*/
Chris@4 539 if (i > hi) break;
Chris@4 540 v = ptr[i];
Chris@4 541 j = i;
Chris@4 542 while ( mainGtU (
Chris@4 543 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
Chris@4 544 ) ) {
Chris@4 545 ptr[j] = ptr[j-h];
Chris@4 546 j = j - h;
Chris@4 547 if (j <= (lo + h - 1)) break;
Chris@4 548 }
Chris@4 549 ptr[j] = v;
Chris@4 550 i++;
Chris@4 551
Chris@4 552 if (*budget < 0) return;
Chris@4 553 }
Chris@4 554 }
Chris@4 555 }
Chris@4 556
Chris@4 557
Chris@4 558 /*---------------------------------------------*/
Chris@4 559 /*--
Chris@4 560 The following is an implementation of
Chris@4 561 an elegant 3-way quicksort for strings,
Chris@4 562 described in a paper "Fast Algorithms for
Chris@4 563 Sorting and Searching Strings", by Robert
Chris@4 564 Sedgewick and Jon L. Bentley.
Chris@4 565 --*/
Chris@4 566
Chris@4 567 #define mswap(zz1, zz2) \
Chris@4 568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
Chris@4 569
Chris@4 570 #define mvswap(zzp1, zzp2, zzn) \
Chris@4 571 { \
Chris@4 572 Int32 yyp1 = (zzp1); \
Chris@4 573 Int32 yyp2 = (zzp2); \
Chris@4 574 Int32 yyn = (zzn); \
Chris@4 575 while (yyn > 0) { \
Chris@4 576 mswap(ptr[yyp1], ptr[yyp2]); \
Chris@4 577 yyp1++; yyp2++; yyn--; \
Chris@4 578 } \
Chris@4 579 }
Chris@4 580
Chris@4 581 static
Chris@4 582 __inline__
Chris@4 583 UChar mmed3 ( UChar a, UChar b, UChar c )
Chris@4 584 {
Chris@4 585 UChar t;
Chris@4 586 if (a > b) { t = a; a = b; b = t; };
Chris@4 587 if (b > c) {
Chris@4 588 b = c;
Chris@4 589 if (a > b) b = a;
Chris@4 590 }
Chris@4 591 return b;
Chris@4 592 }
Chris@4 593
Chris@4 594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
Chris@4 595
Chris@4 596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
Chris@4 597 stackHi[sp] = hz; \
Chris@4 598 stackD [sp] = dz; \
Chris@4 599 sp++; }
Chris@4 600
Chris@4 601 #define mpop(lz,hz,dz) { sp--; \
Chris@4 602 lz = stackLo[sp]; \
Chris@4 603 hz = stackHi[sp]; \
Chris@4 604 dz = stackD [sp]; }
Chris@4 605
Chris@4 606
Chris@4 607 #define mnextsize(az) (nextHi[az]-nextLo[az])
Chris@4 608
Chris@4 609 #define mnextswap(az,bz) \
Chris@4 610 { Int32 tz; \
Chris@4 611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
Chris@4 612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
Chris@4 613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
Chris@4 614
Chris@4 615
Chris@4 616 #define MAIN_QSORT_SMALL_THRESH 20
Chris@4 617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
Chris@4 618 #define MAIN_QSORT_STACK_SIZE 100
Chris@4 619
Chris@4 620 static
Chris@4 621 void mainQSort3 ( UInt32* ptr,
Chris@4 622 UChar* block,
Chris@4 623 UInt16* quadrant,
Chris@4 624 Int32 nblock,
Chris@4 625 Int32 loSt,
Chris@4 626 Int32 hiSt,
Chris@4 627 Int32 dSt,
Chris@4 628 Int32* budget )
Chris@4 629 {
Chris@4 630 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
Chris@4 631 Int32 sp, lo, hi, d;
Chris@4 632
Chris@4 633 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
Chris@4 634 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
Chris@4 635 Int32 stackD [MAIN_QSORT_STACK_SIZE];
Chris@4 636
Chris@4 637 Int32 nextLo[3];
Chris@4 638 Int32 nextHi[3];
Chris@4 639 Int32 nextD [3];
Chris@4 640
Chris@4 641 sp = 0;
Chris@4 642 mpush ( loSt, hiSt, dSt );
Chris@4 643
Chris@4 644 while (sp > 0) {
Chris@4 645
Chris@4 646 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
Chris@4 647
Chris@4 648 mpop ( lo, hi, d );
Chris@4 649 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
Chris@4 650 d > MAIN_QSORT_DEPTH_THRESH) {
Chris@4 651 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
Chris@4 652 if (*budget < 0) return;
Chris@4 653 continue;
Chris@4 654 }
Chris@4 655
Chris@4 656 med = (Int32)
Chris@4 657 mmed3 ( block[ptr[ lo ]+d],
Chris@4 658 block[ptr[ hi ]+d],
Chris@4 659 block[ptr[ (lo+hi)>>1 ]+d] );
Chris@4 660
Chris@4 661 unLo = ltLo = lo;
Chris@4 662 unHi = gtHi = hi;
Chris@4 663
Chris@4 664 while (True) {
Chris@4 665 while (True) {
Chris@4 666 if (unLo > unHi) break;
Chris@4 667 n = ((Int32)block[ptr[unLo]+d]) - med;
Chris@4 668 if (n == 0) {
Chris@4 669 mswap(ptr[unLo], ptr[ltLo]);
Chris@4 670 ltLo++; unLo++; continue;
Chris@4 671 };
Chris@4 672 if (n > 0) break;
Chris@4 673 unLo++;
Chris@4 674 }
Chris@4 675 while (True) {
Chris@4 676 if (unLo > unHi) break;
Chris@4 677 n = ((Int32)block[ptr[unHi]+d]) - med;
Chris@4 678 if (n == 0) {
Chris@4 679 mswap(ptr[unHi], ptr[gtHi]);
Chris@4 680 gtHi--; unHi--; continue;
Chris@4 681 };
Chris@4 682 if (n < 0) break;
Chris@4 683 unHi--;
Chris@4 684 }
Chris@4 685 if (unLo > unHi) break;
Chris@4 686 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
Chris@4 687 }
Chris@4 688
Chris@4 689 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
Chris@4 690
Chris@4 691 if (gtHi < ltLo) {
Chris@4 692 mpush(lo, hi, d+1 );
Chris@4 693 continue;
Chris@4 694 }
Chris@4 695
Chris@4 696 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
Chris@4 697 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
Chris@4 698
Chris@4 699 n = lo + unLo - ltLo - 1;
Chris@4 700 m = hi - (gtHi - unHi) + 1;
Chris@4 701
Chris@4 702 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
Chris@4 703 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
Chris@4 704 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
Chris@4 705
Chris@4 706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
Chris@4 707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
Chris@4 708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
Chris@4 709
Chris@4 710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
Chris@4 711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
Chris@4 712
Chris@4 713 mpush (nextLo[0], nextHi[0], nextD[0]);
Chris@4 714 mpush (nextLo[1], nextHi[1], nextD[1]);
Chris@4 715 mpush (nextLo[2], nextHi[2], nextD[2]);
Chris@4 716 }
Chris@4 717 }
Chris@4 718
Chris@4 719 #undef mswap
Chris@4 720 #undef mvswap
Chris@4 721 #undef mpush
Chris@4 722 #undef mpop
Chris@4 723 #undef mmin
Chris@4 724 #undef mnextsize
Chris@4 725 #undef mnextswap
Chris@4 726 #undef MAIN_QSORT_SMALL_THRESH
Chris@4 727 #undef MAIN_QSORT_DEPTH_THRESH
Chris@4 728 #undef MAIN_QSORT_STACK_SIZE
Chris@4 729
Chris@4 730
Chris@4 731 /*---------------------------------------------*/
Chris@4 732 /* Pre:
Chris@4 733 nblock > N_OVERSHOOT
Chris@4 734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
Chris@4 735 ((UChar*)block32) [0 .. nblock-1] holds block
Chris@4 736 ptr exists for [0 .. nblock-1]
Chris@4 737
Chris@4 738 Post:
Chris@4 739 ((UChar*)block32) [0 .. nblock-1] holds block
Chris@4 740 All other areas of block32 destroyed
Chris@4 741 ftab [0 .. 65536 ] destroyed
Chris@4 742 ptr [0 .. nblock-1] holds sorted order
Chris@4 743 if (*budget < 0), sorting was abandoned
Chris@4 744 */
Chris@4 745
Chris@4 746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
Chris@4 747 #define SETMASK (1 << 21)
Chris@4 748 #define CLEARMASK (~(SETMASK))
Chris@4 749
Chris@4 750 static
Chris@4 751 void mainSort ( UInt32* ptr,
Chris@4 752 UChar* block,
Chris@4 753 UInt16* quadrant,
Chris@4 754 UInt32* ftab,
Chris@4 755 Int32 nblock,
Chris@4 756 Int32 verb,
Chris@4 757 Int32* budget )
Chris@4 758 {
Chris@4 759 Int32 i, j, k, ss, sb;
Chris@4 760 Int32 runningOrder[256];
Chris@4 761 Bool bigDone[256];
Chris@4 762 Int32 copyStart[256];
Chris@4 763 Int32 copyEnd [256];
Chris@4 764 UChar c1;
Chris@4 765 Int32 numQSorted;
Chris@4 766 UInt16 s;
Chris@4 767 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
Chris@4 768
Chris@4 769 /*-- set up the 2-byte frequency table --*/
Chris@4 770 for (i = 65536; i >= 0; i--) ftab[i] = 0;
Chris@4 771
Chris@4 772 j = block[0] << 8;
Chris@4 773 i = nblock-1;
Chris@4 774 for (; i >= 3; i -= 4) {
Chris@4 775 quadrant[i] = 0;
Chris@4 776 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
Chris@4 777 ftab[j]++;
Chris@4 778 quadrant[i-1] = 0;
Chris@4 779 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
Chris@4 780 ftab[j]++;
Chris@4 781 quadrant[i-2] = 0;
Chris@4 782 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
Chris@4 783 ftab[j]++;
Chris@4 784 quadrant[i-3] = 0;
Chris@4 785 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
Chris@4 786 ftab[j]++;
Chris@4 787 }
Chris@4 788 for (; i >= 0; i--) {
Chris@4 789 quadrant[i] = 0;
Chris@4 790 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
Chris@4 791 ftab[j]++;
Chris@4 792 }
Chris@4 793
Chris@4 794 /*-- (emphasises close relationship of block & quadrant) --*/
Chris@4 795 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
Chris@4 796 block [nblock+i] = block[i];
Chris@4 797 quadrant[nblock+i] = 0;
Chris@4 798 }
Chris@4 799
Chris@4 800 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
Chris@4 801
Chris@4 802 /*-- Complete the initial radix sort --*/
Chris@4 803 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
Chris@4 804
Chris@4 805 s = block[0] << 8;
Chris@4 806 i = nblock-1;
Chris@4 807 for (; i >= 3; i -= 4) {
Chris@4 808 s = (s >> 8) | (block[i] << 8);
Chris@4 809 j = ftab[s] -1;
Chris@4 810 ftab[s] = j;
Chris@4 811 ptr[j] = i;
Chris@4 812 s = (s >> 8) | (block[i-1] << 8);
Chris@4 813 j = ftab[s] -1;
Chris@4 814 ftab[s] = j;
Chris@4 815 ptr[j] = i-1;
Chris@4 816 s = (s >> 8) | (block[i-2] << 8);
Chris@4 817 j = ftab[s] -1;
Chris@4 818 ftab[s] = j;
Chris@4 819 ptr[j] = i-2;
Chris@4 820 s = (s >> 8) | (block[i-3] << 8);
Chris@4 821 j = ftab[s] -1;
Chris@4 822 ftab[s] = j;
Chris@4 823 ptr[j] = i-3;
Chris@4 824 }
Chris@4 825 for (; i >= 0; i--) {
Chris@4 826 s = (s >> 8) | (block[i] << 8);
Chris@4 827 j = ftab[s] -1;
Chris@4 828 ftab[s] = j;
Chris@4 829 ptr[j] = i;
Chris@4 830 }
Chris@4 831
Chris@4 832 /*--
Chris@4 833 Now ftab contains the first loc of every small bucket.
Chris@4 834 Calculate the running order, from smallest to largest
Chris@4 835 big bucket.
Chris@4 836 --*/
Chris@4 837 for (i = 0; i <= 255; i++) {
Chris@4 838 bigDone [i] = False;
Chris@4 839 runningOrder[i] = i;
Chris@4 840 }
Chris@4 841
Chris@4 842 {
Chris@4 843 Int32 vv;
Chris@4 844 Int32 h = 1;
Chris@4 845 do h = 3 * h + 1; while (h <= 256);
Chris@4 846 do {
Chris@4 847 h = h / 3;
Chris@4 848 for (i = h; i <= 255; i++) {
Chris@4 849 vv = runningOrder[i];
Chris@4 850 j = i;
Chris@4 851 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
Chris@4 852 runningOrder[j] = runningOrder[j-h];
Chris@4 853 j = j - h;
Chris@4 854 if (j <= (h - 1)) goto zero;
Chris@4 855 }
Chris@4 856 zero:
Chris@4 857 runningOrder[j] = vv;
Chris@4 858 }
Chris@4 859 } while (h != 1);
Chris@4 860 }
Chris@4 861
Chris@4 862 /*--
Chris@4 863 The main sorting loop.
Chris@4 864 --*/
Chris@4 865
Chris@4 866 numQSorted = 0;
Chris@4 867
Chris@4 868 for (i = 0; i <= 255; i++) {
Chris@4 869
Chris@4 870 /*--
Chris@4 871 Process big buckets, starting with the least full.
Chris@4 872 Basically this is a 3-step process in which we call
Chris@4 873 mainQSort3 to sort the small buckets [ss, j], but
Chris@4 874 also make a big effort to avoid the calls if we can.
Chris@4 875 --*/
Chris@4 876 ss = runningOrder[i];
Chris@4 877
Chris@4 878 /*--
Chris@4 879 Step 1:
Chris@4 880 Complete the big bucket [ss] by quicksorting
Chris@4 881 any unsorted small buckets [ss, j], for j != ss.
Chris@4 882 Hopefully previous pointer-scanning phases have already
Chris@4 883 completed many of the small buckets [ss, j], so
Chris@4 884 we don't have to sort them at all.
Chris@4 885 --*/
Chris@4 886 for (j = 0; j <= 255; j++) {
Chris@4 887 if (j != ss) {
Chris@4 888 sb = (ss << 8) + j;
Chris@4 889 if ( ! (ftab[sb] & SETMASK) ) {
Chris@4 890 Int32 lo = ftab[sb] & CLEARMASK;
Chris@4 891 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
Chris@4 892 if (hi > lo) {
Chris@4 893 if (verb >= 4)
Chris@4 894 VPrintf4 ( " qsort [0x%x, 0x%x] "
Chris@4 895 "done %d this %d\n",
Chris@4 896 ss, j, numQSorted, hi - lo + 1 );
Chris@4 897 mainQSort3 (
Chris@4 898 ptr, block, quadrant, nblock,
Chris@4 899 lo, hi, BZ_N_RADIX, budget
Chris@4 900 );
Chris@4 901 numQSorted += (hi - lo + 1);
Chris@4 902 if (*budget < 0) return;
Chris@4 903 }
Chris@4 904 }
Chris@4 905 ftab[sb] |= SETMASK;
Chris@4 906 }
Chris@4 907 }
Chris@4 908
Chris@4 909 AssertH ( !bigDone[ss], 1006 );
Chris@4 910
Chris@4 911 /*--
Chris@4 912 Step 2:
Chris@4 913 Now scan this big bucket [ss] so as to synthesise the
Chris@4 914 sorted order for small buckets [t, ss] for all t,
Chris@4 915 including, magically, the bucket [ss,ss] too.
Chris@4 916 This will avoid doing Real Work in subsequent Step 1's.
Chris@4 917 --*/
Chris@4 918 {
Chris@4 919 for (j = 0; j <= 255; j++) {
Chris@4 920 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
Chris@4 921 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
Chris@4 922 }
Chris@4 923 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
Chris@4 924 k = ptr[j]-1; if (k < 0) k += nblock;
Chris@4 925 c1 = block[k];
Chris@4 926 if (!bigDone[c1])
Chris@4 927 ptr[ copyStart[c1]++ ] = k;
Chris@4 928 }
Chris@4 929 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
Chris@4 930 k = ptr[j]-1; if (k < 0) k += nblock;
Chris@4 931 c1 = block[k];
Chris@4 932 if (!bigDone[c1])
Chris@4 933 ptr[ copyEnd[c1]-- ] = k;
Chris@4 934 }
Chris@4 935 }
Chris@4 936
Chris@4 937 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
Chris@4 938 ||
Chris@4 939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
Chris@4 940 Necessity for this case is demonstrated by compressing
Chris@4 941 a sequence of approximately 48.5 million of character
Chris@4 942 251; 1.0.0/1.0.1 will then die here. */
Chris@4 943 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
Chris@4 944 1007 )
Chris@4 945
Chris@4 946 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
Chris@4 947
Chris@4 948 /*--
Chris@4 949 Step 3:
Chris@4 950 The [ss] big bucket is now done. Record this fact,
Chris@4 951 and update the quadrant descriptors. Remember to
Chris@4 952 update quadrants in the overshoot area too, if
Chris@4 953 necessary. The "if (i < 255)" test merely skips
Chris@4 954 this updating for the last bucket processed, since
Chris@4 955 updating for the last bucket is pointless.
Chris@4 956
Chris@4 957 The quadrant array provides a way to incrementally
Chris@4 958 cache sort orderings, as they appear, so as to
Chris@4 959 make subsequent comparisons in fullGtU() complete
Chris@4 960 faster. For repetitive blocks this makes a big
Chris@4 961 difference (but not big enough to be able to avoid
Chris@4 962 the fallback sorting mechanism, exponential radix sort).
Chris@4 963
Chris@4 964 The precise meaning is: at all times:
Chris@4 965
Chris@4 966 for 0 <= i < nblock and 0 <= j <= nblock
Chris@4 967
Chris@4 968 if block[i] != block[j],
Chris@4 969
Chris@4 970 then the relative values of quadrant[i] and
Chris@4 971 quadrant[j] are meaningless.
Chris@4 972
Chris@4 973 else {
Chris@4 974 if quadrant[i] < quadrant[j]
Chris@4 975 then the string starting at i lexicographically
Chris@4 976 precedes the string starting at j
Chris@4 977
Chris@4 978 else if quadrant[i] > quadrant[j]
Chris@4 979 then the string starting at j lexicographically
Chris@4 980 precedes the string starting at i
Chris@4 981
Chris@4 982 else
Chris@4 983 the relative ordering of the strings starting
Chris@4 984 at i and j has not yet been determined.
Chris@4 985 }
Chris@4 986 --*/
Chris@4 987 bigDone[ss] = True;
Chris@4 988
Chris@4 989 if (i < 255) {
Chris@4 990 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
Chris@4 991 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
Chris@4 992 Int32 shifts = 0;
Chris@4 993
Chris@4 994 while ((bbSize >> shifts) > 65534) shifts++;
Chris@4 995
Chris@4 996 for (j = bbSize-1; j >= 0; j--) {
Chris@4 997 Int32 a2update = ptr[bbStart + j];
Chris@4 998 UInt16 qVal = (UInt16)(j >> shifts);
Chris@4 999 quadrant[a2update] = qVal;
Chris@4 1000 if (a2update < BZ_N_OVERSHOOT)
Chris@4 1001 quadrant[a2update + nblock] = qVal;
Chris@4 1002 }
Chris@4 1003 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
Chris@4 1004 }
Chris@4 1005
Chris@4 1006 }
Chris@4 1007
Chris@4 1008 if (verb >= 4)
Chris@4 1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
Chris@4 1010 nblock, numQSorted, nblock - numQSorted );
Chris@4 1011 }
Chris@4 1012
Chris@4 1013 #undef BIGFREQ
Chris@4 1014 #undef SETMASK
Chris@4 1015 #undef CLEARMASK
Chris@4 1016
Chris@4 1017
Chris@4 1018 /*---------------------------------------------*/
Chris@4 1019 /* Pre:
Chris@4 1020 nblock > 0
Chris@4 1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
Chris@4 1022 ((UChar*)arr2) [0 .. nblock-1] holds block
Chris@4 1023 arr1 exists for [0 .. nblock-1]
Chris@4 1024
Chris@4 1025 Post:
Chris@4 1026 ((UChar*)arr2) [0 .. nblock-1] holds block
Chris@4 1027 All other areas of block destroyed
Chris@4 1028 ftab [ 0 .. 65536 ] destroyed
Chris@4 1029 arr1 [0 .. nblock-1] holds sorted order
Chris@4 1030 */
Chris@4 1031 void BZ2_blockSort ( EState* s )
Chris@4 1032 {
Chris@4 1033 UInt32* ptr = s->ptr;
Chris@4 1034 UChar* block = s->block;
Chris@4 1035 UInt32* ftab = s->ftab;
Chris@4 1036 Int32 nblock = s->nblock;
Chris@4 1037 Int32 verb = s->verbosity;
Chris@4 1038 Int32 wfact = s->workFactor;
Chris@4 1039 UInt16* quadrant;
Chris@4 1040 Int32 budget;
Chris@4 1041 Int32 budgetInit;
Chris@4 1042 Int32 i;
Chris@4 1043
Chris@4 1044 if (nblock < 10000) {
Chris@4 1045 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
Chris@4 1046 } else {
Chris@4 1047 /* Calculate the location for quadrant, remembering to get
Chris@4 1048 the alignment right. Assumes that &(block[0]) is at least
Chris@4 1049 2-byte aligned -- this should be ok since block is really
Chris@4 1050 the first section of arr2.
Chris@4 1051 */
Chris@4 1052 i = nblock+BZ_N_OVERSHOOT;
Chris@4 1053 if (i & 1) i++;
Chris@4 1054 quadrant = (UInt16*)(&(block[i]));
Chris@4 1055
Chris@4 1056 /* (wfact-1) / 3 puts the default-factor-30
Chris@4 1057 transition point at very roughly the same place as
Chris@4 1058 with v0.1 and v0.9.0.
Chris@4 1059 Not that it particularly matters any more, since the
Chris@4 1060 resulting compressed stream is now the same regardless
Chris@4 1061 of whether or not we use the main sort or fallback sort.
Chris@4 1062 */
Chris@4 1063 if (wfact < 1 ) wfact = 1;
Chris@4 1064 if (wfact > 100) wfact = 100;
Chris@4 1065 budgetInit = nblock * ((wfact-1) / 3);
Chris@4 1066 budget = budgetInit;
Chris@4 1067
Chris@4 1068 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
Chris@4 1069 if (verb >= 3)
Chris@4 1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
Chris@4 1071 budgetInit - budget,
Chris@4 1072 nblock,
Chris@4 1073 (float)(budgetInit - budget) /
Chris@4 1074 (float)(nblock==0 ? 1 : nblock) );
Chris@4 1075 if (budget < 0) {
Chris@4 1076 if (verb >= 2)
Chris@4 1077 VPrintf0 ( " too repetitive; using fallback"
Chris@4 1078 " sorting algorithm\n" );
Chris@4 1079 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
Chris@4 1080 }
Chris@4 1081 }
Chris@4 1082
Chris@4 1083 s->origPtr = -1;
Chris@4 1084 for (i = 0; i < s->nblock; i++)
Chris@4 1085 if (ptr[i] == 0)
Chris@4 1086 { s->origPtr = i; break; };
Chris@4 1087
Chris@4 1088 AssertH( s->origPtr != -1, 1003 );
Chris@4 1089 }
Chris@4 1090
Chris@4 1091
Chris@4 1092 /*-------------------------------------------------------------*/
Chris@4 1093 /*--- end blocksort.c ---*/
Chris@4 1094 /*-------------------------------------------------------------*/